{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9999280558286769, "eval_steps": 500, "global_step": 5212, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00019185112352814216, "grad_norm": 8.309519923962705, "learning_rate": 1.2738853503184715e-07, "loss": 1.3879, "step": 1 }, { "epoch": 0.0003837022470562843, "grad_norm": 6.5544856294850895, "learning_rate": 2.547770700636943e-07, "loss": 1.4294, "step": 2 }, { "epoch": 0.0005755533705844265, "grad_norm": 8.57065547757484, "learning_rate": 3.8216560509554143e-07, "loss": 1.4731, "step": 3 }, { "epoch": 0.0007674044941125686, "grad_norm": 7.031837984101586, "learning_rate": 5.095541401273886e-07, "loss": 1.3943, "step": 4 }, { "epoch": 0.0009592556176407108, "grad_norm": 7.3543794258248, "learning_rate": 6.369426751592357e-07, "loss": 1.4745, "step": 5 }, { "epoch": 0.001151106741168853, "grad_norm": 7.106565322668942, "learning_rate": 7.643312101910829e-07, "loss": 1.3682, "step": 6 }, { "epoch": 0.001342957864696995, "grad_norm": 8.134830639064468, "learning_rate": 8.9171974522293e-07, "loss": 1.4406, "step": 7 }, { "epoch": 0.0015348089882251373, "grad_norm": 7.339643417321367, "learning_rate": 1.0191082802547772e-06, "loss": 1.386, "step": 8 }, { "epoch": 0.0017266601117532795, "grad_norm": 5.981493854389214, "learning_rate": 1.1464968152866242e-06, "loss": 1.3391, "step": 9 }, { "epoch": 0.0019185112352814216, "grad_norm": 5.887231068767325, "learning_rate": 1.2738853503184715e-06, "loss": 1.407, "step": 10 }, { "epoch": 0.002110362358809564, "grad_norm": 6.3034962175986, "learning_rate": 1.4012738853503185e-06, "loss": 1.2551, "step": 11 }, { "epoch": 0.002302213482337706, "grad_norm": 4.927799973370543, "learning_rate": 1.5286624203821657e-06, "loss": 1.3652, "step": 12 }, { "epoch": 0.002494064605865848, "grad_norm": 4.150425881362178, "learning_rate": 1.6560509554140127e-06, "loss": 1.2292, "step": 13 }, { "epoch": 0.00268591572939399, "grad_norm": 2.96054951771272, "learning_rate": 1.78343949044586e-06, "loss": 1.2738, "step": 14 }, { "epoch": 0.0028777668529221326, "grad_norm": 3.0731364587457732, "learning_rate": 1.9108280254777074e-06, "loss": 1.2415, "step": 15 }, { "epoch": 0.0030696179764502745, "grad_norm": 2.978756951475304, "learning_rate": 2.0382165605095544e-06, "loss": 1.3404, "step": 16 }, { "epoch": 0.003261469099978417, "grad_norm": 3.56522473630036, "learning_rate": 2.1656050955414015e-06, "loss": 1.3217, "step": 17 }, { "epoch": 0.003453320223506559, "grad_norm": 2.4296216349542354, "learning_rate": 2.2929936305732485e-06, "loss": 1.1804, "step": 18 }, { "epoch": 0.003645171347034701, "grad_norm": 1.9465093471406025, "learning_rate": 2.420382165605096e-06, "loss": 1.2027, "step": 19 }, { "epoch": 0.0038370224705628433, "grad_norm": 2.2283468424206863, "learning_rate": 2.547770700636943e-06, "loss": 1.1156, "step": 20 }, { "epoch": 0.004028873594090985, "grad_norm": 2.365661538981942, "learning_rate": 2.67515923566879e-06, "loss": 1.1718, "step": 21 }, { "epoch": 0.004220724717619128, "grad_norm": 2.1959933256249755, "learning_rate": 2.802547770700637e-06, "loss": 1.1421, "step": 22 }, { "epoch": 0.00441257584114727, "grad_norm": 2.012530464792094, "learning_rate": 2.9299363057324844e-06, "loss": 1.122, "step": 23 }, { "epoch": 0.004604426964675412, "grad_norm": 2.2133999687157657, "learning_rate": 3.0573248407643314e-06, "loss": 1.1553, "step": 24 }, { "epoch": 0.004796278088203554, "grad_norm": 1.746186636028179, "learning_rate": 3.1847133757961785e-06, "loss": 1.1241, "step": 25 }, { "epoch": 0.004988129211731696, "grad_norm": 1.7639311296725597, "learning_rate": 3.3121019108280255e-06, "loss": 1.0734, "step": 26 }, { "epoch": 0.005179980335259838, "grad_norm": 1.9797811931640201, "learning_rate": 3.4394904458598725e-06, "loss": 1.1167, "step": 27 }, { "epoch": 0.00537183145878798, "grad_norm": 1.8345378177163378, "learning_rate": 3.56687898089172e-06, "loss": 1.1076, "step": 28 }, { "epoch": 0.005563682582316123, "grad_norm": 1.9397294038841095, "learning_rate": 3.694267515923567e-06, "loss": 1.0441, "step": 29 }, { "epoch": 0.005755533705844265, "grad_norm": 1.6795537546036898, "learning_rate": 3.821656050955415e-06, "loss": 1.1209, "step": 30 }, { "epoch": 0.005947384829372407, "grad_norm": 1.7825331679465573, "learning_rate": 3.949044585987262e-06, "loss": 1.018, "step": 31 }, { "epoch": 0.006139235952900549, "grad_norm": 1.607613764565851, "learning_rate": 4.076433121019109e-06, "loss": 1.0617, "step": 32 }, { "epoch": 0.0063310870764286915, "grad_norm": 1.3427564319762897, "learning_rate": 4.203821656050956e-06, "loss": 0.246, "step": 33 }, { "epoch": 0.006522938199956834, "grad_norm": 1.2123940418381272, "learning_rate": 4.331210191082803e-06, "loss": 0.2698, "step": 34 }, { "epoch": 0.0067147893234849754, "grad_norm": 1.6680239057868702, "learning_rate": 4.45859872611465e-06, "loss": 1.1023, "step": 35 }, { "epoch": 0.006906640447013118, "grad_norm": 1.553491418398545, "learning_rate": 4.585987261146497e-06, "loss": 1.0394, "step": 36 }, { "epoch": 0.00709849157054126, "grad_norm": 1.482998526871875, "learning_rate": 4.713375796178344e-06, "loss": 0.9641, "step": 37 }, { "epoch": 0.007290342694069402, "grad_norm": 1.6499671993173155, "learning_rate": 4.840764331210192e-06, "loss": 1.0411, "step": 38 }, { "epoch": 0.007482193817597544, "grad_norm": 1.622566907414385, "learning_rate": 4.968152866242039e-06, "loss": 0.9847, "step": 39 }, { "epoch": 0.007674044941125687, "grad_norm": 1.5917505834345058, "learning_rate": 5.095541401273886e-06, "loss": 1.0072, "step": 40 }, { "epoch": 0.007865896064653828, "grad_norm": 1.6202413991286806, "learning_rate": 5.222929936305733e-06, "loss": 1.0373, "step": 41 }, { "epoch": 0.00805774718818197, "grad_norm": 1.3894630837788655, "learning_rate": 5.35031847133758e-06, "loss": 0.9459, "step": 42 }, { "epoch": 0.008249598311710113, "grad_norm": 1.3463519149142977, "learning_rate": 5.477707006369427e-06, "loss": 1.002, "step": 43 }, { "epoch": 0.008441449435238255, "grad_norm": 1.4663715470967404, "learning_rate": 5.605095541401274e-06, "loss": 0.9468, "step": 44 }, { "epoch": 0.008633300558766398, "grad_norm": 1.2671240038693938, "learning_rate": 5.732484076433121e-06, "loss": 0.9904, "step": 45 }, { "epoch": 0.00882515168229454, "grad_norm": 1.5193620002276167, "learning_rate": 5.859872611464969e-06, "loss": 1.0777, "step": 46 }, { "epoch": 0.00901700280582268, "grad_norm": 1.366414247206402, "learning_rate": 5.987261146496816e-06, "loss": 0.9938, "step": 47 }, { "epoch": 0.009208853929350823, "grad_norm": 1.4089356331087586, "learning_rate": 6.114649681528663e-06, "loss": 0.9075, "step": 48 }, { "epoch": 0.009400705052878966, "grad_norm": 1.3690988734415628, "learning_rate": 6.24203821656051e-06, "loss": 1.0059, "step": 49 }, { "epoch": 0.009592556176407108, "grad_norm": 1.3776414048321335, "learning_rate": 6.369426751592357e-06, "loss": 0.9657, "step": 50 }, { "epoch": 0.00978440729993525, "grad_norm": 1.334377161981155, "learning_rate": 6.496815286624204e-06, "loss": 0.9712, "step": 51 }, { "epoch": 0.009976258423463393, "grad_norm": 1.3312464449327375, "learning_rate": 6.624203821656051e-06, "loss": 0.9646, "step": 52 }, { "epoch": 0.010168109546991535, "grad_norm": 1.2245724329124537, "learning_rate": 6.751592356687898e-06, "loss": 0.9734, "step": 53 }, { "epoch": 0.010359960670519676, "grad_norm": 1.374586527344631, "learning_rate": 6.878980891719745e-06, "loss": 0.9876, "step": 54 }, { "epoch": 0.010551811794047818, "grad_norm": 0.8432119679856314, "learning_rate": 7.006369426751593e-06, "loss": 0.2818, "step": 55 }, { "epoch": 0.01074366291757596, "grad_norm": 1.7403268463118444, "learning_rate": 7.13375796178344e-06, "loss": 1.0757, "step": 56 }, { "epoch": 0.010935514041104103, "grad_norm": 1.3063306147990181, "learning_rate": 7.261146496815287e-06, "loss": 0.9368, "step": 57 }, { "epoch": 0.011127365164632246, "grad_norm": 1.4187563562496974, "learning_rate": 7.388535031847134e-06, "loss": 0.9471, "step": 58 }, { "epoch": 0.011319216288160388, "grad_norm": 1.3079623696656588, "learning_rate": 7.515923566878982e-06, "loss": 1.0237, "step": 59 }, { "epoch": 0.01151106741168853, "grad_norm": 1.6225029330029834, "learning_rate": 7.64331210191083e-06, "loss": 0.9923, "step": 60 }, { "epoch": 0.011702918535216673, "grad_norm": 1.6735114156466486, "learning_rate": 7.770700636942676e-06, "loss": 0.9308, "step": 61 }, { "epoch": 0.011894769658744813, "grad_norm": 1.2167501533274558, "learning_rate": 7.898089171974524e-06, "loss": 1.0438, "step": 62 }, { "epoch": 0.012086620782272956, "grad_norm": 1.3219198207442167, "learning_rate": 8.02547770700637e-06, "loss": 0.9246, "step": 63 }, { "epoch": 0.012278471905801098, "grad_norm": 1.5325425921708964, "learning_rate": 8.152866242038218e-06, "loss": 1.0221, "step": 64 }, { "epoch": 0.01247032302932924, "grad_norm": 1.2774538248045044, "learning_rate": 8.280254777070064e-06, "loss": 0.9533, "step": 65 }, { "epoch": 0.012662174152857383, "grad_norm": 1.4654038005226873, "learning_rate": 8.407643312101912e-06, "loss": 0.9631, "step": 66 }, { "epoch": 0.012854025276385525, "grad_norm": 1.2925984920239828, "learning_rate": 8.53503184713376e-06, "loss": 0.911, "step": 67 }, { "epoch": 0.013045876399913668, "grad_norm": 1.462532364773549, "learning_rate": 8.662420382165606e-06, "loss": 0.8967, "step": 68 }, { "epoch": 0.013237727523441808, "grad_norm": 1.1894572120640454, "learning_rate": 8.789808917197454e-06, "loss": 0.9665, "step": 69 }, { "epoch": 0.013429578646969951, "grad_norm": 1.247242941935983, "learning_rate": 8.9171974522293e-06, "loss": 0.9581, "step": 70 }, { "epoch": 0.013621429770498093, "grad_norm": 1.3823221026618566, "learning_rate": 9.044585987261148e-06, "loss": 0.9114, "step": 71 }, { "epoch": 0.013813280894026236, "grad_norm": 1.3460473808125706, "learning_rate": 9.171974522292994e-06, "loss": 0.8501, "step": 72 }, { "epoch": 0.014005132017554378, "grad_norm": 1.31668624032117, "learning_rate": 9.299363057324842e-06, "loss": 1.0103, "step": 73 }, { "epoch": 0.01419698314108252, "grad_norm": 1.530248344337194, "learning_rate": 9.426751592356688e-06, "loss": 0.9778, "step": 74 }, { "epoch": 0.014388834264610663, "grad_norm": 1.1855101825844625, "learning_rate": 9.554140127388536e-06, "loss": 0.9262, "step": 75 }, { "epoch": 0.014580685388138804, "grad_norm": 1.3966010210868671, "learning_rate": 9.681528662420384e-06, "loss": 1.0169, "step": 76 }, { "epoch": 0.014772536511666946, "grad_norm": 0.69391866879556, "learning_rate": 9.80891719745223e-06, "loss": 0.2889, "step": 77 }, { "epoch": 0.014964387635195088, "grad_norm": 1.4229546025486925, "learning_rate": 9.936305732484078e-06, "loss": 1.012, "step": 78 }, { "epoch": 0.01515623875872323, "grad_norm": 1.4139635568439926, "learning_rate": 1.0063694267515924e-05, "loss": 0.9205, "step": 79 }, { "epoch": 0.015348089882251373, "grad_norm": 1.2647523290929157, "learning_rate": 1.0191082802547772e-05, "loss": 0.9804, "step": 80 }, { "epoch": 0.015539941005779516, "grad_norm": 1.4215195468437603, "learning_rate": 1.0318471337579618e-05, "loss": 0.9789, "step": 81 }, { "epoch": 0.015731792129307656, "grad_norm": 1.3628563772112003, "learning_rate": 1.0445859872611466e-05, "loss": 0.9164, "step": 82 }, { "epoch": 0.0159236432528358, "grad_norm": 1.296239433151627, "learning_rate": 1.0573248407643314e-05, "loss": 0.9207, "step": 83 }, { "epoch": 0.01611549437636394, "grad_norm": 1.316963876889074, "learning_rate": 1.070063694267516e-05, "loss": 1.0284, "step": 84 }, { "epoch": 0.016307345499892083, "grad_norm": 1.334684079799317, "learning_rate": 1.0828025477707008e-05, "loss": 0.9453, "step": 85 }, { "epoch": 0.016499196623420226, "grad_norm": 1.2357106657155108, "learning_rate": 1.0955414012738854e-05, "loss": 0.9168, "step": 86 }, { "epoch": 0.016691047746948368, "grad_norm": 1.2648273378177979, "learning_rate": 1.1082802547770702e-05, "loss": 0.9977, "step": 87 }, { "epoch": 0.01688289887047651, "grad_norm": 1.2423049064170908, "learning_rate": 1.1210191082802548e-05, "loss": 0.9791, "step": 88 }, { "epoch": 0.017074749994004653, "grad_norm": 1.348768503854252, "learning_rate": 1.1337579617834396e-05, "loss": 0.9326, "step": 89 }, { "epoch": 0.017266601117532795, "grad_norm": 1.243016254078276, "learning_rate": 1.1464968152866242e-05, "loss": 0.8902, "step": 90 }, { "epoch": 0.017458452241060938, "grad_norm": 0.6697076208648785, "learning_rate": 1.159235668789809e-05, "loss": 0.285, "step": 91 }, { "epoch": 0.01765030336458908, "grad_norm": 0.6433456050612851, "learning_rate": 1.1719745222929938e-05, "loss": 0.2732, "step": 92 }, { "epoch": 0.017842154488117223, "grad_norm": 1.4321129329739055, "learning_rate": 1.1847133757961784e-05, "loss": 0.9526, "step": 93 }, { "epoch": 0.01803400561164536, "grad_norm": 1.2955514079577926, "learning_rate": 1.1974522292993632e-05, "loss": 0.9133, "step": 94 }, { "epoch": 0.018225856735173504, "grad_norm": 1.4561530095202402, "learning_rate": 1.2101910828025478e-05, "loss": 0.9076, "step": 95 }, { "epoch": 0.018417707858701646, "grad_norm": 1.5010956866851128, "learning_rate": 1.2229299363057326e-05, "loss": 0.9488, "step": 96 }, { "epoch": 0.01860955898222979, "grad_norm": 1.3331305680622094, "learning_rate": 1.2356687898089172e-05, "loss": 0.9653, "step": 97 }, { "epoch": 0.01880141010575793, "grad_norm": 1.2298130113812467, "learning_rate": 1.248407643312102e-05, "loss": 0.9547, "step": 98 }, { "epoch": 0.018993261229286074, "grad_norm": 1.4331099288858873, "learning_rate": 1.2611464968152866e-05, "loss": 0.9183, "step": 99 }, { "epoch": 0.019185112352814216, "grad_norm": 1.3279555271336527, "learning_rate": 1.2738853503184714e-05, "loss": 0.9726, "step": 100 }, { "epoch": 0.01937696347634236, "grad_norm": 1.3439149160988995, "learning_rate": 1.2866242038216562e-05, "loss": 0.9528, "step": 101 }, { "epoch": 0.0195688145998705, "grad_norm": 1.2008607132695277, "learning_rate": 1.2993630573248408e-05, "loss": 0.91, "step": 102 }, { "epoch": 0.019760665723398643, "grad_norm": 1.2286594540619995, "learning_rate": 1.3121019108280256e-05, "loss": 0.9282, "step": 103 }, { "epoch": 0.019952516846926786, "grad_norm": 1.2909190228771434, "learning_rate": 1.3248407643312102e-05, "loss": 0.9284, "step": 104 }, { "epoch": 0.020144367970454928, "grad_norm": 1.3293329720848146, "learning_rate": 1.337579617834395e-05, "loss": 0.8985, "step": 105 }, { "epoch": 0.02033621909398307, "grad_norm": 1.5551176531469013, "learning_rate": 1.3503184713375796e-05, "loss": 0.9782, "step": 106 }, { "epoch": 0.020528070217511213, "grad_norm": 1.4465776160710204, "learning_rate": 1.3630573248407644e-05, "loss": 0.9398, "step": 107 }, { "epoch": 0.020719921341039352, "grad_norm": 1.5252480522145055, "learning_rate": 1.375796178343949e-05, "loss": 0.9745, "step": 108 }, { "epoch": 0.020911772464567494, "grad_norm": 1.5745223839164513, "learning_rate": 1.3885350318471338e-05, "loss": 0.9041, "step": 109 }, { "epoch": 0.021103623588095637, "grad_norm": 1.2621260033143955, "learning_rate": 1.4012738853503186e-05, "loss": 0.9512, "step": 110 }, { "epoch": 0.02129547471162378, "grad_norm": 1.743590304924422, "learning_rate": 1.4140127388535032e-05, "loss": 0.9379, "step": 111 }, { "epoch": 0.02148732583515192, "grad_norm": 1.3920232326814344, "learning_rate": 1.426751592356688e-05, "loss": 0.9341, "step": 112 }, { "epoch": 0.021679176958680064, "grad_norm": 1.2948064882471486, "learning_rate": 1.4394904458598726e-05, "loss": 0.8481, "step": 113 }, { "epoch": 0.021871028082208206, "grad_norm": 1.2609259927893832, "learning_rate": 1.4522292993630574e-05, "loss": 0.9287, "step": 114 }, { "epoch": 0.02206287920573635, "grad_norm": 1.2760364216967057, "learning_rate": 1.464968152866242e-05, "loss": 0.8783, "step": 115 }, { "epoch": 0.02225473032926449, "grad_norm": 1.233137788054757, "learning_rate": 1.4777070063694268e-05, "loss": 0.9675, "step": 116 }, { "epoch": 0.022446581452792633, "grad_norm": 1.177367012051211, "learning_rate": 1.4904458598726114e-05, "loss": 0.991, "step": 117 }, { "epoch": 0.022638432576320776, "grad_norm": 1.3439703793390037, "learning_rate": 1.5031847133757964e-05, "loss": 0.8878, "step": 118 }, { "epoch": 0.022830283699848918, "grad_norm": 1.326298899880249, "learning_rate": 1.5159235668789811e-05, "loss": 0.8743, "step": 119 }, { "epoch": 0.02302213482337706, "grad_norm": 1.1256162650108632, "learning_rate": 1.528662420382166e-05, "loss": 0.9298, "step": 120 }, { "epoch": 0.023213985946905203, "grad_norm": 1.2049797810571379, "learning_rate": 1.5414012738853506e-05, "loss": 0.9786, "step": 121 }, { "epoch": 0.023405837070433345, "grad_norm": 1.2479861316154546, "learning_rate": 1.5541401273885352e-05, "loss": 0.9142, "step": 122 }, { "epoch": 0.023597688193961484, "grad_norm": 1.1512372831554425, "learning_rate": 1.56687898089172e-05, "loss": 0.962, "step": 123 }, { "epoch": 0.023789539317489627, "grad_norm": 1.4258340408809904, "learning_rate": 1.5796178343949047e-05, "loss": 0.9389, "step": 124 }, { "epoch": 0.02398139044101777, "grad_norm": 1.6503194972182587, "learning_rate": 1.5923566878980894e-05, "loss": 0.9883, "step": 125 }, { "epoch": 0.02417324156454591, "grad_norm": 1.1928805966509746, "learning_rate": 1.605095541401274e-05, "loss": 0.961, "step": 126 }, { "epoch": 0.024365092688074054, "grad_norm": 1.2022664902140943, "learning_rate": 1.617834394904459e-05, "loss": 0.9424, "step": 127 }, { "epoch": 0.024556943811602196, "grad_norm": 1.2713821048408975, "learning_rate": 1.6305732484076436e-05, "loss": 0.9645, "step": 128 }, { "epoch": 0.02474879493513034, "grad_norm": 1.2391948077600345, "learning_rate": 1.643312101910828e-05, "loss": 0.9384, "step": 129 }, { "epoch": 0.02494064605865848, "grad_norm": 1.442912629758886, "learning_rate": 1.6560509554140128e-05, "loss": 0.9167, "step": 130 }, { "epoch": 0.025132497182186624, "grad_norm": 1.2893615801065443, "learning_rate": 1.6687898089171977e-05, "loss": 0.9488, "step": 131 }, { "epoch": 0.025324348305714766, "grad_norm": 1.3145919533263781, "learning_rate": 1.6815286624203824e-05, "loss": 0.8992, "step": 132 }, { "epoch": 0.02551619942924291, "grad_norm": 1.2215847461955838, "learning_rate": 1.694267515923567e-05, "loss": 0.9224, "step": 133 }, { "epoch": 0.02570805055277105, "grad_norm": 1.1833688575491863, "learning_rate": 1.707006369426752e-05, "loss": 0.9432, "step": 134 }, { "epoch": 0.025899901676299193, "grad_norm": 1.1295606344598794, "learning_rate": 1.7197452229299365e-05, "loss": 0.895, "step": 135 }, { "epoch": 0.026091752799827336, "grad_norm": 1.3165199876964537, "learning_rate": 1.732484076433121e-05, "loss": 0.9631, "step": 136 }, { "epoch": 0.026283603923355475, "grad_norm": 1.3190281476012027, "learning_rate": 1.7452229299363058e-05, "loss": 0.8903, "step": 137 }, { "epoch": 0.026475455046883617, "grad_norm": 1.0897116252277774, "learning_rate": 1.7579617834394907e-05, "loss": 0.9124, "step": 138 }, { "epoch": 0.02666730617041176, "grad_norm": 1.2514783450387368, "learning_rate": 1.7707006369426754e-05, "loss": 0.8844, "step": 139 }, { "epoch": 0.026859157293939902, "grad_norm": 1.3201744283314951, "learning_rate": 1.78343949044586e-05, "loss": 0.8913, "step": 140 }, { "epoch": 0.027051008417468044, "grad_norm": 1.601058905503655, "learning_rate": 1.796178343949045e-05, "loss": 0.9729, "step": 141 }, { "epoch": 0.027242859540996187, "grad_norm": 1.075044597947074, "learning_rate": 1.8089171974522295e-05, "loss": 0.9892, "step": 142 }, { "epoch": 0.02743471066452433, "grad_norm": 1.2868352424264113, "learning_rate": 1.821656050955414e-05, "loss": 0.8692, "step": 143 }, { "epoch": 0.02762656178805247, "grad_norm": 1.2837791614059528, "learning_rate": 1.8343949044585988e-05, "loss": 0.8652, "step": 144 }, { "epoch": 0.027818412911580614, "grad_norm": 1.585747831608059, "learning_rate": 1.8471337579617837e-05, "loss": 0.9103, "step": 145 }, { "epoch": 0.028010264035108756, "grad_norm": 1.3402364735136298, "learning_rate": 1.8598726114649684e-05, "loss": 0.9083, "step": 146 }, { "epoch": 0.0282021151586369, "grad_norm": 1.2769484830113353, "learning_rate": 1.872611464968153e-05, "loss": 0.9247, "step": 147 }, { "epoch": 0.02839396628216504, "grad_norm": 1.1928041597186112, "learning_rate": 1.8853503184713376e-05, "loss": 0.9204, "step": 148 }, { "epoch": 0.028585817405693183, "grad_norm": 0.9577799946180064, "learning_rate": 1.8980891719745225e-05, "loss": 0.298, "step": 149 }, { "epoch": 0.028777668529221326, "grad_norm": 1.312993897414373, "learning_rate": 1.910828025477707e-05, "loss": 0.912, "step": 150 }, { "epoch": 0.028969519652749465, "grad_norm": 1.4013039764146114, "learning_rate": 1.9235668789808918e-05, "loss": 1.0048, "step": 151 }, { "epoch": 0.029161370776277607, "grad_norm": 1.1103378213229542, "learning_rate": 1.9363057324840767e-05, "loss": 0.8725, "step": 152 }, { "epoch": 0.02935322189980575, "grad_norm": 1.1603361094065372, "learning_rate": 1.9490445859872614e-05, "loss": 0.9576, "step": 153 }, { "epoch": 0.029545073023333892, "grad_norm": 1.3685060253976191, "learning_rate": 1.961783439490446e-05, "loss": 0.8828, "step": 154 }, { "epoch": 0.029736924146862034, "grad_norm": 1.2772235332949726, "learning_rate": 1.9745222929936306e-05, "loss": 0.9252, "step": 155 }, { "epoch": 0.029928775270390177, "grad_norm": 1.1985776083342456, "learning_rate": 1.9872611464968155e-05, "loss": 0.9305, "step": 156 }, { "epoch": 0.03012062639391832, "grad_norm": 1.3645326826916575, "learning_rate": 2e-05, "loss": 0.8919, "step": 157 }, { "epoch": 0.03031247751744646, "grad_norm": 1.3520368160947174, "learning_rate": 1.9999998068799278e-05, "loss": 0.9341, "step": 158 }, { "epoch": 0.030504328640974604, "grad_norm": 1.0981412066651774, "learning_rate": 1.9999992275197847e-05, "loss": 0.8879, "step": 159 }, { "epoch": 0.030696179764502746, "grad_norm": 1.218753396150103, "learning_rate": 1.999998261919795e-05, "loss": 0.9531, "step": 160 }, { "epoch": 0.03088803088803089, "grad_norm": 1.325415170752261, "learning_rate": 1.999996910080331e-05, "loss": 0.9642, "step": 161 }, { "epoch": 0.03107988201155903, "grad_norm": 1.1204531273360994, "learning_rate": 1.9999951720019162e-05, "loss": 0.9084, "step": 162 }, { "epoch": 0.031271733135087174, "grad_norm": 1.2077246460429112, "learning_rate": 1.9999930476852207e-05, "loss": 0.9878, "step": 163 }, { "epoch": 0.03146358425861531, "grad_norm": 1.3846423427171772, "learning_rate": 1.9999905371310653e-05, "loss": 0.9112, "step": 164 }, { "epoch": 0.03165543538214346, "grad_norm": 1.3838513118204594, "learning_rate": 1.99998764034042e-05, "loss": 0.9251, "step": 165 }, { "epoch": 0.0318472865056716, "grad_norm": 1.329335410865676, "learning_rate": 1.999984357314403e-05, "loss": 0.9641, "step": 166 }, { "epoch": 0.03203913762919974, "grad_norm": 1.2176571677660535, "learning_rate": 1.999980688054283e-05, "loss": 0.9871, "step": 167 }, { "epoch": 0.03223098875272788, "grad_norm": 2.0796859620372126, "learning_rate": 1.9999766325614767e-05, "loss": 0.3145, "step": 168 }, { "epoch": 0.03242283987625603, "grad_norm": 1.4194911603538591, "learning_rate": 1.9999721908375512e-05, "loss": 0.9959, "step": 169 }, { "epoch": 0.03261469099978417, "grad_norm": 1.1072763964120214, "learning_rate": 1.9999673628842214e-05, "loss": 0.9899, "step": 170 }, { "epoch": 0.03280654212331231, "grad_norm": 1.3434244641156128, "learning_rate": 1.9999621487033524e-05, "loss": 0.9895, "step": 171 }, { "epoch": 0.03299839324684045, "grad_norm": 1.2278158862573196, "learning_rate": 1.999956548296958e-05, "loss": 0.9073, "step": 172 }, { "epoch": 0.03319024437036859, "grad_norm": 1.360012757761412, "learning_rate": 1.999950561667201e-05, "loss": 0.9041, "step": 173 }, { "epoch": 0.033382095493896737, "grad_norm": 1.3420534290494168, "learning_rate": 1.9999441888163945e-05, "loss": 0.9687, "step": 174 }, { "epoch": 0.033573946617424875, "grad_norm": 1.2097075883347397, "learning_rate": 1.999937429746999e-05, "loss": 0.9313, "step": 175 }, { "epoch": 0.03376579774095302, "grad_norm": 1.1645915757504182, "learning_rate": 1.9999302844616256e-05, "loss": 0.9466, "step": 176 }, { "epoch": 0.03395764886448116, "grad_norm": 1.23521355249762, "learning_rate": 1.9999227529630343e-05, "loss": 0.9205, "step": 177 }, { "epoch": 0.034149499988009306, "grad_norm": 1.20782608520942, "learning_rate": 1.9999148352541334e-05, "loss": 0.9071, "step": 178 }, { "epoch": 0.034341351111537445, "grad_norm": 1.1147506176080564, "learning_rate": 1.999906531337982e-05, "loss": 0.9397, "step": 179 }, { "epoch": 0.03453320223506559, "grad_norm": 1.0865806136806266, "learning_rate": 1.9998978412177866e-05, "loss": 0.9253, "step": 180 }, { "epoch": 0.03472505335859373, "grad_norm": 1.300302999418569, "learning_rate": 1.9998887648969038e-05, "loss": 0.9094, "step": 181 }, { "epoch": 0.034916904482121876, "grad_norm": 1.28821543900934, "learning_rate": 1.9998793023788395e-05, "loss": 0.9518, "step": 182 }, { "epoch": 0.035108755605650015, "grad_norm": 1.2592716549579113, "learning_rate": 1.999869453667248e-05, "loss": 0.8812, "step": 183 }, { "epoch": 0.03530060672917816, "grad_norm": 1.3316035030852174, "learning_rate": 1.9998592187659343e-05, "loss": 0.8975, "step": 184 }, { "epoch": 0.0354924578527063, "grad_norm": 1.2336596231345158, "learning_rate": 1.9998485976788506e-05, "loss": 1.0029, "step": 185 }, { "epoch": 0.035684308976234445, "grad_norm": 1.4185048737581714, "learning_rate": 1.9998375904100996e-05, "loss": 0.9098, "step": 186 }, { "epoch": 0.035876160099762584, "grad_norm": 1.195427839914222, "learning_rate": 1.9998261969639324e-05, "loss": 0.897, "step": 187 }, { "epoch": 0.03606801122329072, "grad_norm": 1.1645383524866624, "learning_rate": 1.99981441734475e-05, "loss": 0.9479, "step": 188 }, { "epoch": 0.03625986234681887, "grad_norm": 1.3345740047101402, "learning_rate": 1.999802251557102e-05, "loss": 0.9448, "step": 189 }, { "epoch": 0.03645171347034701, "grad_norm": 1.0903371753306024, "learning_rate": 1.999789699605687e-05, "loss": 0.9277, "step": 190 }, { "epoch": 0.036643564593875154, "grad_norm": 1.2202546998223112, "learning_rate": 1.9997767614953536e-05, "loss": 0.9187, "step": 191 }, { "epoch": 0.03683541571740329, "grad_norm": 1.1654721596489077, "learning_rate": 1.9997634372310987e-05, "loss": 0.8666, "step": 192 }, { "epoch": 0.03702726684093144, "grad_norm": 1.2381453468755688, "learning_rate": 1.999749726818069e-05, "loss": 0.908, "step": 193 }, { "epoch": 0.03721911796445958, "grad_norm": 1.398610901670479, "learning_rate": 1.9997356302615594e-05, "loss": 0.9035, "step": 194 }, { "epoch": 0.037410969087987724, "grad_norm": 1.2084639046844299, "learning_rate": 1.999721147567015e-05, "loss": 0.9163, "step": 195 }, { "epoch": 0.03760282021151586, "grad_norm": 1.32598363953888, "learning_rate": 1.9997062787400298e-05, "loss": 0.8804, "step": 196 }, { "epoch": 0.03779467133504401, "grad_norm": 1.5672689019269657, "learning_rate": 1.999691023786346e-05, "loss": 0.3541, "step": 197 }, { "epoch": 0.03798652245857215, "grad_norm": 1.2285463026279295, "learning_rate": 1.9996753827118565e-05, "loss": 0.9125, "step": 198 }, { "epoch": 0.03817837358210029, "grad_norm": 1.2971402924384812, "learning_rate": 1.999659355522602e-05, "loss": 0.9145, "step": 199 }, { "epoch": 0.03837022470562843, "grad_norm": 1.2585514659778703, "learning_rate": 1.999642942224773e-05, "loss": 0.8916, "step": 200 }, { "epoch": 0.03856207582915658, "grad_norm": 1.2258335801341402, "learning_rate": 1.999626142824709e-05, "loss": 0.9315, "step": 201 }, { "epoch": 0.03875392695268472, "grad_norm": 1.3301870696555773, "learning_rate": 1.9996089573288985e-05, "loss": 0.9707, "step": 202 }, { "epoch": 0.038945778076212856, "grad_norm": 1.4361620617597899, "learning_rate": 1.9995913857439792e-05, "loss": 0.9034, "step": 203 }, { "epoch": 0.039137629199741, "grad_norm": 1.1106928773763434, "learning_rate": 1.9995734280767382e-05, "loss": 0.3359, "step": 204 }, { "epoch": 0.03932948032326914, "grad_norm": 1.0786358261493851, "learning_rate": 1.9995550843341116e-05, "loss": 0.8956, "step": 205 }, { "epoch": 0.039521331446797286, "grad_norm": 1.2348423531901596, "learning_rate": 1.9995363545231832e-05, "loss": 0.9395, "step": 206 }, { "epoch": 0.039713182570325425, "grad_norm": 1.2162075217269144, "learning_rate": 1.9995172386511893e-05, "loss": 0.8734, "step": 207 }, { "epoch": 0.03990503369385357, "grad_norm": 0.7248367693609558, "learning_rate": 1.9994977367255117e-05, "loss": 0.3029, "step": 208 }, { "epoch": 0.04009688481738171, "grad_norm": 1.213023307507464, "learning_rate": 1.9994778487536833e-05, "loss": 0.9237, "step": 209 }, { "epoch": 0.040288735940909856, "grad_norm": 1.2520057486091567, "learning_rate": 1.9994575747433855e-05, "loss": 0.8673, "step": 210 }, { "epoch": 0.040480587064437995, "grad_norm": 0.6911705798634095, "learning_rate": 1.999436914702449e-05, "loss": 0.2954, "step": 211 }, { "epoch": 0.04067243818796614, "grad_norm": 1.3026823022423344, "learning_rate": 1.9994158686388535e-05, "loss": 0.9131, "step": 212 }, { "epoch": 0.04086428931149428, "grad_norm": 1.1905613386134644, "learning_rate": 1.9993944365607283e-05, "loss": 0.9047, "step": 213 }, { "epoch": 0.041056140435022426, "grad_norm": 1.2658659508512689, "learning_rate": 1.9993726184763506e-05, "loss": 0.9195, "step": 214 }, { "epoch": 0.041247991558550565, "grad_norm": 1.1428035943397308, "learning_rate": 1.9993504143941478e-05, "loss": 0.8748, "step": 215 }, { "epoch": 0.041439842682078704, "grad_norm": 1.3082175423709015, "learning_rate": 1.9993278243226958e-05, "loss": 1.0251, "step": 216 }, { "epoch": 0.04163169380560685, "grad_norm": 1.6144736190714037, "learning_rate": 1.99930484827072e-05, "loss": 0.8756, "step": 217 }, { "epoch": 0.04182354492913499, "grad_norm": 1.1243765992106414, "learning_rate": 1.9992814862470947e-05, "loss": 0.9011, "step": 218 }, { "epoch": 0.042015396052663134, "grad_norm": 1.4812192426572233, "learning_rate": 1.9992577382608434e-05, "loss": 0.8821, "step": 219 }, { "epoch": 0.04220724717619127, "grad_norm": 1.1153246316900913, "learning_rate": 1.999233604321138e-05, "loss": 0.9842, "step": 220 }, { "epoch": 0.04239909829971942, "grad_norm": 0.9949409127358709, "learning_rate": 1.9992090844373003e-05, "loss": 0.8945, "step": 221 }, { "epoch": 0.04259094942324756, "grad_norm": 1.424953993908323, "learning_rate": 1.999184178618801e-05, "loss": 0.8802, "step": 222 }, { "epoch": 0.042782800546775704, "grad_norm": 0.9660130047667277, "learning_rate": 1.9991588868752594e-05, "loss": 0.3115, "step": 223 }, { "epoch": 0.04297465167030384, "grad_norm": 1.1269390528487209, "learning_rate": 1.9991332092164442e-05, "loss": 0.9259, "step": 224 }, { "epoch": 0.04316650279383199, "grad_norm": 1.108922124892747, "learning_rate": 1.999107145652274e-05, "loss": 0.9252, "step": 225 }, { "epoch": 0.04335835391736013, "grad_norm": 1.426236095075045, "learning_rate": 1.9990806961928143e-05, "loss": 0.9678, "step": 226 }, { "epoch": 0.043550205040888273, "grad_norm": 1.2615527929980408, "learning_rate": 1.9990538608482816e-05, "loss": 0.8858, "step": 227 }, { "epoch": 0.04374205616441641, "grad_norm": 1.1705525825410092, "learning_rate": 1.999026639629041e-05, "loss": 0.8475, "step": 228 }, { "epoch": 0.04393390728794456, "grad_norm": 1.1300392299632758, "learning_rate": 1.9989990325456058e-05, "loss": 0.9584, "step": 229 }, { "epoch": 0.0441257584114727, "grad_norm": 1.1469826506344112, "learning_rate": 1.9989710396086396e-05, "loss": 0.9093, "step": 230 }, { "epoch": 0.044317609535000836, "grad_norm": 1.265295276915559, "learning_rate": 1.9989426608289545e-05, "loss": 0.9075, "step": 231 }, { "epoch": 0.04450946065852898, "grad_norm": 1.1396835613054541, "learning_rate": 1.9989138962175105e-05, "loss": 0.9427, "step": 232 }, { "epoch": 0.04470131178205712, "grad_norm": 1.2747908396407557, "learning_rate": 1.9988847457854182e-05, "loss": 0.8044, "step": 233 }, { "epoch": 0.04489316290558527, "grad_norm": 1.1655553024440137, "learning_rate": 1.9988552095439372e-05, "loss": 0.8221, "step": 234 }, { "epoch": 0.045085014029113406, "grad_norm": 1.0580182641787153, "learning_rate": 1.9988252875044752e-05, "loss": 0.8628, "step": 235 }, { "epoch": 0.04527686515264155, "grad_norm": 1.1009810287332455, "learning_rate": 1.9987949796785892e-05, "loss": 0.8784, "step": 236 }, { "epoch": 0.04546871627616969, "grad_norm": 1.1525255144637883, "learning_rate": 1.998764286077985e-05, "loss": 0.9617, "step": 237 }, { "epoch": 0.045660567399697836, "grad_norm": 1.3009868814102035, "learning_rate": 1.998733206714518e-05, "loss": 0.8767, "step": 238 }, { "epoch": 0.045852418523225975, "grad_norm": 1.2577343066742988, "learning_rate": 1.998701741600193e-05, "loss": 0.9858, "step": 239 }, { "epoch": 0.04604426964675412, "grad_norm": 1.2000624099309805, "learning_rate": 1.998669890747162e-05, "loss": 0.9129, "step": 240 }, { "epoch": 0.04623612077028226, "grad_norm": 1.199396765155728, "learning_rate": 1.998637654167727e-05, "loss": 0.9149, "step": 241 }, { "epoch": 0.046427971893810406, "grad_norm": 1.0524870314289279, "learning_rate": 1.9986050318743406e-05, "loss": 0.9626, "step": 242 }, { "epoch": 0.046619823017338545, "grad_norm": 1.1179155212986773, "learning_rate": 1.9985720238796012e-05, "loss": 0.9023, "step": 243 }, { "epoch": 0.04681167414086669, "grad_norm": 1.029677411721405, "learning_rate": 1.9985386301962585e-05, "loss": 0.8999, "step": 244 }, { "epoch": 0.04700352526439483, "grad_norm": 1.0714546393541904, "learning_rate": 1.9985048508372103e-05, "loss": 0.9338, "step": 245 }, { "epoch": 0.04719537638792297, "grad_norm": 1.2137229634532312, "learning_rate": 1.9984706858155037e-05, "loss": 0.889, "step": 246 }, { "epoch": 0.047387227511451115, "grad_norm": 1.3318572818958774, "learning_rate": 1.9984361351443343e-05, "loss": 0.8592, "step": 247 }, { "epoch": 0.047579078634979254, "grad_norm": 1.0084790926035703, "learning_rate": 1.9984011988370478e-05, "loss": 0.875, "step": 248 }, { "epoch": 0.0477709297585074, "grad_norm": 1.0552198395944588, "learning_rate": 1.998365876907137e-05, "loss": 0.8879, "step": 249 }, { "epoch": 0.04796278088203554, "grad_norm": 1.054518494295875, "learning_rate": 1.9983301693682452e-05, "loss": 0.9012, "step": 250 }, { "epoch": 0.048154632005563684, "grad_norm": 1.1991196653158753, "learning_rate": 1.9982940762341637e-05, "loss": 0.9081, "step": 251 }, { "epoch": 0.04834648312909182, "grad_norm": 0.9785951042069102, "learning_rate": 1.9982575975188337e-05, "loss": 0.875, "step": 252 }, { "epoch": 0.04853833425261997, "grad_norm": 1.1015356892020538, "learning_rate": 1.9982207332363442e-05, "loss": 0.8843, "step": 253 }, { "epoch": 0.04873018537614811, "grad_norm": 1.2704324490573589, "learning_rate": 1.9981834834009337e-05, "loss": 0.8667, "step": 254 }, { "epoch": 0.048922036499676254, "grad_norm": 1.4302499898268728, "learning_rate": 1.99814584802699e-05, "loss": 0.9383, "step": 255 }, { "epoch": 0.04911388762320439, "grad_norm": 1.1386496615007924, "learning_rate": 1.998107827129049e-05, "loss": 0.8776, "step": 256 }, { "epoch": 0.04930573874673254, "grad_norm": 1.056051128386424, "learning_rate": 1.9980694207217964e-05, "loss": 0.9205, "step": 257 }, { "epoch": 0.04949758987026068, "grad_norm": 1.1042931774030103, "learning_rate": 1.9980306288200655e-05, "loss": 0.8929, "step": 258 }, { "epoch": 0.049689440993788817, "grad_norm": 1.0238245440642695, "learning_rate": 1.9979914514388397e-05, "loss": 0.8615, "step": 259 }, { "epoch": 0.04988129211731696, "grad_norm": 1.135777639599852, "learning_rate": 1.9979518885932512e-05, "loss": 0.7974, "step": 260 }, { "epoch": 0.0500731432408451, "grad_norm": 1.3709598174529176, "learning_rate": 1.9979119402985803e-05, "loss": 0.9024, "step": 261 }, { "epoch": 0.05026499436437325, "grad_norm": 1.4672894048252816, "learning_rate": 1.9978716065702566e-05, "loss": 0.9209, "step": 262 }, { "epoch": 0.050456845487901386, "grad_norm": 1.0519081034374433, "learning_rate": 1.9978308874238595e-05, "loss": 0.9139, "step": 263 }, { "epoch": 0.05064869661142953, "grad_norm": 0.9430869159345551, "learning_rate": 1.9977897828751153e-05, "loss": 0.8047, "step": 264 }, { "epoch": 0.05084054773495767, "grad_norm": 1.010076783138593, "learning_rate": 1.9977482929399e-05, "loss": 0.8946, "step": 265 }, { "epoch": 0.05103239885848582, "grad_norm": 1.1813503761366477, "learning_rate": 1.99770641763424e-05, "loss": 0.953, "step": 266 }, { "epoch": 0.051224249982013956, "grad_norm": 1.263211973925142, "learning_rate": 1.997664156974308e-05, "loss": 0.9203, "step": 267 }, { "epoch": 0.0514161011055421, "grad_norm": 1.0666531789818297, "learning_rate": 1.9976215109764277e-05, "loss": 0.8826, "step": 268 }, { "epoch": 0.05160795222907024, "grad_norm": 0.9504319873701818, "learning_rate": 1.99757847965707e-05, "loss": 0.9053, "step": 269 }, { "epoch": 0.051799803352598386, "grad_norm": 1.105774270619266, "learning_rate": 1.9975350630328558e-05, "loss": 0.8574, "step": 270 }, { "epoch": 0.051991654476126525, "grad_norm": 1.2188264418318984, "learning_rate": 1.9974912611205538e-05, "loss": 0.9131, "step": 271 }, { "epoch": 0.05218350559965467, "grad_norm": 1.0974623282202944, "learning_rate": 1.997447073937083e-05, "loss": 0.9119, "step": 272 }, { "epoch": 0.05237535672318281, "grad_norm": 1.14460802137221, "learning_rate": 1.997402501499509e-05, "loss": 0.9659, "step": 273 }, { "epoch": 0.05256720784671095, "grad_norm": 1.124852681377001, "learning_rate": 1.9973575438250483e-05, "loss": 0.9277, "step": 274 }, { "epoch": 0.052759058970239095, "grad_norm": 1.1031918872472923, "learning_rate": 1.997312200931065e-05, "loss": 0.9096, "step": 275 }, { "epoch": 0.052950910093767234, "grad_norm": 1.0892935735191924, "learning_rate": 1.9972664728350727e-05, "loss": 0.9311, "step": 276 }, { "epoch": 0.05314276121729538, "grad_norm": 1.228966856609846, "learning_rate": 1.9972203595547334e-05, "loss": 0.8866, "step": 277 }, { "epoch": 0.05333461234082352, "grad_norm": 0.9955607196798957, "learning_rate": 1.997173861107858e-05, "loss": 0.3337, "step": 278 }, { "epoch": 0.053526463464351665, "grad_norm": 1.0645413934637633, "learning_rate": 1.9971269775124053e-05, "loss": 0.8762, "step": 279 }, { "epoch": 0.053718314587879804, "grad_norm": 1.254082768323514, "learning_rate": 1.9970797087864842e-05, "loss": 0.8959, "step": 280 }, { "epoch": 0.05391016571140795, "grad_norm": 1.136853511166649, "learning_rate": 1.9970320549483516e-05, "loss": 0.8696, "step": 281 }, { "epoch": 0.05410201683493609, "grad_norm": 1.1162102041585384, "learning_rate": 1.9969840160164134e-05, "loss": 0.9487, "step": 282 }, { "epoch": 0.054293867958464234, "grad_norm": 1.313363990484945, "learning_rate": 1.9969355920092245e-05, "loss": 0.8676, "step": 283 }, { "epoch": 0.05448571908199237, "grad_norm": 1.2553561510659512, "learning_rate": 1.996886782945488e-05, "loss": 0.8797, "step": 284 }, { "epoch": 0.05467757020552052, "grad_norm": 1.1983305810008225, "learning_rate": 1.9968375888440554e-05, "loss": 0.8866, "step": 285 }, { "epoch": 0.05486942132904866, "grad_norm": 1.2326110482200618, "learning_rate": 1.9967880097239282e-05, "loss": 0.8724, "step": 286 }, { "epoch": 0.055061272452576804, "grad_norm": 1.079665060436393, "learning_rate": 1.9967380456042552e-05, "loss": 0.862, "step": 287 }, { "epoch": 0.05525312357610494, "grad_norm": 1.4589673748834715, "learning_rate": 1.996687696504335e-05, "loss": 0.909, "step": 288 }, { "epoch": 0.05544497469963308, "grad_norm": 1.2335701453567334, "learning_rate": 1.9966369624436142e-05, "loss": 0.9052, "step": 289 }, { "epoch": 0.05563682582316123, "grad_norm": 1.2012181122519727, "learning_rate": 1.9965858434416886e-05, "loss": 0.9819, "step": 290 }, { "epoch": 0.055828676946689366, "grad_norm": 1.629311348366643, "learning_rate": 1.9965343395183023e-05, "loss": 0.9171, "step": 291 }, { "epoch": 0.05602052807021751, "grad_norm": 1.0465575975043628, "learning_rate": 1.996482450693348e-05, "loss": 0.9344, "step": 292 }, { "epoch": 0.05621237919374565, "grad_norm": 1.1655567111908163, "learning_rate": 1.9964301769868673e-05, "loss": 0.9204, "step": 293 }, { "epoch": 0.0564042303172738, "grad_norm": 1.176576552748848, "learning_rate": 1.9963775184190508e-05, "loss": 0.9615, "step": 294 }, { "epoch": 0.056596081440801936, "grad_norm": 1.1724327858303611, "learning_rate": 1.9963244750102365e-05, "loss": 0.9797, "step": 295 }, { "epoch": 0.05678793256433008, "grad_norm": 1.1308718982119326, "learning_rate": 1.996271046780913e-05, "loss": 0.8748, "step": 296 }, { "epoch": 0.05697978368785822, "grad_norm": 1.2917248858991601, "learning_rate": 1.9962172337517154e-05, "loss": 0.9063, "step": 297 }, { "epoch": 0.05717163481138637, "grad_norm": 1.0455810509795067, "learning_rate": 1.996163035943429e-05, "loss": 0.8765, "step": 298 }, { "epoch": 0.057363485934914506, "grad_norm": 1.1139705886774427, "learning_rate": 1.996108453376987e-05, "loss": 0.8593, "step": 299 }, { "epoch": 0.05755533705844265, "grad_norm": 1.3555092679106084, "learning_rate": 1.9960534860734718e-05, "loss": 0.8667, "step": 300 }, { "epoch": 0.05774718818197079, "grad_norm": 0.945678688123213, "learning_rate": 1.995998134054113e-05, "loss": 0.847, "step": 301 }, { "epoch": 0.05793903930549893, "grad_norm": 1.0435422787111888, "learning_rate": 1.995942397340291e-05, "loss": 0.9043, "step": 302 }, { "epoch": 0.058130890429027075, "grad_norm": 1.1027993932755438, "learning_rate": 1.9958862759535327e-05, "loss": 0.8627, "step": 303 }, { "epoch": 0.058322741552555214, "grad_norm": 1.0608037722932737, "learning_rate": 1.9958297699155147e-05, "loss": 0.8987, "step": 304 }, { "epoch": 0.05851459267608336, "grad_norm": 1.1880529736027508, "learning_rate": 1.9957728792480617e-05, "loss": 0.9026, "step": 305 }, { "epoch": 0.0587064437996115, "grad_norm": 0.9935086163759201, "learning_rate": 1.995715603973148e-05, "loss": 0.8855, "step": 306 }, { "epoch": 0.058898294923139645, "grad_norm": 1.0568142153580429, "learning_rate": 1.9956579441128942e-05, "loss": 0.8707, "step": 307 }, { "epoch": 0.059090146046667784, "grad_norm": 1.0331854721522176, "learning_rate": 1.9955998996895716e-05, "loss": 0.888, "step": 308 }, { "epoch": 0.05928199717019593, "grad_norm": 1.5783022191119822, "learning_rate": 1.9955414707255998e-05, "loss": 0.9053, "step": 309 }, { "epoch": 0.05947384829372407, "grad_norm": 1.0650473072252957, "learning_rate": 1.995482657243546e-05, "loss": 0.8955, "step": 310 }, { "epoch": 0.059665699417252215, "grad_norm": 0.6811891203134276, "learning_rate": 1.9954234592661258e-05, "loss": 0.2889, "step": 311 }, { "epoch": 0.059857550540780353, "grad_norm": 1.002715642457841, "learning_rate": 1.9953638768162042e-05, "loss": 0.8794, "step": 312 }, { "epoch": 0.0600494016643085, "grad_norm": 0.968980074532701, "learning_rate": 1.9953039099167948e-05, "loss": 0.8433, "step": 313 }, { "epoch": 0.06024125278783664, "grad_norm": 1.0225626836041846, "learning_rate": 1.9952435585910584e-05, "loss": 0.9254, "step": 314 }, { "epoch": 0.060433103911364784, "grad_norm": 1.1087046605010746, "learning_rate": 1.9951828228623057e-05, "loss": 0.9558, "step": 315 }, { "epoch": 0.06062495503489292, "grad_norm": 1.2102187880685795, "learning_rate": 1.9951217027539954e-05, "loss": 0.9484, "step": 316 }, { "epoch": 0.06081680615842106, "grad_norm": 1.1285034214096057, "learning_rate": 1.9950601982897338e-05, "loss": 0.9351, "step": 317 }, { "epoch": 0.06100865728194921, "grad_norm": 1.0365378379580703, "learning_rate": 1.9949983094932773e-05, "loss": 0.8834, "step": 318 }, { "epoch": 0.06120050840547735, "grad_norm": 1.2086173088858028, "learning_rate": 1.994936036388529e-05, "loss": 0.8257, "step": 319 }, { "epoch": 0.06139235952900549, "grad_norm": 1.0645262218464433, "learning_rate": 1.9948733789995417e-05, "loss": 0.893, "step": 320 }, { "epoch": 0.06158421065253363, "grad_norm": 1.2503134952005224, "learning_rate": 1.9948103373505163e-05, "loss": 0.9074, "step": 321 }, { "epoch": 0.06177606177606178, "grad_norm": 1.135898105461334, "learning_rate": 1.994746911465802e-05, "loss": 0.876, "step": 322 }, { "epoch": 0.061967912899589916, "grad_norm": 1.1501718232900862, "learning_rate": 1.994683101369896e-05, "loss": 0.9605, "step": 323 }, { "epoch": 0.06215976402311806, "grad_norm": 1.054822982259393, "learning_rate": 1.9946189070874447e-05, "loss": 0.8586, "step": 324 }, { "epoch": 0.0623516151466462, "grad_norm": 1.0993199525313848, "learning_rate": 1.9945543286432426e-05, "loss": 0.9223, "step": 325 }, { "epoch": 0.06254346627017435, "grad_norm": 0.7660378019616219, "learning_rate": 1.994489366062232e-05, "loss": 0.3004, "step": 326 }, { "epoch": 0.06273531739370249, "grad_norm": 1.31896701928407, "learning_rate": 1.9944240193695043e-05, "loss": 0.8777, "step": 327 }, { "epoch": 0.06292716851723062, "grad_norm": 1.1599417728971761, "learning_rate": 1.9943582885902995e-05, "loss": 0.8543, "step": 328 }, { "epoch": 0.06311901964075878, "grad_norm": 1.1166011530139688, "learning_rate": 1.994292173750005e-05, "loss": 0.9184, "step": 329 }, { "epoch": 0.06331087076428692, "grad_norm": 1.1315372255239982, "learning_rate": 1.9942256748741567e-05, "loss": 0.8847, "step": 330 }, { "epoch": 0.06350272188781506, "grad_norm": 1.2665291751108985, "learning_rate": 1.9941587919884394e-05, "loss": 0.9089, "step": 331 }, { "epoch": 0.0636945730113432, "grad_norm": 1.0771048151150704, "learning_rate": 1.9940915251186867e-05, "loss": 0.8697, "step": 332 }, { "epoch": 0.06388642413487133, "grad_norm": 1.3139086370604487, "learning_rate": 1.9940238742908786e-05, "loss": 0.8345, "step": 333 }, { "epoch": 0.06407827525839949, "grad_norm": 1.1067625370528482, "learning_rate": 1.9939558395311452e-05, "loss": 0.8805, "step": 334 }, { "epoch": 0.06427012638192763, "grad_norm": 1.0859851358556438, "learning_rate": 1.993887420865764e-05, "loss": 0.9312, "step": 335 }, { "epoch": 0.06446197750545576, "grad_norm": 1.0588150743705962, "learning_rate": 1.9938186183211614e-05, "loss": 0.9089, "step": 336 }, { "epoch": 0.0646538286289839, "grad_norm": 1.1044690637855687, "learning_rate": 1.9937494319239112e-05, "loss": 0.8526, "step": 337 }, { "epoch": 0.06484567975251206, "grad_norm": 1.0527025148328428, "learning_rate": 1.9936798617007364e-05, "loss": 0.9215, "step": 338 }, { "epoch": 0.0650375308760402, "grad_norm": 0.9955755064926194, "learning_rate": 1.993609907678508e-05, "loss": 0.8834, "step": 339 }, { "epoch": 0.06522938199956833, "grad_norm": 1.0047742384494325, "learning_rate": 1.993539569884244e-05, "loss": 0.9481, "step": 340 }, { "epoch": 0.06542123312309647, "grad_norm": 0.7209766929247666, "learning_rate": 1.9934688483451127e-05, "loss": 0.2899, "step": 341 }, { "epoch": 0.06561308424662463, "grad_norm": 1.4858805014413576, "learning_rate": 1.9933977430884294e-05, "loss": 0.9093, "step": 342 }, { "epoch": 0.06580493537015276, "grad_norm": 1.0604918321686714, "learning_rate": 1.9933262541416576e-05, "loss": 0.8652, "step": 343 }, { "epoch": 0.0659967864936809, "grad_norm": 1.404058484932256, "learning_rate": 1.9932543815324093e-05, "loss": 0.8772, "step": 344 }, { "epoch": 0.06618863761720904, "grad_norm": 1.057768028326944, "learning_rate": 1.9931821252884443e-05, "loss": 0.9601, "step": 345 }, { "epoch": 0.06638048874073718, "grad_norm": 1.0065250202127358, "learning_rate": 1.9931094854376716e-05, "loss": 0.9238, "step": 346 }, { "epoch": 0.06657233986426533, "grad_norm": 1.0989254255996719, "learning_rate": 1.9930364620081467e-05, "loss": 0.9106, "step": 347 }, { "epoch": 0.06676419098779347, "grad_norm": 1.1051193300547548, "learning_rate": 1.992963055028075e-05, "loss": 0.9359, "step": 348 }, { "epoch": 0.06695604211132161, "grad_norm": 1.4480649236486005, "learning_rate": 1.9928892645258088e-05, "loss": 0.9039, "step": 349 }, { "epoch": 0.06714789323484975, "grad_norm": 0.6258708442303733, "learning_rate": 1.992815090529849e-05, "loss": 0.3241, "step": 350 }, { "epoch": 0.0673397443583779, "grad_norm": 1.0808590523740595, "learning_rate": 1.9927405330688444e-05, "loss": 0.9333, "step": 351 }, { "epoch": 0.06753159548190604, "grad_norm": 1.2687763850485247, "learning_rate": 1.9926655921715924e-05, "loss": 0.9108, "step": 352 }, { "epoch": 0.06772344660543418, "grad_norm": 1.0928303101618706, "learning_rate": 1.992590267867038e-05, "loss": 0.9311, "step": 353 }, { "epoch": 0.06791529772896232, "grad_norm": 1.1701408625648029, "learning_rate": 1.9925145601842747e-05, "loss": 0.9671, "step": 354 }, { "epoch": 0.06810714885249047, "grad_norm": 1.428048736037287, "learning_rate": 1.9924384691525435e-05, "loss": 0.89, "step": 355 }, { "epoch": 0.06829899997601861, "grad_norm": 1.0854072545522542, "learning_rate": 1.9923619948012338e-05, "loss": 0.8959, "step": 356 }, { "epoch": 0.06849085109954675, "grad_norm": 0.9171112018948836, "learning_rate": 1.9922851371598834e-05, "loss": 0.8651, "step": 357 }, { "epoch": 0.06868270222307489, "grad_norm": 1.1125770733351514, "learning_rate": 1.9922078962581777e-05, "loss": 0.8977, "step": 358 }, { "epoch": 0.06887455334660304, "grad_norm": 1.140580676979817, "learning_rate": 1.9921302721259502e-05, "loss": 0.9073, "step": 359 }, { "epoch": 0.06906640447013118, "grad_norm": 1.2484277981029028, "learning_rate": 1.9920522647931826e-05, "loss": 0.8506, "step": 360 }, { "epoch": 0.06925825559365932, "grad_norm": 1.0343658367959077, "learning_rate": 1.9919738742900044e-05, "loss": 0.927, "step": 361 }, { "epoch": 0.06945010671718746, "grad_norm": 1.1757318655308635, "learning_rate": 1.9918951006466927e-05, "loss": 0.9349, "step": 362 }, { "epoch": 0.0696419578407156, "grad_norm": 0.658140852662941, "learning_rate": 1.9918159438936735e-05, "loss": 0.2944, "step": 363 }, { "epoch": 0.06983380896424375, "grad_norm": 1.0807163925824572, "learning_rate": 1.9917364040615204e-05, "loss": 0.8674, "step": 364 }, { "epoch": 0.07002566008777189, "grad_norm": 1.2343342733061253, "learning_rate": 1.9916564811809543e-05, "loss": 0.9382, "step": 365 }, { "epoch": 0.07021751121130003, "grad_norm": 1.1360412674320075, "learning_rate": 1.9915761752828457e-05, "loss": 0.8622, "step": 366 }, { "epoch": 0.07040936233482817, "grad_norm": 1.121745423300933, "learning_rate": 1.9914954863982106e-05, "loss": 0.8813, "step": 367 }, { "epoch": 0.07060121345835632, "grad_norm": 1.088653087738131, "learning_rate": 1.9914144145582155e-05, "loss": 0.9115, "step": 368 }, { "epoch": 0.07079306458188446, "grad_norm": 1.3558966409543955, "learning_rate": 1.9913329597941727e-05, "loss": 0.8607, "step": 369 }, { "epoch": 0.0709849157054126, "grad_norm": 1.0857031063983287, "learning_rate": 1.991251122137544e-05, "loss": 0.9143, "step": 370 }, { "epoch": 0.07117676682894074, "grad_norm": 1.06436022172851, "learning_rate": 1.991168901619938e-05, "loss": 0.8626, "step": 371 }, { "epoch": 0.07136861795246889, "grad_norm": 1.0491078775544957, "learning_rate": 1.9910862982731114e-05, "loss": 0.9455, "step": 372 }, { "epoch": 0.07156046907599703, "grad_norm": 0.9771478232319321, "learning_rate": 1.9910033121289695e-05, "loss": 0.897, "step": 373 }, { "epoch": 0.07175232019952517, "grad_norm": 1.0267273399994243, "learning_rate": 1.9909199432195644e-05, "loss": 0.8394, "step": 374 }, { "epoch": 0.07194417132305331, "grad_norm": 0.9782775167162667, "learning_rate": 1.9908361915770962e-05, "loss": 0.8374, "step": 375 }, { "epoch": 0.07213602244658145, "grad_norm": 1.2244652467284245, "learning_rate": 1.9907520572339143e-05, "loss": 0.8775, "step": 376 }, { "epoch": 0.0723278735701096, "grad_norm": 1.1835325897402336, "learning_rate": 1.9906675402225137e-05, "loss": 0.8969, "step": 377 }, { "epoch": 0.07251972469363774, "grad_norm": 1.0805795772083102, "learning_rate": 1.9905826405755388e-05, "loss": 0.8986, "step": 378 }, { "epoch": 0.07271157581716588, "grad_norm": 1.1060987892818643, "learning_rate": 1.990497358325781e-05, "loss": 0.8721, "step": 379 }, { "epoch": 0.07290342694069402, "grad_norm": 1.0960456505201468, "learning_rate": 1.99041169350618e-05, "loss": 0.895, "step": 380 }, { "epoch": 0.07309527806422217, "grad_norm": 1.2215293216641312, "learning_rate": 1.9903256461498226e-05, "loss": 0.8625, "step": 381 }, { "epoch": 0.07328712918775031, "grad_norm": 1.1204414529937496, "learning_rate": 1.990239216289944e-05, "loss": 0.8396, "step": 382 }, { "epoch": 0.07347898031127845, "grad_norm": 1.0996494325869748, "learning_rate": 1.9901524039599266e-05, "loss": 0.903, "step": 383 }, { "epoch": 0.07367083143480659, "grad_norm": 1.0359075611557327, "learning_rate": 1.9900652091933015e-05, "loss": 0.8273, "step": 384 }, { "epoch": 0.07386268255833474, "grad_norm": 0.7315635258702436, "learning_rate": 1.989977632023746e-05, "loss": 0.3181, "step": 385 }, { "epoch": 0.07405453368186288, "grad_norm": 0.927393198706477, "learning_rate": 1.9898896724850863e-05, "loss": 0.9042, "step": 386 }, { "epoch": 0.07424638480539102, "grad_norm": 1.3703852191527985, "learning_rate": 1.989801330611296e-05, "loss": 0.8745, "step": 387 }, { "epoch": 0.07443823592891916, "grad_norm": 1.2094350481101228, "learning_rate": 1.9897126064364964e-05, "loss": 0.7813, "step": 388 }, { "epoch": 0.0746300870524473, "grad_norm": 1.0955956904463646, "learning_rate": 1.9896234999949558e-05, "loss": 0.8668, "step": 389 }, { "epoch": 0.07482193817597545, "grad_norm": 1.0361182339216932, "learning_rate": 1.989534011321091e-05, "loss": 0.9612, "step": 390 }, { "epoch": 0.07501378929950359, "grad_norm": 1.063054665156223, "learning_rate": 1.9894441404494657e-05, "loss": 0.8983, "step": 391 }, { "epoch": 0.07520564042303172, "grad_norm": 0.6590873081885287, "learning_rate": 1.9893538874147928e-05, "loss": 0.2697, "step": 392 }, { "epoch": 0.07539749154655986, "grad_norm": 1.1240665078490395, "learning_rate": 1.9892632522519305e-05, "loss": 0.9012, "step": 393 }, { "epoch": 0.07558934267008802, "grad_norm": 1.1621325844127357, "learning_rate": 1.989172234995886e-05, "loss": 0.8828, "step": 394 }, { "epoch": 0.07578119379361616, "grad_norm": 1.111085671239978, "learning_rate": 1.989080835681814e-05, "loss": 0.8806, "step": 395 }, { "epoch": 0.0759730449171443, "grad_norm": 0.6486805998535944, "learning_rate": 1.9889890543450166e-05, "loss": 0.2979, "step": 396 }, { "epoch": 0.07616489604067243, "grad_norm": 1.1650683182966006, "learning_rate": 1.9888968910209433e-05, "loss": 0.9759, "step": 397 }, { "epoch": 0.07635674716420059, "grad_norm": 1.0948637517569064, "learning_rate": 1.9888043457451915e-05, "loss": 0.8452, "step": 398 }, { "epoch": 0.07654859828772873, "grad_norm": 1.0278062631674778, "learning_rate": 1.9887114185535055e-05, "loss": 0.9268, "step": 399 }, { "epoch": 0.07674044941125686, "grad_norm": 1.021742291076481, "learning_rate": 1.9886181094817777e-05, "loss": 0.9469, "step": 400 }, { "epoch": 0.076932300534785, "grad_norm": 0.9550488546391342, "learning_rate": 1.9885244185660482e-05, "loss": 0.8973, "step": 401 }, { "epoch": 0.07712415165831316, "grad_norm": 1.0153190140626984, "learning_rate": 1.988430345842504e-05, "loss": 0.8813, "step": 402 }, { "epoch": 0.0773160027818413, "grad_norm": 0.9997473647722896, "learning_rate": 1.988335891347479e-05, "loss": 0.8972, "step": 403 }, { "epoch": 0.07750785390536943, "grad_norm": 0.9333877177942593, "learning_rate": 1.9882410551174563e-05, "loss": 0.8517, "step": 404 }, { "epoch": 0.07769970502889757, "grad_norm": 0.6674013225119231, "learning_rate": 1.988145837189065e-05, "loss": 0.2699, "step": 405 }, { "epoch": 0.07789155615242571, "grad_norm": 1.02619178448356, "learning_rate": 1.9880502375990823e-05, "loss": 0.8642, "step": 406 }, { "epoch": 0.07808340727595386, "grad_norm": 1.1339058386147571, "learning_rate": 1.9879542563844324e-05, "loss": 0.8335, "step": 407 }, { "epoch": 0.078275258399482, "grad_norm": 1.0800226574714633, "learning_rate": 1.987857893582187e-05, "loss": 0.8577, "step": 408 }, { "epoch": 0.07846710952301014, "grad_norm": 1.0239143089041285, "learning_rate": 1.9877611492295655e-05, "loss": 0.9058, "step": 409 }, { "epoch": 0.07865896064653828, "grad_norm": 0.6419099052958638, "learning_rate": 1.9876640233639346e-05, "loss": 0.2947, "step": 410 }, { "epoch": 0.07885081177006643, "grad_norm": 1.2476955548485833, "learning_rate": 1.987566516022808e-05, "loss": 0.9341, "step": 411 }, { "epoch": 0.07904266289359457, "grad_norm": 1.1045604867766547, "learning_rate": 1.9874686272438467e-05, "loss": 0.9644, "step": 412 }, { "epoch": 0.07923451401712271, "grad_norm": 1.320502369209586, "learning_rate": 1.9873703570648598e-05, "loss": 0.9126, "step": 413 }, { "epoch": 0.07942636514065085, "grad_norm": 0.927402733082516, "learning_rate": 1.9872717055238027e-05, "loss": 0.9226, "step": 414 }, { "epoch": 0.079618216264179, "grad_norm": 0.6484018374786427, "learning_rate": 1.9871726726587792e-05, "loss": 0.2789, "step": 415 }, { "epoch": 0.07981006738770714, "grad_norm": 1.1721972817556587, "learning_rate": 1.9870732585080392e-05, "loss": 0.9284, "step": 416 }, { "epoch": 0.08000191851123528, "grad_norm": 1.0922730400575984, "learning_rate": 1.9869734631099807e-05, "loss": 0.8999, "step": 417 }, { "epoch": 0.08019376963476342, "grad_norm": 1.0663795440594603, "learning_rate": 1.9868732865031482e-05, "loss": 0.87, "step": 418 }, { "epoch": 0.08038562075829156, "grad_norm": 1.1555231164049498, "learning_rate": 1.9867727287262347e-05, "loss": 0.8832, "step": 419 }, { "epoch": 0.08057747188181971, "grad_norm": 1.1061492047506423, "learning_rate": 1.9866717898180795e-05, "loss": 0.8783, "step": 420 }, { "epoch": 0.08076932300534785, "grad_norm": 1.1151457414789903, "learning_rate": 1.9865704698176684e-05, "loss": 0.8776, "step": 421 }, { "epoch": 0.08096117412887599, "grad_norm": 1.1378723433380382, "learning_rate": 1.9864687687641365e-05, "loss": 0.8636, "step": 422 }, { "epoch": 0.08115302525240413, "grad_norm": 1.119957066058914, "learning_rate": 1.986366686696764e-05, "loss": 0.928, "step": 423 }, { "epoch": 0.08134487637593228, "grad_norm": 1.0109177803526315, "learning_rate": 1.9862642236549794e-05, "loss": 0.8887, "step": 424 }, { "epoch": 0.08153672749946042, "grad_norm": 1.144770763890048, "learning_rate": 1.986161379678358e-05, "loss": 0.8566, "step": 425 }, { "epoch": 0.08172857862298856, "grad_norm": 0.9785248028335533, "learning_rate": 1.9860581548066216e-05, "loss": 0.8751, "step": 426 }, { "epoch": 0.0819204297465167, "grad_norm": 1.2024287428372011, "learning_rate": 1.9859545490796414e-05, "loss": 0.902, "step": 427 }, { "epoch": 0.08211228087004485, "grad_norm": 1.0423965294265738, "learning_rate": 1.9858505625374325e-05, "loss": 0.96, "step": 428 }, { "epoch": 0.08230413199357299, "grad_norm": 1.055673683039577, "learning_rate": 1.9857461952201596e-05, "loss": 0.8464, "step": 429 }, { "epoch": 0.08249598311710113, "grad_norm": 1.1769868052295644, "learning_rate": 1.985641447168133e-05, "loss": 0.8414, "step": 430 }, { "epoch": 0.08268783424062927, "grad_norm": 0.9989282775562404, "learning_rate": 1.9855363184218108e-05, "loss": 0.8953, "step": 431 }, { "epoch": 0.08287968536415741, "grad_norm": 0.9705428496601529, "learning_rate": 1.9854308090217983e-05, "loss": 0.8459, "step": 432 }, { "epoch": 0.08307153648768556, "grad_norm": 1.0603538887339279, "learning_rate": 1.985324919008847e-05, "loss": 0.9068, "step": 433 }, { "epoch": 0.0832633876112137, "grad_norm": 1.1935747023866285, "learning_rate": 1.985218648423856e-05, "loss": 0.9164, "step": 434 }, { "epoch": 0.08345523873474184, "grad_norm": 0.9926170402591994, "learning_rate": 1.985111997307871e-05, "loss": 0.9161, "step": 435 }, { "epoch": 0.08364708985826998, "grad_norm": 1.037417859684577, "learning_rate": 1.9850049657020857e-05, "loss": 0.8284, "step": 436 }, { "epoch": 0.08383894098179813, "grad_norm": 1.0395237618018496, "learning_rate": 1.9848975536478394e-05, "loss": 0.8545, "step": 437 }, { "epoch": 0.08403079210532627, "grad_norm": 1.0090583448186787, "learning_rate": 1.9847897611866188e-05, "loss": 0.8537, "step": 438 }, { "epoch": 0.08422264322885441, "grad_norm": 1.0970052105834847, "learning_rate": 1.984681588360058e-05, "loss": 0.9237, "step": 439 }, { "epoch": 0.08441449435238255, "grad_norm": 1.1256433017814491, "learning_rate": 1.9845730352099383e-05, "loss": 0.9103, "step": 440 }, { "epoch": 0.0846063454759107, "grad_norm": 0.9964946613016772, "learning_rate": 1.984464101778186e-05, "loss": 0.8802, "step": 441 }, { "epoch": 0.08479819659943884, "grad_norm": 0.7653947484574162, "learning_rate": 1.9843547881068763e-05, "loss": 0.2779, "step": 442 }, { "epoch": 0.08499004772296698, "grad_norm": 1.1075378866514165, "learning_rate": 1.9842450942382306e-05, "loss": 0.8132, "step": 443 }, { "epoch": 0.08518189884649512, "grad_norm": 1.0590208067734312, "learning_rate": 1.9841350202146167e-05, "loss": 0.8941, "step": 444 }, { "epoch": 0.08537374997002327, "grad_norm": 1.2130944701894737, "learning_rate": 1.9840245660785496e-05, "loss": 0.9092, "step": 445 }, { "epoch": 0.08556560109355141, "grad_norm": 1.2056062656942577, "learning_rate": 1.983913731872692e-05, "loss": 0.9378, "step": 446 }, { "epoch": 0.08575745221707955, "grad_norm": 1.238426201480339, "learning_rate": 1.9838025176398515e-05, "loss": 0.9072, "step": 447 }, { "epoch": 0.08594930334060769, "grad_norm": 1.127068698248301, "learning_rate": 1.983690923422984e-05, "loss": 0.8994, "step": 448 }, { "epoch": 0.08614115446413582, "grad_norm": 1.1144881269528577, "learning_rate": 1.983578949265191e-05, "loss": 0.8837, "step": 449 }, { "epoch": 0.08633300558766398, "grad_norm": 1.0057720051241985, "learning_rate": 1.9834665952097223e-05, "loss": 0.8918, "step": 450 }, { "epoch": 0.08652485671119212, "grad_norm": 1.1343533029474784, "learning_rate": 1.9833538612999732e-05, "loss": 0.8978, "step": 451 }, { "epoch": 0.08671670783472026, "grad_norm": 1.329644677522826, "learning_rate": 1.983240747579486e-05, "loss": 0.8054, "step": 452 }, { "epoch": 0.0869085589582484, "grad_norm": 1.3415941661626452, "learning_rate": 1.9831272540919497e-05, "loss": 0.8145, "step": 453 }, { "epoch": 0.08710041008177655, "grad_norm": 1.0388041030792485, "learning_rate": 1.9830133808812e-05, "loss": 0.8605, "step": 454 }, { "epoch": 0.08729226120530469, "grad_norm": 1.1600723380709148, "learning_rate": 1.98289912799122e-05, "loss": 0.8615, "step": 455 }, { "epoch": 0.08748411232883282, "grad_norm": 0.7504199975332975, "learning_rate": 1.9827844954661376e-05, "loss": 0.32, "step": 456 }, { "epoch": 0.08767596345236096, "grad_norm": 1.1111296943797555, "learning_rate": 1.9826694833502295e-05, "loss": 0.8956, "step": 457 }, { "epoch": 0.08786781457588912, "grad_norm": 1.050717874904841, "learning_rate": 1.9825540916879174e-05, "loss": 0.8544, "step": 458 }, { "epoch": 0.08805966569941726, "grad_norm": 0.6779340270219385, "learning_rate": 1.9824383205237706e-05, "loss": 0.2951, "step": 459 }, { "epoch": 0.0882515168229454, "grad_norm": 1.110799285822405, "learning_rate": 1.982322169902504e-05, "loss": 0.8467, "step": 460 }, { "epoch": 0.08844336794647353, "grad_norm": 1.1908783611590368, "learning_rate": 1.9822056398689803e-05, "loss": 0.8564, "step": 461 }, { "epoch": 0.08863521907000167, "grad_norm": 1.140040883823396, "learning_rate": 1.9820887304682074e-05, "loss": 0.8854, "step": 462 }, { "epoch": 0.08882707019352983, "grad_norm": 1.0653163205344138, "learning_rate": 1.981971441745341e-05, "loss": 0.8682, "step": 463 }, { "epoch": 0.08901892131705796, "grad_norm": 1.1195790486024801, "learning_rate": 1.9818537737456826e-05, "loss": 0.9348, "step": 464 }, { "epoch": 0.0892107724405861, "grad_norm": 0.9806302261400454, "learning_rate": 1.9817357265146798e-05, "loss": 0.862, "step": 465 }, { "epoch": 0.08940262356411424, "grad_norm": 1.0297181372045936, "learning_rate": 1.9816173000979276e-05, "loss": 0.9107, "step": 466 }, { "epoch": 0.0895944746876424, "grad_norm": 1.02019062851358, "learning_rate": 1.981498494541167e-05, "loss": 0.9203, "step": 467 }, { "epoch": 0.08978632581117053, "grad_norm": 1.2184221239928072, "learning_rate": 1.9813793098902858e-05, "loss": 0.8908, "step": 468 }, { "epoch": 0.08997817693469867, "grad_norm": 1.2486122391088592, "learning_rate": 1.981259746191317e-05, "loss": 0.9075, "step": 469 }, { "epoch": 0.09017002805822681, "grad_norm": 1.2971327820253584, "learning_rate": 1.981139803490442e-05, "loss": 0.9057, "step": 470 }, { "epoch": 0.09036187918175496, "grad_norm": 1.0887255801370699, "learning_rate": 1.9810194818339868e-05, "loss": 0.9427, "step": 471 }, { "epoch": 0.0905537303052831, "grad_norm": 0.9491776205816826, "learning_rate": 1.9808987812684247e-05, "loss": 0.9392, "step": 472 }, { "epoch": 0.09074558142881124, "grad_norm": 0.946609915906753, "learning_rate": 1.9807777018403746e-05, "loss": 0.8488, "step": 473 }, { "epoch": 0.09093743255233938, "grad_norm": 1.1286385545044189, "learning_rate": 1.980656243596603e-05, "loss": 0.8905, "step": 474 }, { "epoch": 0.09112928367586752, "grad_norm": 1.0800306644924955, "learning_rate": 1.9805344065840212e-05, "loss": 0.8964, "step": 475 }, { "epoch": 0.09132113479939567, "grad_norm": 1.2281338828895865, "learning_rate": 1.9804121908496887e-05, "loss": 0.873, "step": 476 }, { "epoch": 0.09151298592292381, "grad_norm": 1.1956587656373823, "learning_rate": 1.9802895964408088e-05, "loss": 0.8906, "step": 477 }, { "epoch": 0.09170483704645195, "grad_norm": 1.1773118830828395, "learning_rate": 1.980166623404733e-05, "loss": 0.925, "step": 478 }, { "epoch": 0.09189668816998009, "grad_norm": 0.9682330385257332, "learning_rate": 1.9800432717889583e-05, "loss": 0.8876, "step": 479 }, { "epoch": 0.09208853929350824, "grad_norm": 1.1675595170400084, "learning_rate": 1.979919541641128e-05, "loss": 0.8758, "step": 480 }, { "epoch": 0.09228039041703638, "grad_norm": 1.0360049058323677, "learning_rate": 1.9797954330090322e-05, "loss": 0.8763, "step": 481 }, { "epoch": 0.09247224154056452, "grad_norm": 1.092674078342506, "learning_rate": 1.979670945940606e-05, "loss": 0.8654, "step": 482 }, { "epoch": 0.09266409266409266, "grad_norm": 1.056483626961011, "learning_rate": 1.9795460804839313e-05, "loss": 0.8305, "step": 483 }, { "epoch": 0.09285594378762081, "grad_norm": 1.0539913190748105, "learning_rate": 1.9794208366872368e-05, "loss": 0.927, "step": 484 }, { "epoch": 0.09304779491114895, "grad_norm": 1.023490670944689, "learning_rate": 1.9792952145988958e-05, "loss": 0.8393, "step": 485 }, { "epoch": 0.09323964603467709, "grad_norm": 1.1812467439069396, "learning_rate": 1.979169214267429e-05, "loss": 0.8964, "step": 486 }, { "epoch": 0.09343149715820523, "grad_norm": 1.1768023611494784, "learning_rate": 1.979042835741503e-05, "loss": 0.8703, "step": 487 }, { "epoch": 0.09362334828173338, "grad_norm": 1.0154927821254778, "learning_rate": 1.97891607906993e-05, "loss": 0.8955, "step": 488 }, { "epoch": 0.09381519940526152, "grad_norm": 0.9547422503562932, "learning_rate": 1.978788944301669e-05, "loss": 0.8486, "step": 489 }, { "epoch": 0.09400705052878966, "grad_norm": 1.0980969755169578, "learning_rate": 1.978661431485824e-05, "loss": 0.8193, "step": 490 }, { "epoch": 0.0941989016523178, "grad_norm": 1.4590617323385104, "learning_rate": 1.9785335406716454e-05, "loss": 0.3205, "step": 491 }, { "epoch": 0.09439075277584594, "grad_norm": 1.0065750943574205, "learning_rate": 1.97840527190853e-05, "loss": 0.8611, "step": 492 }, { "epoch": 0.09458260389937409, "grad_norm": 0.9567838386108075, "learning_rate": 1.978276625246021e-05, "loss": 0.9236, "step": 493 }, { "epoch": 0.09477445502290223, "grad_norm": 0.9662070979776258, "learning_rate": 1.9781476007338058e-05, "loss": 0.8566, "step": 494 }, { "epoch": 0.09496630614643037, "grad_norm": 0.9833867491337047, "learning_rate": 1.9780181984217196e-05, "loss": 0.9082, "step": 495 }, { "epoch": 0.09515815726995851, "grad_norm": 1.0384338754920337, "learning_rate": 1.9778884183597425e-05, "loss": 0.9204, "step": 496 }, { "epoch": 0.09535000839348666, "grad_norm": 1.1065166405595714, "learning_rate": 1.9777582605980007e-05, "loss": 0.8983, "step": 497 }, { "epoch": 0.0955418595170148, "grad_norm": 1.0279265898956533, "learning_rate": 1.9776277251867665e-05, "loss": 0.8007, "step": 498 }, { "epoch": 0.09573371064054294, "grad_norm": 1.0126126972551437, "learning_rate": 1.9774968121764583e-05, "loss": 0.8901, "step": 499 }, { "epoch": 0.09592556176407108, "grad_norm": 1.0993846205182913, "learning_rate": 1.9773655216176388e-05, "loss": 0.9321, "step": 500 }, { "epoch": 0.09611741288759923, "grad_norm": 1.0480912742927488, "learning_rate": 1.977233853561019e-05, "loss": 0.9049, "step": 501 }, { "epoch": 0.09630926401112737, "grad_norm": 1.2005673264397039, "learning_rate": 1.9771018080574534e-05, "loss": 0.8183, "step": 502 }, { "epoch": 0.09650111513465551, "grad_norm": 1.052780491050887, "learning_rate": 1.976969385157944e-05, "loss": 0.8567, "step": 503 }, { "epoch": 0.09669296625818365, "grad_norm": 1.365888534301929, "learning_rate": 1.9768365849136377e-05, "loss": 0.8526, "step": 504 }, { "epoch": 0.09688481738171179, "grad_norm": 1.1524341065722448, "learning_rate": 1.9767034073758266e-05, "loss": 0.8413, "step": 505 }, { "epoch": 0.09707666850523994, "grad_norm": 1.0900903318242239, "learning_rate": 1.97656985259595e-05, "loss": 0.8141, "step": 506 }, { "epoch": 0.09726851962876808, "grad_norm": 1.1799599319193588, "learning_rate": 1.976435920625592e-05, "loss": 0.8546, "step": 507 }, { "epoch": 0.09746037075229622, "grad_norm": 0.9667275672500774, "learning_rate": 1.9763016115164823e-05, "loss": 0.8917, "step": 508 }, { "epoch": 0.09765222187582435, "grad_norm": 1.1620489220311614, "learning_rate": 1.976166925320496e-05, "loss": 0.8656, "step": 509 }, { "epoch": 0.09784407299935251, "grad_norm": 1.1661922913424756, "learning_rate": 1.9760318620896557e-05, "loss": 0.8936, "step": 510 }, { "epoch": 0.09803592412288065, "grad_norm": 1.338792194701358, "learning_rate": 1.9758964218761268e-05, "loss": 0.8879, "step": 511 }, { "epoch": 0.09822777524640879, "grad_norm": 1.0055624499315754, "learning_rate": 1.9757606047322225e-05, "loss": 0.8859, "step": 512 }, { "epoch": 0.09841962636993692, "grad_norm": 0.9586462576773881, "learning_rate": 1.9756244107104005e-05, "loss": 0.8776, "step": 513 }, { "epoch": 0.09861147749346508, "grad_norm": 1.1060545582654229, "learning_rate": 1.9754878398632647e-05, "loss": 0.8972, "step": 514 }, { "epoch": 0.09880332861699322, "grad_norm": 1.1869577343088114, "learning_rate": 1.975350892243564e-05, "loss": 0.8263, "step": 515 }, { "epoch": 0.09899517974052136, "grad_norm": 1.1250260757024486, "learning_rate": 1.9752135679041932e-05, "loss": 0.8321, "step": 516 }, { "epoch": 0.0991870308640495, "grad_norm": 1.043495452368223, "learning_rate": 1.9750758668981925e-05, "loss": 0.8289, "step": 517 }, { "epoch": 0.09937888198757763, "grad_norm": 1.1434552127822208, "learning_rate": 1.9749377892787476e-05, "loss": 0.8687, "step": 518 }, { "epoch": 0.09957073311110579, "grad_norm": 0.9469073094691975, "learning_rate": 1.9747993350991895e-05, "loss": 0.8134, "step": 519 }, { "epoch": 0.09976258423463392, "grad_norm": 1.2705396390827455, "learning_rate": 1.9746605044129944e-05, "loss": 0.905, "step": 520 }, { "epoch": 0.09995443535816206, "grad_norm": 1.068877089301482, "learning_rate": 1.974521297273785e-05, "loss": 0.8879, "step": 521 }, { "epoch": 0.1001462864816902, "grad_norm": 1.2597856978917585, "learning_rate": 1.9743817137353283e-05, "loss": 0.8271, "step": 522 }, { "epoch": 0.10033813760521836, "grad_norm": 1.2727506327453473, "learning_rate": 1.974241753851537e-05, "loss": 0.8384, "step": 523 }, { "epoch": 0.1005299887287465, "grad_norm": 0.9629065094603706, "learning_rate": 1.9741014176764698e-05, "loss": 0.9535, "step": 524 }, { "epoch": 0.10072183985227463, "grad_norm": 1.1343229430337507, "learning_rate": 1.9739607052643293e-05, "loss": 0.9135, "step": 525 }, { "epoch": 0.10091369097580277, "grad_norm": 1.5440475538062604, "learning_rate": 1.9738196166694647e-05, "loss": 0.3571, "step": 526 }, { "epoch": 0.10110554209933093, "grad_norm": 1.1103511359495348, "learning_rate": 1.97367815194637e-05, "loss": 0.8653, "step": 527 }, { "epoch": 0.10129739322285906, "grad_norm": 1.0499107985876333, "learning_rate": 1.9735363111496847e-05, "loss": 0.8423, "step": 528 }, { "epoch": 0.1014892443463872, "grad_norm": 1.0599010807612248, "learning_rate": 1.9733940943341935e-05, "loss": 0.8909, "step": 529 }, { "epoch": 0.10168109546991534, "grad_norm": 1.2395620216962013, "learning_rate": 1.9732515015548258e-05, "loss": 0.9061, "step": 530 }, { "epoch": 0.1018729465934435, "grad_norm": 1.4174751611250558, "learning_rate": 1.9731085328666572e-05, "loss": 0.9541, "step": 531 }, { "epoch": 0.10206479771697163, "grad_norm": 1.2061182816413578, "learning_rate": 1.9729651883249075e-05, "loss": 0.9263, "step": 532 }, { "epoch": 0.10225664884049977, "grad_norm": 1.25101832289639, "learning_rate": 1.9728214679849423e-05, "loss": 0.8264, "step": 533 }, { "epoch": 0.10244849996402791, "grad_norm": 1.1230033108670838, "learning_rate": 1.9726773719022723e-05, "loss": 0.8794, "step": 534 }, { "epoch": 0.10264035108755605, "grad_norm": 1.0234956344143578, "learning_rate": 1.9725329001325527e-05, "loss": 0.8434, "step": 535 }, { "epoch": 0.1028322022110842, "grad_norm": 1.161888721759996, "learning_rate": 1.972388052731585e-05, "loss": 0.8983, "step": 536 }, { "epoch": 0.10302405333461234, "grad_norm": 0.9510246790860014, "learning_rate": 1.972242829755315e-05, "loss": 0.8979, "step": 537 }, { "epoch": 0.10321590445814048, "grad_norm": 1.0158869225223472, "learning_rate": 1.972097231259833e-05, "loss": 0.8475, "step": 538 }, { "epoch": 0.10340775558166862, "grad_norm": 1.0840514581400569, "learning_rate": 1.9719512573013753e-05, "loss": 0.8776, "step": 539 }, { "epoch": 0.10359960670519677, "grad_norm": 0.9240983149921004, "learning_rate": 1.9718049079363227e-05, "loss": 0.8723, "step": 540 }, { "epoch": 0.10379145782872491, "grad_norm": 1.065198651250999, "learning_rate": 1.971658183221202e-05, "loss": 0.8487, "step": 541 }, { "epoch": 0.10398330895225305, "grad_norm": 0.6751121691520532, "learning_rate": 1.9715110832126835e-05, "loss": 0.3269, "step": 542 }, { "epoch": 0.10417516007578119, "grad_norm": 1.1116813828605958, "learning_rate": 1.9713636079675835e-05, "loss": 0.8979, "step": 543 }, { "epoch": 0.10436701119930934, "grad_norm": 1.0305111014959867, "learning_rate": 1.9712157575428617e-05, "loss": 0.8725, "step": 544 }, { "epoch": 0.10455886232283748, "grad_norm": 0.9696216835346776, "learning_rate": 1.9710675319956256e-05, "loss": 0.8724, "step": 545 }, { "epoch": 0.10475071344636562, "grad_norm": 0.9555218741337556, "learning_rate": 1.970918931383125e-05, "loss": 0.8764, "step": 546 }, { "epoch": 0.10494256456989376, "grad_norm": 1.2902341311460839, "learning_rate": 1.9707699557627554e-05, "loss": 0.9539, "step": 547 }, { "epoch": 0.1051344156934219, "grad_norm": 1.1310336532591148, "learning_rate": 1.9706206051920573e-05, "loss": 0.939, "step": 548 }, { "epoch": 0.10532626681695005, "grad_norm": 1.2084689448637511, "learning_rate": 1.9704708797287156e-05, "loss": 0.8701, "step": 549 }, { "epoch": 0.10551811794047819, "grad_norm": 1.1023898473009766, "learning_rate": 1.9703207794305608e-05, "loss": 0.8564, "step": 550 }, { "epoch": 0.10570996906400633, "grad_norm": 1.0535388221426387, "learning_rate": 1.970170304355567e-05, "loss": 0.9586, "step": 551 }, { "epoch": 0.10590182018753447, "grad_norm": 0.6875934557559709, "learning_rate": 1.970019454561854e-05, "loss": 0.325, "step": 552 }, { "epoch": 0.10609367131106262, "grad_norm": 1.1079786149340904, "learning_rate": 1.9698682301076867e-05, "loss": 0.8074, "step": 553 }, { "epoch": 0.10628552243459076, "grad_norm": 1.1725021046972548, "learning_rate": 1.9697166310514735e-05, "loss": 0.872, "step": 554 }, { "epoch": 0.1064773735581189, "grad_norm": 1.0221121784642655, "learning_rate": 1.9695646574517675e-05, "loss": 0.8741, "step": 555 }, { "epoch": 0.10666922468164704, "grad_norm": 1.199234638826653, "learning_rate": 1.969412309367268e-05, "loss": 0.866, "step": 556 }, { "epoch": 0.10686107580517519, "grad_norm": 0.6076672322824542, "learning_rate": 1.9692595868568172e-05, "loss": 0.293, "step": 557 }, { "epoch": 0.10705292692870333, "grad_norm": 1.1692596490023854, "learning_rate": 1.969106489979403e-05, "loss": 0.9194, "step": 558 }, { "epoch": 0.10724477805223147, "grad_norm": 0.9643998517263197, "learning_rate": 1.9689530187941575e-05, "loss": 0.8377, "step": 559 }, { "epoch": 0.10743662917575961, "grad_norm": 1.00739679557371, "learning_rate": 1.9687991733603574e-05, "loss": 0.8873, "step": 560 }, { "epoch": 0.10762848029928775, "grad_norm": 0.9889637069486695, "learning_rate": 1.9686449537374242e-05, "loss": 0.8623, "step": 561 }, { "epoch": 0.1078203314228159, "grad_norm": 1.0836847798467881, "learning_rate": 1.968490359984923e-05, "loss": 0.8591, "step": 562 }, { "epoch": 0.10801218254634404, "grad_norm": 0.6714919594203496, "learning_rate": 1.9683353921625654e-05, "loss": 0.305, "step": 563 }, { "epoch": 0.10820403366987218, "grad_norm": 0.9852973114360336, "learning_rate": 1.968180050330205e-05, "loss": 0.855, "step": 564 }, { "epoch": 0.10839588479340032, "grad_norm": 1.0880377263460115, "learning_rate": 1.9680243345478416e-05, "loss": 0.8621, "step": 565 }, { "epoch": 0.10858773591692847, "grad_norm": 1.1961545316688087, "learning_rate": 1.9678682448756188e-05, "loss": 0.8405, "step": 566 }, { "epoch": 0.10877958704045661, "grad_norm": 0.6385868741678601, "learning_rate": 1.9677117813738245e-05, "loss": 0.3138, "step": 567 }, { "epoch": 0.10897143816398475, "grad_norm": 1.09231124343318, "learning_rate": 1.9675549441028913e-05, "loss": 0.8612, "step": 568 }, { "epoch": 0.10916328928751289, "grad_norm": 1.1389349896051424, "learning_rate": 1.9673977331233964e-05, "loss": 0.8199, "step": 569 }, { "epoch": 0.10935514041104104, "grad_norm": 0.9793753285100535, "learning_rate": 1.9672401484960607e-05, "loss": 0.8919, "step": 570 }, { "epoch": 0.10954699153456918, "grad_norm": 0.996497071131716, "learning_rate": 1.9670821902817496e-05, "loss": 0.8909, "step": 571 }, { "epoch": 0.10973884265809732, "grad_norm": 0.8989101219319947, "learning_rate": 1.9669238585414728e-05, "loss": 0.8266, "step": 572 }, { "epoch": 0.10993069378162545, "grad_norm": 1.2444368439620583, "learning_rate": 1.966765153336385e-05, "loss": 0.9105, "step": 573 }, { "epoch": 0.11012254490515361, "grad_norm": 0.9218946674912233, "learning_rate": 1.966606074727784e-05, "loss": 0.8829, "step": 574 }, { "epoch": 0.11031439602868175, "grad_norm": 1.056592315236213, "learning_rate": 1.9664466227771122e-05, "loss": 0.9388, "step": 575 }, { "epoch": 0.11050624715220989, "grad_norm": 1.067588133324203, "learning_rate": 1.966286797545957e-05, "loss": 0.8648, "step": 576 }, { "epoch": 0.11069809827573802, "grad_norm": 1.0050559815915294, "learning_rate": 1.9661265990960486e-05, "loss": 0.9184, "step": 577 }, { "epoch": 0.11088994939926616, "grad_norm": 1.1659126468097778, "learning_rate": 1.9659660274892625e-05, "loss": 0.8712, "step": 578 }, { "epoch": 0.11108180052279432, "grad_norm": 0.9798260194752159, "learning_rate": 1.965805082787618e-05, "loss": 0.9158, "step": 579 }, { "epoch": 0.11127365164632246, "grad_norm": 0.9741402387215534, "learning_rate": 1.9656437650532777e-05, "loss": 0.8555, "step": 580 }, { "epoch": 0.1114655027698506, "grad_norm": 1.1353466995985546, "learning_rate": 1.96548207434855e-05, "loss": 0.8502, "step": 581 }, { "epoch": 0.11165735389337873, "grad_norm": 1.035656384831665, "learning_rate": 1.9653200107358855e-05, "loss": 0.8963, "step": 582 }, { "epoch": 0.11184920501690689, "grad_norm": 1.1927483586213956, "learning_rate": 1.96515757427788e-05, "loss": 0.8558, "step": 583 }, { "epoch": 0.11204105614043502, "grad_norm": 1.005847795871807, "learning_rate": 1.964994765037273e-05, "loss": 0.9119, "step": 584 }, { "epoch": 0.11223290726396316, "grad_norm": 1.0665825385334455, "learning_rate": 1.9648315830769478e-05, "loss": 0.9119, "step": 585 }, { "epoch": 0.1124247583874913, "grad_norm": 0.9922803927184453, "learning_rate": 1.9646680284599324e-05, "loss": 0.8521, "step": 586 }, { "epoch": 0.11261660951101946, "grad_norm": 0.9923063901334062, "learning_rate": 1.9645041012493974e-05, "loss": 0.8547, "step": 587 }, { "epoch": 0.1128084606345476, "grad_norm": 1.0297420663651538, "learning_rate": 1.9643398015086585e-05, "loss": 0.8816, "step": 588 }, { "epoch": 0.11300031175807573, "grad_norm": 1.1656536623864684, "learning_rate": 1.9641751293011747e-05, "loss": 0.8819, "step": 589 }, { "epoch": 0.11319216288160387, "grad_norm": 0.9610588332287351, "learning_rate": 1.964010084690549e-05, "loss": 0.843, "step": 590 }, { "epoch": 0.11338401400513201, "grad_norm": 1.0693944332448833, "learning_rate": 1.9638446677405286e-05, "loss": 0.7899, "step": 591 }, { "epoch": 0.11357586512866016, "grad_norm": 1.0487383208755583, "learning_rate": 1.9636788785150037e-05, "loss": 0.8875, "step": 592 }, { "epoch": 0.1137677162521883, "grad_norm": 0.9565326531034252, "learning_rate": 1.963512717078009e-05, "loss": 0.8375, "step": 593 }, { "epoch": 0.11395956737571644, "grad_norm": 0.940397377062958, "learning_rate": 1.9633461834937226e-05, "loss": 0.8637, "step": 594 }, { "epoch": 0.11415141849924458, "grad_norm": 1.0350365829616737, "learning_rate": 1.9631792778264665e-05, "loss": 0.8732, "step": 595 }, { "epoch": 0.11434326962277273, "grad_norm": 0.9712608546453038, "learning_rate": 1.9630120001407066e-05, "loss": 0.8653, "step": 596 }, { "epoch": 0.11453512074630087, "grad_norm": 0.9455315978492276, "learning_rate": 1.962844350501052e-05, "loss": 0.8181, "step": 597 }, { "epoch": 0.11472697186982901, "grad_norm": 1.167360435824856, "learning_rate": 1.9626763289722553e-05, "loss": 0.816, "step": 598 }, { "epoch": 0.11491882299335715, "grad_norm": 1.010573191243999, "learning_rate": 1.9625079356192142e-05, "loss": 0.898, "step": 599 }, { "epoch": 0.1151106741168853, "grad_norm": 0.9652770351316647, "learning_rate": 1.962339170506968e-05, "loss": 0.785, "step": 600 }, { "epoch": 0.11530252524041344, "grad_norm": 0.712216473548431, "learning_rate": 1.9621700337007012e-05, "loss": 0.301, "step": 601 }, { "epoch": 0.11549437636394158, "grad_norm": 0.6719586847166046, "learning_rate": 1.962000525265741e-05, "loss": 0.3468, "step": 602 }, { "epoch": 0.11568622748746972, "grad_norm": 1.03305836567427, "learning_rate": 1.961830645267558e-05, "loss": 0.9382, "step": 603 }, { "epoch": 0.11587807861099786, "grad_norm": 1.2144387043258869, "learning_rate": 1.9616603937717676e-05, "loss": 0.8837, "step": 604 }, { "epoch": 0.11606992973452601, "grad_norm": 1.0686795061054455, "learning_rate": 1.9614897708441267e-05, "loss": 0.9272, "step": 605 }, { "epoch": 0.11626178085805415, "grad_norm": 1.0105847933512158, "learning_rate": 1.9613187765505374e-05, "loss": 0.895, "step": 606 }, { "epoch": 0.11645363198158229, "grad_norm": 0.9852210166963897, "learning_rate": 1.9611474109570446e-05, "loss": 0.8733, "step": 607 }, { "epoch": 0.11664548310511043, "grad_norm": 0.9457767643302347, "learning_rate": 1.960975674129836e-05, "loss": 0.8912, "step": 608 }, { "epoch": 0.11683733422863858, "grad_norm": 1.0707066749751513, "learning_rate": 1.9608035661352437e-05, "loss": 0.8612, "step": 609 }, { "epoch": 0.11702918535216672, "grad_norm": 1.0626231262949621, "learning_rate": 1.9606310870397426e-05, "loss": 0.861, "step": 610 }, { "epoch": 0.11722103647569486, "grad_norm": 1.0827756988675055, "learning_rate": 1.960458236909951e-05, "loss": 0.9428, "step": 611 }, { "epoch": 0.117412887599223, "grad_norm": 1.1233823223158592, "learning_rate": 1.9602850158126308e-05, "loss": 0.8838, "step": 612 }, { "epoch": 0.11760473872275115, "grad_norm": 1.0701205925279331, "learning_rate": 1.9601114238146867e-05, "loss": 0.8456, "step": 613 }, { "epoch": 0.11779658984627929, "grad_norm": 1.0240097596981919, "learning_rate": 1.9599374609831667e-05, "loss": 0.8799, "step": 614 }, { "epoch": 0.11798844096980743, "grad_norm": 1.1203614389805467, "learning_rate": 1.9597631273852628e-05, "loss": 0.8687, "step": 615 }, { "epoch": 0.11818029209333557, "grad_norm": 1.042502191421542, "learning_rate": 1.959588423088309e-05, "loss": 0.8519, "step": 616 }, { "epoch": 0.11837214321686372, "grad_norm": 1.0864731285920524, "learning_rate": 1.9594133481597837e-05, "loss": 0.9087, "step": 617 }, { "epoch": 0.11856399434039186, "grad_norm": 0.9787261940765011, "learning_rate": 1.959237902667308e-05, "loss": 0.8451, "step": 618 }, { "epoch": 0.11875584546392, "grad_norm": 0.9966614527915212, "learning_rate": 1.9590620866786453e-05, "loss": 0.8446, "step": 619 }, { "epoch": 0.11894769658744814, "grad_norm": 1.0220718732628482, "learning_rate": 1.9588859002617026e-05, "loss": 0.8364, "step": 620 }, { "epoch": 0.11913954771097628, "grad_norm": 1.291331208884991, "learning_rate": 1.958709343484531e-05, "loss": 0.8172, "step": 621 }, { "epoch": 0.11933139883450443, "grad_norm": 0.931929680264572, "learning_rate": 1.9585324164153236e-05, "loss": 0.9031, "step": 622 }, { "epoch": 0.11952324995803257, "grad_norm": 1.0433637380448524, "learning_rate": 1.9583551191224165e-05, "loss": 0.8226, "step": 623 }, { "epoch": 0.11971510108156071, "grad_norm": 1.0110815132697537, "learning_rate": 1.9581774516742893e-05, "loss": 0.9154, "step": 624 }, { "epoch": 0.11990695220508885, "grad_norm": 0.9768973651957826, "learning_rate": 1.9579994141395643e-05, "loss": 0.864, "step": 625 }, { "epoch": 0.120098803328617, "grad_norm": 0.9805407360384285, "learning_rate": 1.957821006587006e-05, "loss": 0.926, "step": 626 }, { "epoch": 0.12029065445214514, "grad_norm": 1.030182989274252, "learning_rate": 1.957642229085524e-05, "loss": 0.8916, "step": 627 }, { "epoch": 0.12048250557567328, "grad_norm": 0.9572985880621525, "learning_rate": 1.9574630817041678e-05, "loss": 0.945, "step": 628 }, { "epoch": 0.12067435669920142, "grad_norm": 0.9852060643005276, "learning_rate": 1.9572835645121322e-05, "loss": 0.8962, "step": 629 }, { "epoch": 0.12086620782272957, "grad_norm": 1.017478710687697, "learning_rate": 1.9571036775787537e-05, "loss": 0.8534, "step": 630 }, { "epoch": 0.12105805894625771, "grad_norm": 1.0280883129356693, "learning_rate": 1.956923420973512e-05, "loss": 0.9323, "step": 631 }, { "epoch": 0.12124991006978585, "grad_norm": 1.0457251132893837, "learning_rate": 1.956742794766029e-05, "loss": 0.7684, "step": 632 }, { "epoch": 0.12144176119331399, "grad_norm": 1.0031547477228617, "learning_rate": 1.9565617990260707e-05, "loss": 0.9737, "step": 633 }, { "epoch": 0.12163361231684212, "grad_norm": 1.1576251833355833, "learning_rate": 1.956380433823544e-05, "loss": 0.8895, "step": 634 }, { "epoch": 0.12182546344037028, "grad_norm": 0.9710795933796098, "learning_rate": 1.9561986992284998e-05, "loss": 0.8787, "step": 635 }, { "epoch": 0.12201731456389842, "grad_norm": 1.0522806098521487, "learning_rate": 1.9560165953111312e-05, "loss": 0.9206, "step": 636 }, { "epoch": 0.12220916568742655, "grad_norm": 0.9806234381299819, "learning_rate": 1.9558341221417744e-05, "loss": 0.9111, "step": 637 }, { "epoch": 0.1224010168109547, "grad_norm": 1.0468284726218642, "learning_rate": 1.9556512797909076e-05, "loss": 0.7706, "step": 638 }, { "epoch": 0.12259286793448285, "grad_norm": 1.0850660405554502, "learning_rate": 1.9554680683291517e-05, "loss": 0.9661, "step": 639 }, { "epoch": 0.12278471905801099, "grad_norm": 1.1563327911799524, "learning_rate": 1.9552844878272702e-05, "loss": 0.8327, "step": 640 }, { "epoch": 0.12297657018153912, "grad_norm": 1.0234332967379902, "learning_rate": 1.95510053835617e-05, "loss": 0.8962, "step": 641 }, { "epoch": 0.12316842130506726, "grad_norm": 1.1844189268243561, "learning_rate": 1.954916219986899e-05, "loss": 0.7941, "step": 642 }, { "epoch": 0.12336027242859542, "grad_norm": 1.0422029151419296, "learning_rate": 1.9547315327906487e-05, "loss": 0.8982, "step": 643 }, { "epoch": 0.12355212355212356, "grad_norm": 0.9268991525315138, "learning_rate": 1.9545464768387526e-05, "loss": 0.8744, "step": 644 }, { "epoch": 0.1237439746756517, "grad_norm": 1.1321885741417455, "learning_rate": 1.9543610522026867e-05, "loss": 0.9032, "step": 645 }, { "epoch": 0.12393582579917983, "grad_norm": 1.0799637006851066, "learning_rate": 1.95417525895407e-05, "loss": 0.8701, "step": 646 }, { "epoch": 0.12412767692270797, "grad_norm": 0.9700656093795259, "learning_rate": 1.9539890971646624e-05, "loss": 0.9006, "step": 647 }, { "epoch": 0.12431952804623612, "grad_norm": 0.9915772175870659, "learning_rate": 1.9538025669063678e-05, "loss": 0.9169, "step": 648 }, { "epoch": 0.12451137916976426, "grad_norm": 1.0162829756393168, "learning_rate": 1.9536156682512312e-05, "loss": 0.8778, "step": 649 }, { "epoch": 0.1247032302932924, "grad_norm": 0.9388512247285833, "learning_rate": 1.9534284012714405e-05, "loss": 0.8284, "step": 650 }, { "epoch": 0.12489508141682054, "grad_norm": 1.0275555119686057, "learning_rate": 1.9532407660393258e-05, "loss": 0.7949, "step": 651 }, { "epoch": 0.1250869325403487, "grad_norm": 0.9538920859237724, "learning_rate": 1.9530527626273592e-05, "loss": 0.8799, "step": 652 }, { "epoch": 0.12527878366387682, "grad_norm": 0.960340787258044, "learning_rate": 1.9528643911081556e-05, "loss": 0.9012, "step": 653 }, { "epoch": 0.12547063478740497, "grad_norm": 1.0231439109212561, "learning_rate": 1.952675651554471e-05, "loss": 0.823, "step": 654 }, { "epoch": 0.12566248591093312, "grad_norm": 0.9777149862019828, "learning_rate": 1.9524865440392048e-05, "loss": 0.3325, "step": 655 }, { "epoch": 0.12585433703446125, "grad_norm": 1.2356021007414295, "learning_rate": 1.9522970686353976e-05, "loss": 0.8447, "step": 656 }, { "epoch": 0.1260461881579894, "grad_norm": 1.0732040486737846, "learning_rate": 1.9521072254162324e-05, "loss": 0.8631, "step": 657 }, { "epoch": 0.12623803928151756, "grad_norm": 1.0109735374545332, "learning_rate": 1.9519170144550342e-05, "loss": 0.9224, "step": 658 }, { "epoch": 0.12642989040504568, "grad_norm": 0.6962914540756535, "learning_rate": 1.9517264358252702e-05, "loss": 0.297, "step": 659 }, { "epoch": 0.12662174152857383, "grad_norm": 1.0230786750941028, "learning_rate": 1.9515354896005496e-05, "loss": 0.8931, "step": 660 }, { "epoch": 0.12681359265210196, "grad_norm": 0.9898329998139305, "learning_rate": 1.9513441758546235e-05, "loss": 0.8593, "step": 661 }, { "epoch": 0.1270054437756301, "grad_norm": 0.9245132856665037, "learning_rate": 1.951152494661385e-05, "loss": 0.9102, "step": 662 }, { "epoch": 0.12719729489915826, "grad_norm": 1.0446676611487982, "learning_rate": 1.9509604460948687e-05, "loss": 0.8785, "step": 663 }, { "epoch": 0.1273891460226864, "grad_norm": 1.004619514003131, "learning_rate": 1.9507680302292518e-05, "loss": 0.8612, "step": 664 }, { "epoch": 0.12758099714621454, "grad_norm": 0.9898451659700107, "learning_rate": 1.950575247138853e-05, "loss": 0.8729, "step": 665 }, { "epoch": 0.12777284826974267, "grad_norm": 0.642035739031974, "learning_rate": 1.950382096898133e-05, "loss": 0.3122, "step": 666 }, { "epoch": 0.12796469939327082, "grad_norm": 0.9965139400359114, "learning_rate": 1.9501885795816937e-05, "loss": 0.8835, "step": 667 }, { "epoch": 0.12815655051679897, "grad_norm": 0.637005452212527, "learning_rate": 1.94999469526428e-05, "loss": 0.2823, "step": 668 }, { "epoch": 0.1283484016403271, "grad_norm": 1.0614713780304945, "learning_rate": 1.949800444020777e-05, "loss": 0.8937, "step": 669 }, { "epoch": 0.12854025276385525, "grad_norm": 1.0343326709186185, "learning_rate": 1.9496058259262124e-05, "loss": 0.8952, "step": 670 }, { "epoch": 0.1287321038873834, "grad_norm": 1.0607603425910366, "learning_rate": 1.9494108410557563e-05, "loss": 0.8405, "step": 671 }, { "epoch": 0.12892395501091153, "grad_norm": 1.1049146000753882, "learning_rate": 1.9492154894847192e-05, "loss": 0.7789, "step": 672 }, { "epoch": 0.12911580613443968, "grad_norm": 1.0726546263304122, "learning_rate": 1.9490197712885538e-05, "loss": 0.8548, "step": 673 }, { "epoch": 0.1293076572579678, "grad_norm": 0.7343544391533559, "learning_rate": 1.9488236865428537e-05, "loss": 0.305, "step": 674 }, { "epoch": 0.12949950838149596, "grad_norm": 1.2426045672179808, "learning_rate": 1.9486272353233556e-05, "loss": 0.8233, "step": 675 }, { "epoch": 0.1296913595050241, "grad_norm": 1.073466792463549, "learning_rate": 1.9484304177059366e-05, "loss": 0.8848, "step": 676 }, { "epoch": 0.12988321062855224, "grad_norm": 1.1306800967450963, "learning_rate": 1.9482332337666156e-05, "loss": 0.8797, "step": 677 }, { "epoch": 0.1300750617520804, "grad_norm": 0.9507166825540415, "learning_rate": 1.9480356835815524e-05, "loss": 0.8695, "step": 678 }, { "epoch": 0.13026691287560851, "grad_norm": 0.9652099911069143, "learning_rate": 1.9478377672270494e-05, "loss": 0.9016, "step": 679 }, { "epoch": 0.13045876399913667, "grad_norm": 1.0050113220276473, "learning_rate": 1.94763948477955e-05, "loss": 0.8891, "step": 680 }, { "epoch": 0.13065061512266482, "grad_norm": 0.9817541910931907, "learning_rate": 1.947440836315638e-05, "loss": 0.855, "step": 681 }, { "epoch": 0.13084246624619295, "grad_norm": 0.6625159707357955, "learning_rate": 1.9472418219120403e-05, "loss": 0.3227, "step": 682 }, { "epoch": 0.1310343173697211, "grad_norm": 0.933297872891513, "learning_rate": 1.9470424416456235e-05, "loss": 0.863, "step": 683 }, { "epoch": 0.13122616849324925, "grad_norm": 0.9721163004836642, "learning_rate": 1.946842695593397e-05, "loss": 0.8838, "step": 684 }, { "epoch": 0.13141801961677738, "grad_norm": 1.0064478401151813, "learning_rate": 1.9466425838325102e-05, "loss": 0.8311, "step": 685 }, { "epoch": 0.13160987074030553, "grad_norm": 1.0310253870759067, "learning_rate": 1.9464421064402544e-05, "loss": 0.8888, "step": 686 }, { "epoch": 0.13180172186383365, "grad_norm": 0.932923460488113, "learning_rate": 1.946241263494062e-05, "loss": 0.8462, "step": 687 }, { "epoch": 0.1319935729873618, "grad_norm": 1.1141730630473456, "learning_rate": 1.946040055071507e-05, "loss": 0.8322, "step": 688 }, { "epoch": 0.13218542411088996, "grad_norm": 1.4308921734412736, "learning_rate": 1.945838481250304e-05, "loss": 0.9081, "step": 689 }, { "epoch": 0.13237727523441808, "grad_norm": 1.0408372559893826, "learning_rate": 1.945636542108308e-05, "loss": 0.8417, "step": 690 }, { "epoch": 0.13256912635794624, "grad_norm": 1.0135157473884069, "learning_rate": 1.9454342377235175e-05, "loss": 0.9026, "step": 691 }, { "epoch": 0.13276097748147436, "grad_norm": 1.1888595690237242, "learning_rate": 1.9452315681740693e-05, "loss": 0.8936, "step": 692 }, { "epoch": 0.13295282860500252, "grad_norm": 0.9756128806140344, "learning_rate": 1.9450285335382436e-05, "loss": 0.8081, "step": 693 }, { "epoch": 0.13314467972853067, "grad_norm": 0.9441350470455795, "learning_rate": 1.94482513389446e-05, "loss": 0.8652, "step": 694 }, { "epoch": 0.1333365308520588, "grad_norm": 0.6551034142091455, "learning_rate": 1.9446213693212793e-05, "loss": 0.3146, "step": 695 }, { "epoch": 0.13352838197558695, "grad_norm": 1.0140529908741276, "learning_rate": 1.944417239897404e-05, "loss": 0.9186, "step": 696 }, { "epoch": 0.1337202330991151, "grad_norm": 1.1236024865040444, "learning_rate": 1.9442127457016768e-05, "loss": 0.914, "step": 697 }, { "epoch": 0.13391208422264322, "grad_norm": 0.9562465491167474, "learning_rate": 1.9440078868130817e-05, "loss": 0.8611, "step": 698 }, { "epoch": 0.13410393534617138, "grad_norm": 1.057270592563223, "learning_rate": 1.9438026633107435e-05, "loss": 0.8845, "step": 699 }, { "epoch": 0.1342957864696995, "grad_norm": 0.9479387159062987, "learning_rate": 1.943597075273928e-05, "loss": 0.807, "step": 700 }, { "epoch": 0.13448763759322765, "grad_norm": 0.9470340391835518, "learning_rate": 1.9433911227820408e-05, "loss": 0.8237, "step": 701 }, { "epoch": 0.1346794887167558, "grad_norm": 0.934964260897749, "learning_rate": 1.94318480591463e-05, "loss": 0.8497, "step": 702 }, { "epoch": 0.13487133984028393, "grad_norm": 1.1504145876744276, "learning_rate": 1.9429781247513825e-05, "loss": 0.8128, "step": 703 }, { "epoch": 0.13506319096381209, "grad_norm": 1.0734342090181663, "learning_rate": 1.942771079372127e-05, "loss": 0.9151, "step": 704 }, { "epoch": 0.1352550420873402, "grad_norm": 1.0201671779769996, "learning_rate": 1.9425636698568337e-05, "loss": 0.8438, "step": 705 }, { "epoch": 0.13544689321086836, "grad_norm": 1.0327069019657977, "learning_rate": 1.9423558962856114e-05, "loss": 0.8846, "step": 706 }, { "epoch": 0.13563874433439652, "grad_norm": 0.6564497615762749, "learning_rate": 1.942147758738711e-05, "loss": 0.3208, "step": 707 }, { "epoch": 0.13583059545792464, "grad_norm": 1.1137625781600904, "learning_rate": 1.9419392572965236e-05, "loss": 0.8027, "step": 708 }, { "epoch": 0.1360224465814528, "grad_norm": 1.2369965841293116, "learning_rate": 1.941730392039581e-05, "loss": 0.8743, "step": 709 }, { "epoch": 0.13621429770498095, "grad_norm": 1.0266890150213968, "learning_rate": 1.9415211630485546e-05, "loss": 0.8499, "step": 710 }, { "epoch": 0.13640614882850907, "grad_norm": 1.049110525597187, "learning_rate": 1.941311570404258e-05, "loss": 0.8963, "step": 711 }, { "epoch": 0.13659799995203722, "grad_norm": 1.2108904477447093, "learning_rate": 1.9411016141876438e-05, "loss": 0.829, "step": 712 }, { "epoch": 0.13678985107556535, "grad_norm": 0.6898946342421837, "learning_rate": 1.940891294479806e-05, "loss": 0.2984, "step": 713 }, { "epoch": 0.1369817021990935, "grad_norm": 0.9795107415015999, "learning_rate": 1.9406806113619775e-05, "loss": 0.8642, "step": 714 }, { "epoch": 0.13717355332262166, "grad_norm": 1.3213981933009102, "learning_rate": 1.9404695649155335e-05, "loss": 0.9108, "step": 715 }, { "epoch": 0.13736540444614978, "grad_norm": 1.0302932639318214, "learning_rate": 1.9402581552219878e-05, "loss": 0.8729, "step": 716 }, { "epoch": 0.13755725556967793, "grad_norm": 0.9986460960374773, "learning_rate": 1.9400463823629963e-05, "loss": 0.8519, "step": 717 }, { "epoch": 0.13774910669320609, "grad_norm": 0.8789903412081113, "learning_rate": 1.9398342464203537e-05, "loss": 0.7122, "step": 718 }, { "epoch": 0.1379409578167342, "grad_norm": 0.9755202831427563, "learning_rate": 1.9396217474759953e-05, "loss": 0.8032, "step": 719 }, { "epoch": 0.13813280894026236, "grad_norm": 1.1043039852294263, "learning_rate": 1.939408885611997e-05, "loss": 0.9725, "step": 720 }, { "epoch": 0.1383246600637905, "grad_norm": 1.0798216347096987, "learning_rate": 1.9391956609105742e-05, "loss": 0.8808, "step": 721 }, { "epoch": 0.13851651118731864, "grad_norm": 0.7885202509751763, "learning_rate": 1.938982073454083e-05, "loss": 0.2739, "step": 722 }, { "epoch": 0.1387083623108468, "grad_norm": 1.0394430952830445, "learning_rate": 1.93876812332502e-05, "loss": 0.8329, "step": 723 }, { "epoch": 0.13890021343437492, "grad_norm": 1.0319764613013112, "learning_rate": 1.93855381060602e-05, "loss": 0.8328, "step": 724 }, { "epoch": 0.13909206455790307, "grad_norm": 0.9303009761283202, "learning_rate": 1.9383391353798605e-05, "loss": 0.8752, "step": 725 }, { "epoch": 0.1392839156814312, "grad_norm": 1.0829456682141727, "learning_rate": 1.9381240977294573e-05, "loss": 0.7862, "step": 726 }, { "epoch": 0.13947576680495935, "grad_norm": 0.9122456901433368, "learning_rate": 1.9379086977378664e-05, "loss": 0.8465, "step": 727 }, { "epoch": 0.1396676179284875, "grad_norm": 1.03098895825266, "learning_rate": 1.937692935488284e-05, "loss": 0.7805, "step": 728 }, { "epoch": 0.13985946905201563, "grad_norm": 0.949635098288025, "learning_rate": 1.9374768110640462e-05, "loss": 0.8586, "step": 729 }, { "epoch": 0.14005132017554378, "grad_norm": 1.0753743097433135, "learning_rate": 1.937260324548629e-05, "loss": 0.816, "step": 730 }, { "epoch": 0.14024317129907193, "grad_norm": 1.2158891654765789, "learning_rate": 1.9370434760256475e-05, "loss": 0.7867, "step": 731 }, { "epoch": 0.14043502242260006, "grad_norm": 1.1959435227798652, "learning_rate": 1.9368262655788586e-05, "loss": 0.8686, "step": 732 }, { "epoch": 0.1406268735461282, "grad_norm": 0.7165600515477313, "learning_rate": 1.9366086932921565e-05, "loss": 0.2988, "step": 733 }, { "epoch": 0.14081872466965634, "grad_norm": 1.0156979440200675, "learning_rate": 1.936390759249577e-05, "loss": 0.8449, "step": 734 }, { "epoch": 0.1410105757931845, "grad_norm": 1.001084468148156, "learning_rate": 1.9361724635352946e-05, "loss": 0.8967, "step": 735 }, { "epoch": 0.14120242691671264, "grad_norm": 1.1232137623943679, "learning_rate": 1.935953806233624e-05, "loss": 0.9214, "step": 736 }, { "epoch": 0.14139427804024077, "grad_norm": 1.065693893866454, "learning_rate": 1.93573478742902e-05, "loss": 0.8689, "step": 737 }, { "epoch": 0.14158612916376892, "grad_norm": 1.165110085613514, "learning_rate": 1.9355154072060753e-05, "loss": 0.8358, "step": 738 }, { "epoch": 0.14177798028729705, "grad_norm": 1.1057596234375453, "learning_rate": 1.9352956656495246e-05, "loss": 0.8795, "step": 739 }, { "epoch": 0.1419698314108252, "grad_norm": 0.9652379353424194, "learning_rate": 1.93507556284424e-05, "loss": 0.8318, "step": 740 }, { "epoch": 0.14216168253435335, "grad_norm": 1.3213356199314208, "learning_rate": 1.934855098875234e-05, "loss": 0.8487, "step": 741 }, { "epoch": 0.14235353365788148, "grad_norm": 1.0255098888212397, "learning_rate": 1.9346342738276593e-05, "loss": 0.9252, "step": 742 }, { "epoch": 0.14254538478140963, "grad_norm": 0.6432993230491634, "learning_rate": 1.9344130877868072e-05, "loss": 0.2954, "step": 743 }, { "epoch": 0.14273723590493778, "grad_norm": 0.9382935558266141, "learning_rate": 1.9341915408381085e-05, "loss": 0.8668, "step": 744 }, { "epoch": 0.1429290870284659, "grad_norm": 0.6919944182487441, "learning_rate": 1.9339696330671335e-05, "loss": 0.2885, "step": 745 }, { "epoch": 0.14312093815199406, "grad_norm": 1.016116051410938, "learning_rate": 1.9337473645595917e-05, "loss": 0.8555, "step": 746 }, { "epoch": 0.14331278927552218, "grad_norm": 1.0260547669940792, "learning_rate": 1.9335247354013323e-05, "loss": 0.8505, "step": 747 }, { "epoch": 0.14350464039905034, "grad_norm": 0.9312306696214403, "learning_rate": 1.9333017456783438e-05, "loss": 0.9033, "step": 748 }, { "epoch": 0.1436964915225785, "grad_norm": 1.087577605246591, "learning_rate": 1.9330783954767537e-05, "loss": 0.7865, "step": 749 }, { "epoch": 0.14388834264610662, "grad_norm": 0.9266861973636445, "learning_rate": 1.9328546848828287e-05, "loss": 0.8745, "step": 750 }, { "epoch": 0.14408019376963477, "grad_norm": 1.0223707418354333, "learning_rate": 1.932630613982975e-05, "loss": 0.8445, "step": 751 }, { "epoch": 0.1442720448931629, "grad_norm": 0.9603709374903542, "learning_rate": 1.9324061828637374e-05, "loss": 0.92, "step": 752 }, { "epoch": 0.14446389601669105, "grad_norm": 0.9478279811150162, "learning_rate": 1.932181391611801e-05, "loss": 0.781, "step": 753 }, { "epoch": 0.1446557471402192, "grad_norm": 0.9680248135724452, "learning_rate": 1.931956240313988e-05, "loss": 0.8607, "step": 754 }, { "epoch": 0.14484759826374732, "grad_norm": 1.0902007195443009, "learning_rate": 1.9317307290572618e-05, "loss": 0.8089, "step": 755 }, { "epoch": 0.14503944938727548, "grad_norm": 0.9646921039410863, "learning_rate": 1.9315048579287234e-05, "loss": 0.8932, "step": 756 }, { "epoch": 0.14523130051080363, "grad_norm": 0.910105148247928, "learning_rate": 1.9312786270156135e-05, "loss": 0.8535, "step": 757 }, { "epoch": 0.14542315163433175, "grad_norm": 1.1465206734338083, "learning_rate": 1.931052036405312e-05, "loss": 0.8093, "step": 758 }, { "epoch": 0.1456150027578599, "grad_norm": 0.988044632050515, "learning_rate": 1.9308250861853367e-05, "loss": 0.8332, "step": 759 }, { "epoch": 0.14580685388138803, "grad_norm": 1.141468755756497, "learning_rate": 1.930597776443345e-05, "loss": 0.8256, "step": 760 }, { "epoch": 0.14599870500491618, "grad_norm": 1.1911776561996867, "learning_rate": 1.930370107267133e-05, "loss": 0.9253, "step": 761 }, { "epoch": 0.14619055612844434, "grad_norm": 1.0352535598022292, "learning_rate": 1.930142078744636e-05, "loss": 0.8568, "step": 762 }, { "epoch": 0.14638240725197246, "grad_norm": 1.277086912312307, "learning_rate": 1.9299136909639275e-05, "loss": 0.8026, "step": 763 }, { "epoch": 0.14657425837550062, "grad_norm": 0.753985600711871, "learning_rate": 1.9296849440132203e-05, "loss": 0.3091, "step": 764 }, { "epoch": 0.14676610949902874, "grad_norm": 1.519625890979699, "learning_rate": 1.929455837980865e-05, "loss": 0.8911, "step": 765 }, { "epoch": 0.1469579606225569, "grad_norm": 1.143169013957811, "learning_rate": 1.9292263729553523e-05, "loss": 0.948, "step": 766 }, { "epoch": 0.14714981174608505, "grad_norm": 0.8859294677390344, "learning_rate": 1.9289965490253103e-05, "loss": 0.8456, "step": 767 }, { "epoch": 0.14734166286961317, "grad_norm": 1.0021562045681225, "learning_rate": 1.9287663662795065e-05, "loss": 0.8404, "step": 768 }, { "epoch": 0.14753351399314132, "grad_norm": 1.0388732536279495, "learning_rate": 1.9285358248068468e-05, "loss": 0.9008, "step": 769 }, { "epoch": 0.14772536511666948, "grad_norm": 1.0379877652481786, "learning_rate": 1.928304924696375e-05, "loss": 0.855, "step": 770 }, { "epoch": 0.1479172162401976, "grad_norm": 0.9135880237203347, "learning_rate": 1.928073666037275e-05, "loss": 0.8608, "step": 771 }, { "epoch": 0.14810906736372575, "grad_norm": 0.695510284337323, "learning_rate": 1.927842048918867e-05, "loss": 0.323, "step": 772 }, { "epoch": 0.14830091848725388, "grad_norm": 0.6811680321762051, "learning_rate": 1.9276100734306116e-05, "loss": 0.3093, "step": 773 }, { "epoch": 0.14849276961078203, "grad_norm": 1.2762417958130883, "learning_rate": 1.927377739662107e-05, "loss": 0.8342, "step": 774 }, { "epoch": 0.14868462073431019, "grad_norm": 1.020579478059427, "learning_rate": 1.9271450477030894e-05, "loss": 0.9018, "step": 775 }, { "epoch": 0.1488764718578383, "grad_norm": 1.1664174597870727, "learning_rate": 1.926911997643434e-05, "loss": 0.7847, "step": 776 }, { "epoch": 0.14906832298136646, "grad_norm": 0.9439078390116032, "learning_rate": 1.9266785895731543e-05, "loss": 0.7854, "step": 777 }, { "epoch": 0.1492601741048946, "grad_norm": 1.0976061450239154, "learning_rate": 1.926444823582402e-05, "loss": 0.8276, "step": 778 }, { "epoch": 0.14945202522842274, "grad_norm": 1.1444425810835361, "learning_rate": 1.9262106997614663e-05, "loss": 0.8671, "step": 779 }, { "epoch": 0.1496438763519509, "grad_norm": 0.9492398136026542, "learning_rate": 1.9259762182007757e-05, "loss": 0.8909, "step": 780 }, { "epoch": 0.14983572747547902, "grad_norm": 0.9252481858891172, "learning_rate": 1.9257413789908962e-05, "loss": 0.8695, "step": 781 }, { "epoch": 0.15002757859900717, "grad_norm": 1.0780892528643666, "learning_rate": 1.9255061822225322e-05, "loss": 0.8164, "step": 782 }, { "epoch": 0.15021942972253532, "grad_norm": 0.9958458709382785, "learning_rate": 1.925270627986526e-05, "loss": 0.813, "step": 783 }, { "epoch": 0.15041128084606345, "grad_norm": 1.048176651860475, "learning_rate": 1.925034716373858e-05, "loss": 0.8669, "step": 784 }, { "epoch": 0.1506031319695916, "grad_norm": 1.1715838988273632, "learning_rate": 1.9247984474756474e-05, "loss": 0.8163, "step": 785 }, { "epoch": 0.15079498309311973, "grad_norm": 1.2802136189746793, "learning_rate": 1.92456182138315e-05, "loss": 0.8018, "step": 786 }, { "epoch": 0.15098683421664788, "grad_norm": 0.9408712493918415, "learning_rate": 1.9243248381877605e-05, "loss": 0.8726, "step": 787 }, { "epoch": 0.15117868534017603, "grad_norm": 0.9113820310163783, "learning_rate": 1.9240874979810115e-05, "loss": 0.8501, "step": 788 }, { "epoch": 0.15137053646370416, "grad_norm": 1.1273892881578986, "learning_rate": 1.9238498008545732e-05, "loss": 0.8328, "step": 789 }, { "epoch": 0.1515623875872323, "grad_norm": 0.9345625971594248, "learning_rate": 1.923611746900254e-05, "loss": 0.901, "step": 790 }, { "epoch": 0.15175423871076044, "grad_norm": 1.122919621306202, "learning_rate": 1.9233733362099994e-05, "loss": 0.8717, "step": 791 }, { "epoch": 0.1519460898342886, "grad_norm": 1.0815378944339424, "learning_rate": 1.9231345688758932e-05, "loss": 0.8489, "step": 792 }, { "epoch": 0.15213794095781674, "grad_norm": 1.0309338423202938, "learning_rate": 1.9228954449901576e-05, "loss": 0.7876, "step": 793 }, { "epoch": 0.15232979208134487, "grad_norm": 0.936938138147071, "learning_rate": 1.9226559646451515e-05, "loss": 0.8617, "step": 794 }, { "epoch": 0.15252164320487302, "grad_norm": 0.9584435006438874, "learning_rate": 1.9224161279333714e-05, "loss": 0.8394, "step": 795 }, { "epoch": 0.15271349432840117, "grad_norm": 0.9657587328738025, "learning_rate": 1.9221759349474526e-05, "loss": 0.8616, "step": 796 }, { "epoch": 0.1529053454519293, "grad_norm": 1.0851692538279643, "learning_rate": 1.9219353857801666e-05, "loss": 0.8393, "step": 797 }, { "epoch": 0.15309719657545745, "grad_norm": 1.0545842434271655, "learning_rate": 1.9216944805244234e-05, "loss": 0.7625, "step": 798 }, { "epoch": 0.15328904769898558, "grad_norm": 0.9985933104604855, "learning_rate": 1.9214532192732706e-05, "loss": 0.8418, "step": 799 }, { "epoch": 0.15348089882251373, "grad_norm": 1.3934493698373778, "learning_rate": 1.9212116021198923e-05, "loss": 0.932, "step": 800 }, { "epoch": 0.15367274994604188, "grad_norm": 0.953500703226431, "learning_rate": 1.9209696291576116e-05, "loss": 0.7738, "step": 801 }, { "epoch": 0.15386460106957, "grad_norm": 1.1298373630257579, "learning_rate": 1.9207273004798873e-05, "loss": 0.9043, "step": 802 }, { "epoch": 0.15405645219309816, "grad_norm": 1.1971752737455303, "learning_rate": 1.9204846161803173e-05, "loss": 0.8501, "step": 803 }, { "epoch": 0.1542483033166263, "grad_norm": 0.7654543562211187, "learning_rate": 1.9202415763526353e-05, "loss": 0.3128, "step": 804 }, { "epoch": 0.15444015444015444, "grad_norm": 1.064383102337358, "learning_rate": 1.9199981810907135e-05, "loss": 0.8994, "step": 805 }, { "epoch": 0.1546320055636826, "grad_norm": 1.0289586766775909, "learning_rate": 1.9197544304885604e-05, "loss": 0.8569, "step": 806 }, { "epoch": 0.15482385668721071, "grad_norm": 1.2601094410514744, "learning_rate": 1.919510324640323e-05, "loss": 0.781, "step": 807 }, { "epoch": 0.15501570781073887, "grad_norm": 1.034759157375643, "learning_rate": 1.919265863640284e-05, "loss": 0.8635, "step": 808 }, { "epoch": 0.15520755893426702, "grad_norm": 1.037947405081086, "learning_rate": 1.9190210475828648e-05, "loss": 0.8544, "step": 809 }, { "epoch": 0.15539941005779515, "grad_norm": 0.6684239775316293, "learning_rate": 1.9187758765626228e-05, "loss": 0.2655, "step": 810 }, { "epoch": 0.1555912611813233, "grad_norm": 1.1819901087887277, "learning_rate": 1.9185303506742528e-05, "loss": 0.7695, "step": 811 }, { "epoch": 0.15578311230485142, "grad_norm": 1.0257452055022576, "learning_rate": 1.918284470012587e-05, "loss": 0.8853, "step": 812 }, { "epoch": 0.15597496342837958, "grad_norm": 1.0671073955280785, "learning_rate": 1.9180382346725945e-05, "loss": 0.8703, "step": 813 }, { "epoch": 0.15616681455190773, "grad_norm": 1.1969925215059192, "learning_rate": 1.9177916447493807e-05, "loss": 0.8419, "step": 814 }, { "epoch": 0.15635866567543585, "grad_norm": 0.9495609127035608, "learning_rate": 1.9175447003381892e-05, "loss": 0.8101, "step": 815 }, { "epoch": 0.156550516798964, "grad_norm": 0.9801733093334334, "learning_rate": 1.917297401534399e-05, "loss": 0.8368, "step": 816 }, { "epoch": 0.15674236792249216, "grad_norm": 0.975257668609612, "learning_rate": 1.9170497484335276e-05, "loss": 0.8361, "step": 817 }, { "epoch": 0.15693421904602028, "grad_norm": 0.9163385858122598, "learning_rate": 1.9168017411312284e-05, "loss": 0.8794, "step": 818 }, { "epoch": 0.15712607016954844, "grad_norm": 1.2689314677570511, "learning_rate": 1.9165533797232917e-05, "loss": 0.8684, "step": 819 }, { "epoch": 0.15731792129307656, "grad_norm": 0.8907516858685469, "learning_rate": 1.9163046643056448e-05, "loss": 0.8655, "step": 820 }, { "epoch": 0.15750977241660472, "grad_norm": 0.9659586993242265, "learning_rate": 1.916055594974351e-05, "loss": 0.8537, "step": 821 }, { "epoch": 0.15770162354013287, "grad_norm": 1.0617234965933813, "learning_rate": 1.9158061718256115e-05, "loss": 0.8276, "step": 822 }, { "epoch": 0.157893474663661, "grad_norm": 1.0075989382500603, "learning_rate": 1.9155563949557634e-05, "loss": 0.924, "step": 823 }, { "epoch": 0.15808532578718915, "grad_norm": 0.9627565063876422, "learning_rate": 1.9153062644612802e-05, "loss": 0.8404, "step": 824 }, { "epoch": 0.15827717691071727, "grad_norm": 1.004770689337444, "learning_rate": 1.9150557804387727e-05, "loss": 0.8219, "step": 825 }, { "epoch": 0.15846902803424542, "grad_norm": 0.9179537784407511, "learning_rate": 1.9148049429849882e-05, "loss": 0.8353, "step": 826 }, { "epoch": 0.15866087915777358, "grad_norm": 0.9995778389773614, "learning_rate": 1.9145537521968095e-05, "loss": 0.9201, "step": 827 }, { "epoch": 0.1588527302813017, "grad_norm": 0.954465499355653, "learning_rate": 1.9143022081712567e-05, "loss": 0.8304, "step": 828 }, { "epoch": 0.15904458140482985, "grad_norm": 0.9745163550095656, "learning_rate": 1.9140503110054864e-05, "loss": 0.7964, "step": 829 }, { "epoch": 0.159236432528358, "grad_norm": 0.9609534603207006, "learning_rate": 1.9137980607967917e-05, "loss": 0.7962, "step": 830 }, { "epoch": 0.15942828365188613, "grad_norm": 0.9077944885738506, "learning_rate": 1.913545457642601e-05, "loss": 0.3175, "step": 831 }, { "epoch": 0.15962013477541429, "grad_norm": 0.7198107980760751, "learning_rate": 1.9132925016404805e-05, "loss": 0.3227, "step": 832 }, { "epoch": 0.1598119858989424, "grad_norm": 1.0470828972820132, "learning_rate": 1.9130391928881312e-05, "loss": 0.8986, "step": 833 }, { "epoch": 0.16000383702247056, "grad_norm": 1.0659728074167396, "learning_rate": 1.912785531483392e-05, "loss": 0.8323, "step": 834 }, { "epoch": 0.16019568814599872, "grad_norm": 1.1010237785312118, "learning_rate": 1.912531517524237e-05, "loss": 0.9049, "step": 835 }, { "epoch": 0.16038753926952684, "grad_norm": 1.4117456883421469, "learning_rate": 1.9122771511087757e-05, "loss": 0.872, "step": 836 }, { "epoch": 0.160579390393055, "grad_norm": 1.0702677713240376, "learning_rate": 1.9120224323352553e-05, "loss": 0.8733, "step": 837 }, { "epoch": 0.16077124151658312, "grad_norm": 0.8832157914017508, "learning_rate": 1.9117673613020584e-05, "loss": 0.8156, "step": 838 }, { "epoch": 0.16096309264011127, "grad_norm": 0.9194216357000969, "learning_rate": 1.9115119381077038e-05, "loss": 0.8209, "step": 839 }, { "epoch": 0.16115494376363942, "grad_norm": 1.0573019275713107, "learning_rate": 1.9112561628508456e-05, "loss": 0.8582, "step": 840 }, { "epoch": 0.16134679488716755, "grad_norm": 0.9205466516816402, "learning_rate": 1.911000035630275e-05, "loss": 0.8728, "step": 841 }, { "epoch": 0.1615386460106957, "grad_norm": 1.0546391549704817, "learning_rate": 1.9107435565449185e-05, "loss": 0.8612, "step": 842 }, { "epoch": 0.16173049713422386, "grad_norm": 1.055380355293825, "learning_rate": 1.9104867256938385e-05, "loss": 0.8103, "step": 843 }, { "epoch": 0.16192234825775198, "grad_norm": 1.281931742538664, "learning_rate": 1.9102295431762334e-05, "loss": 0.3399, "step": 844 }, { "epoch": 0.16211419938128013, "grad_norm": 1.0809761402501974, "learning_rate": 1.9099720090914375e-05, "loss": 0.3377, "step": 845 }, { "epoch": 0.16230605050480826, "grad_norm": 1.001630655390883, "learning_rate": 1.909714123538921e-05, "loss": 0.8935, "step": 846 }, { "epoch": 0.1624979016283364, "grad_norm": 1.0361402249108, "learning_rate": 1.9094558866182892e-05, "loss": 0.8637, "step": 847 }, { "epoch": 0.16268975275186456, "grad_norm": 1.0258623492605872, "learning_rate": 1.909197298429284e-05, "loss": 0.8987, "step": 848 }, { "epoch": 0.1628816038753927, "grad_norm": 1.0794539224190345, "learning_rate": 1.9089383590717824e-05, "loss": 0.8387, "step": 849 }, { "epoch": 0.16307345499892084, "grad_norm": 1.3026453856924531, "learning_rate": 1.9086790686457968e-05, "loss": 0.8714, "step": 850 }, { "epoch": 0.16326530612244897, "grad_norm": 1.054280367990185, "learning_rate": 1.908419427251476e-05, "loss": 0.8657, "step": 851 }, { "epoch": 0.16345715724597712, "grad_norm": 0.908703513065058, "learning_rate": 1.908159434989104e-05, "loss": 0.9139, "step": 852 }, { "epoch": 0.16364900836950527, "grad_norm": 1.0786163681370322, "learning_rate": 1.9078990919591e-05, "loss": 0.8518, "step": 853 }, { "epoch": 0.1638408594930334, "grad_norm": 0.9989553816221581, "learning_rate": 1.9076383982620187e-05, "loss": 0.8538, "step": 854 }, { "epoch": 0.16403271061656155, "grad_norm": 1.0015409740137677, "learning_rate": 1.907377353998551e-05, "loss": 0.8257, "step": 855 }, { "epoch": 0.1642245617400897, "grad_norm": 1.0122540946313372, "learning_rate": 1.9071159592695224e-05, "loss": 0.8018, "step": 856 }, { "epoch": 0.16441641286361783, "grad_norm": 1.16172500438297, "learning_rate": 1.9068542141758943e-05, "loss": 0.8819, "step": 857 }, { "epoch": 0.16460826398714598, "grad_norm": 1.0201549447049816, "learning_rate": 1.9065921188187628e-05, "loss": 0.9027, "step": 858 }, { "epoch": 0.1648001151106741, "grad_norm": 0.9873786268240567, "learning_rate": 1.9063296732993598e-05, "loss": 0.8622, "step": 859 }, { "epoch": 0.16499196623420226, "grad_norm": 0.9487645980511417, "learning_rate": 1.906066877719052e-05, "loss": 0.8506, "step": 860 }, { "epoch": 0.1651838173577304, "grad_norm": 0.8954898277011253, "learning_rate": 1.9058037321793425e-05, "loss": 0.8026, "step": 861 }, { "epoch": 0.16537566848125854, "grad_norm": 1.1462042437885194, "learning_rate": 1.9055402367818673e-05, "loss": 0.7957, "step": 862 }, { "epoch": 0.1655675196047867, "grad_norm": 2.5118557766425362, "learning_rate": 1.9052763916284003e-05, "loss": 0.3732, "step": 863 }, { "epoch": 0.16575937072831481, "grad_norm": 1.1134218920048655, "learning_rate": 1.9050121968208484e-05, "loss": 0.9113, "step": 864 }, { "epoch": 0.16595122185184297, "grad_norm": 1.0166957690834277, "learning_rate": 1.904747652461254e-05, "loss": 0.8008, "step": 865 }, { "epoch": 0.16614307297537112, "grad_norm": 1.3072864424824093, "learning_rate": 1.904482758651795e-05, "loss": 0.9059, "step": 866 }, { "epoch": 0.16633492409889925, "grad_norm": 0.9221563911062493, "learning_rate": 1.9042175154947842e-05, "loss": 0.8646, "step": 867 }, { "epoch": 0.1665267752224274, "grad_norm": 1.134470100684133, "learning_rate": 1.9039519230926692e-05, "loss": 0.8622, "step": 868 }, { "epoch": 0.16671862634595555, "grad_norm": 0.9384055088574438, "learning_rate": 1.903685981548032e-05, "loss": 0.8696, "step": 869 }, { "epoch": 0.16691047746948368, "grad_norm": 1.0973171307116976, "learning_rate": 1.9034196909635906e-05, "loss": 0.8989, "step": 870 }, { "epoch": 0.16710232859301183, "grad_norm": 1.2040558748022194, "learning_rate": 1.903153051442196e-05, "loss": 0.8549, "step": 871 }, { "epoch": 0.16729417971653995, "grad_norm": 0.9113039777393404, "learning_rate": 1.9028860630868364e-05, "loss": 0.835, "step": 872 }, { "epoch": 0.1674860308400681, "grad_norm": 0.9482607579202664, "learning_rate": 1.9026187260006326e-05, "loss": 0.8316, "step": 873 }, { "epoch": 0.16767788196359626, "grad_norm": 0.9945151971776587, "learning_rate": 1.902351040286841e-05, "loss": 0.837, "step": 874 }, { "epoch": 0.16786973308712438, "grad_norm": 1.0764414957548598, "learning_rate": 1.9020830060488528e-05, "loss": 0.8658, "step": 875 }, { "epoch": 0.16806158421065254, "grad_norm": 1.0289303138650086, "learning_rate": 1.9018146233901934e-05, "loss": 0.8882, "step": 876 }, { "epoch": 0.16825343533418066, "grad_norm": 1.093129142945324, "learning_rate": 1.901545892414523e-05, "loss": 0.8839, "step": 877 }, { "epoch": 0.16844528645770881, "grad_norm": 0.9061530292792211, "learning_rate": 1.9012768132256366e-05, "loss": 0.8535, "step": 878 }, { "epoch": 0.16863713758123697, "grad_norm": 1.243970193577445, "learning_rate": 1.9010073859274625e-05, "loss": 0.8918, "step": 879 }, { "epoch": 0.1688289887047651, "grad_norm": 1.1900008565389049, "learning_rate": 1.9007376106240654e-05, "loss": 0.814, "step": 880 }, { "epoch": 0.16902083982829325, "grad_norm": 1.0323658380374283, "learning_rate": 1.9004674874196427e-05, "loss": 0.8614, "step": 881 }, { "epoch": 0.1692126909518214, "grad_norm": 1.0120135938950383, "learning_rate": 1.900197016418527e-05, "loss": 0.8176, "step": 882 }, { "epoch": 0.16940454207534952, "grad_norm": 0.9441306847369573, "learning_rate": 1.899926197725185e-05, "loss": 0.8574, "step": 883 }, { "epoch": 0.16959639319887768, "grad_norm": 0.965385564353847, "learning_rate": 1.899655031444218e-05, "loss": 0.9013, "step": 884 }, { "epoch": 0.1697882443224058, "grad_norm": 1.029642421551934, "learning_rate": 1.8993835176803613e-05, "loss": 0.8434, "step": 885 }, { "epoch": 0.16998009544593395, "grad_norm": 1.0447823254484558, "learning_rate": 1.8991116565384842e-05, "loss": 0.8469, "step": 886 }, { "epoch": 0.1701719465694621, "grad_norm": 0.9991130499595787, "learning_rate": 1.89883944812359e-05, "loss": 0.8715, "step": 887 }, { "epoch": 0.17036379769299023, "grad_norm": 1.1397364121609226, "learning_rate": 1.8985668925408173e-05, "loss": 0.8784, "step": 888 }, { "epoch": 0.17055564881651838, "grad_norm": 1.016567859701069, "learning_rate": 1.8982939898954377e-05, "loss": 0.8604, "step": 889 }, { "epoch": 0.17074749994004654, "grad_norm": 0.9538383891335077, "learning_rate": 1.8980207402928566e-05, "loss": 0.8526, "step": 890 }, { "epoch": 0.17093935106357466, "grad_norm": 1.006751833326214, "learning_rate": 1.8977471438386154e-05, "loss": 0.9091, "step": 891 }, { "epoch": 0.17113120218710282, "grad_norm": 0.903593030336375, "learning_rate": 1.897473200638386e-05, "loss": 0.8932, "step": 892 }, { "epoch": 0.17132305331063094, "grad_norm": 1.2758176389287168, "learning_rate": 1.897198910797978e-05, "loss": 0.3724, "step": 893 }, { "epoch": 0.1715149044341591, "grad_norm": 1.039420519182296, "learning_rate": 1.896924274423333e-05, "loss": 0.9179, "step": 894 }, { "epoch": 0.17170675555768725, "grad_norm": 1.078187823805199, "learning_rate": 1.8966492916205253e-05, "loss": 0.8718, "step": 895 }, { "epoch": 0.17189860668121537, "grad_norm": 1.1533127867515558, "learning_rate": 1.8963739624957655e-05, "loss": 0.9081, "step": 896 }, { "epoch": 0.17209045780474352, "grad_norm": 1.0360850060127151, "learning_rate": 1.8960982871553963e-05, "loss": 0.7922, "step": 897 }, { "epoch": 0.17228230892827165, "grad_norm": 1.1283725933332576, "learning_rate": 1.8958222657058945e-05, "loss": 0.8211, "step": 898 }, { "epoch": 0.1724741600517998, "grad_norm": 0.9574117547711989, "learning_rate": 1.8955458982538707e-05, "loss": 0.8206, "step": 899 }, { "epoch": 0.17266601117532795, "grad_norm": 1.157253725431378, "learning_rate": 1.8952691849060694e-05, "loss": 0.7986, "step": 900 }, { "epoch": 0.17285786229885608, "grad_norm": 0.9060873447764366, "learning_rate": 1.8949921257693688e-05, "loss": 0.8352, "step": 901 }, { "epoch": 0.17304971342238423, "grad_norm": 1.060365725674547, "learning_rate": 1.894714720950779e-05, "loss": 0.8906, "step": 902 }, { "epoch": 0.17324156454591239, "grad_norm": 0.9275524560208396, "learning_rate": 1.894436970557446e-05, "loss": 0.8249, "step": 903 }, { "epoch": 0.1734334156694405, "grad_norm": 0.9427620145599508, "learning_rate": 1.8941588746966473e-05, "loss": 0.8304, "step": 904 }, { "epoch": 0.17362526679296866, "grad_norm": 1.0026053052774977, "learning_rate": 1.893880433475795e-05, "loss": 0.8628, "step": 905 }, { "epoch": 0.1738171179164968, "grad_norm": 1.1883023417214582, "learning_rate": 1.893601647002435e-05, "loss": 0.8962, "step": 906 }, { "epoch": 0.17400896904002494, "grad_norm": 1.0206314643997787, "learning_rate": 1.8933225153842446e-05, "loss": 0.8986, "step": 907 }, { "epoch": 0.1742008201635531, "grad_norm": 1.208989198740364, "learning_rate": 1.8930430387290365e-05, "loss": 0.9005, "step": 908 }, { "epoch": 0.17439267128708122, "grad_norm": 1.0715465769218486, "learning_rate": 1.8927632171447555e-05, "loss": 0.874, "step": 909 }, { "epoch": 0.17458452241060937, "grad_norm": 0.9900226319020026, "learning_rate": 1.8924830507394803e-05, "loss": 0.8228, "step": 910 }, { "epoch": 0.1747763735341375, "grad_norm": 0.9431744207882916, "learning_rate": 1.8922025396214218e-05, "loss": 0.9323, "step": 911 }, { "epoch": 0.17496822465766565, "grad_norm": 1.037975679092361, "learning_rate": 1.8919216838989252e-05, "loss": 0.8861, "step": 912 }, { "epoch": 0.1751600757811938, "grad_norm": 0.9686052822447557, "learning_rate": 1.8916404836804677e-05, "loss": 0.827, "step": 913 }, { "epoch": 0.17535192690472193, "grad_norm": 1.009339105412468, "learning_rate": 1.8913589390746607e-05, "loss": 0.9498, "step": 914 }, { "epoch": 0.17554377802825008, "grad_norm": 1.0253841087220155, "learning_rate": 1.8910770501902475e-05, "loss": 0.8712, "step": 915 }, { "epoch": 0.17573562915177823, "grad_norm": 0.9832965336897971, "learning_rate": 1.8907948171361054e-05, "loss": 0.8017, "step": 916 }, { "epoch": 0.17592748027530636, "grad_norm": 1.0308074411074737, "learning_rate": 1.8905122400212435e-05, "loss": 0.8985, "step": 917 }, { "epoch": 0.1761193313988345, "grad_norm": 1.030936630879553, "learning_rate": 1.890229318954805e-05, "loss": 0.9422, "step": 918 }, { "epoch": 0.17631118252236264, "grad_norm": 1.0312021866620904, "learning_rate": 1.8899460540460653e-05, "loss": 0.903, "step": 919 }, { "epoch": 0.1765030336458908, "grad_norm": 1.0507472500087969, "learning_rate": 1.8896624454044324e-05, "loss": 0.8516, "step": 920 }, { "epoch": 0.17669488476941894, "grad_norm": 0.9814952055772599, "learning_rate": 1.8893784931394476e-05, "loss": 0.9, "step": 921 }, { "epoch": 0.17688673589294707, "grad_norm": 0.9231457253835407, "learning_rate": 1.8890941973607843e-05, "loss": 0.8972, "step": 922 }, { "epoch": 0.17707858701647522, "grad_norm": 0.9809913802317721, "learning_rate": 1.88880955817825e-05, "loss": 0.7627, "step": 923 }, { "epoch": 0.17727043814000334, "grad_norm": 0.8986387295969887, "learning_rate": 1.8885245757017824e-05, "loss": 0.8653, "step": 924 }, { "epoch": 0.1774622892635315, "grad_norm": 1.1083269699496414, "learning_rate": 1.8882392500414535e-05, "loss": 0.8826, "step": 925 }, { "epoch": 0.17765414038705965, "grad_norm": 0.9101561081710766, "learning_rate": 1.887953581307468e-05, "loss": 0.7877, "step": 926 }, { "epoch": 0.17784599151058778, "grad_norm": 1.0536255011239148, "learning_rate": 1.8876675696101627e-05, "loss": 0.8352, "step": 927 }, { "epoch": 0.17803784263411593, "grad_norm": 1.0228415583000767, "learning_rate": 1.8873812150600064e-05, "loss": 0.8457, "step": 928 }, { "epoch": 0.17822969375764408, "grad_norm": 0.9468605120787813, "learning_rate": 1.887094517767601e-05, "loss": 0.9483, "step": 929 }, { "epoch": 0.1784215448811722, "grad_norm": 1.1219812326121994, "learning_rate": 1.88680747784368e-05, "loss": 0.8189, "step": 930 }, { "epoch": 0.17861339600470036, "grad_norm": 1.1643538519256027, "learning_rate": 1.8865200953991105e-05, "loss": 0.85, "step": 931 }, { "epoch": 0.17880524712822848, "grad_norm": 0.973650571985732, "learning_rate": 1.88623237054489e-05, "loss": 0.8286, "step": 932 }, { "epoch": 0.17899709825175664, "grad_norm": 0.9698083663869155, "learning_rate": 1.885944303392151e-05, "loss": 0.9012, "step": 933 }, { "epoch": 0.1791889493752848, "grad_norm": 0.9605187095195283, "learning_rate": 1.8856558940521553e-05, "loss": 0.9273, "step": 934 }, { "epoch": 0.17938080049881291, "grad_norm": 0.9170391697262624, "learning_rate": 1.8853671426362985e-05, "loss": 0.8648, "step": 935 }, { "epoch": 0.17957265162234107, "grad_norm": 1.1392284528725336, "learning_rate": 1.885078049256108e-05, "loss": 0.7876, "step": 936 }, { "epoch": 0.1797645027458692, "grad_norm": 0.8018449838699837, "learning_rate": 1.8847886140232438e-05, "loss": 0.3185, "step": 937 }, { "epoch": 0.17995635386939735, "grad_norm": 0.9847970593196193, "learning_rate": 1.8844988370494966e-05, "loss": 0.9119, "step": 938 }, { "epoch": 0.1801482049929255, "grad_norm": 1.0154015120737976, "learning_rate": 1.8842087184467904e-05, "loss": 0.9027, "step": 939 }, { "epoch": 0.18034005611645362, "grad_norm": 0.9735362506646809, "learning_rate": 1.8839182583271802e-05, "loss": 0.8739, "step": 940 }, { "epoch": 0.18053190723998178, "grad_norm": 1.0264929752351901, "learning_rate": 1.883627456802854e-05, "loss": 0.8305, "step": 941 }, { "epoch": 0.18072375836350993, "grad_norm": 0.8279772817650861, "learning_rate": 1.8833363139861305e-05, "loss": 0.3288, "step": 942 }, { "epoch": 0.18091560948703805, "grad_norm": 1.0761323511948342, "learning_rate": 1.8830448299894613e-05, "loss": 0.828, "step": 943 }, { "epoch": 0.1811074606105662, "grad_norm": 1.2859730203748954, "learning_rate": 1.8827530049254282e-05, "loss": 0.8231, "step": 944 }, { "epoch": 0.18129931173409433, "grad_norm": 1.0615403185298928, "learning_rate": 1.882460838906747e-05, "loss": 0.8977, "step": 945 }, { "epoch": 0.18149116285762248, "grad_norm": 0.9365447960716085, "learning_rate": 1.882168332046263e-05, "loss": 0.8345, "step": 946 }, { "epoch": 0.18168301398115064, "grad_norm": 0.9885311597225039, "learning_rate": 1.8818754844569546e-05, "loss": 0.8954, "step": 947 }, { "epoch": 0.18187486510467876, "grad_norm": 1.0978930667414435, "learning_rate": 1.881582296251931e-05, "loss": 0.7817, "step": 948 }, { "epoch": 0.18206671622820692, "grad_norm": 0.9443270865004868, "learning_rate": 1.8812887675444335e-05, "loss": 0.8031, "step": 949 }, { "epoch": 0.18225856735173504, "grad_norm": 0.9753116410117798, "learning_rate": 1.8809948984478343e-05, "loss": 0.8805, "step": 950 }, { "epoch": 0.1824504184752632, "grad_norm": 1.185985599838809, "learning_rate": 1.880700689075638e-05, "loss": 0.8342, "step": 951 }, { "epoch": 0.18264226959879135, "grad_norm": 1.074330832401568, "learning_rate": 1.8804061395414795e-05, "loss": 0.8225, "step": 952 }, { "epoch": 0.18283412072231947, "grad_norm": 1.0225230169130775, "learning_rate": 1.8801112499591262e-05, "loss": 0.8422, "step": 953 }, { "epoch": 0.18302597184584762, "grad_norm": 1.2161668251343691, "learning_rate": 1.879816020442476e-05, "loss": 0.8323, "step": 954 }, { "epoch": 0.18321782296937578, "grad_norm": 1.012662775264417, "learning_rate": 1.8795204511055584e-05, "loss": 0.8958, "step": 955 }, { "epoch": 0.1834096740929039, "grad_norm": 0.9515183597923307, "learning_rate": 1.8792245420625343e-05, "loss": 0.8666, "step": 956 }, { "epoch": 0.18360152521643205, "grad_norm": 0.9184334138325905, "learning_rate": 1.8789282934276952e-05, "loss": 0.8213, "step": 957 }, { "epoch": 0.18379337633996018, "grad_norm": 1.1385066833118465, "learning_rate": 1.8786317053154644e-05, "loss": 0.8237, "step": 958 }, { "epoch": 0.18398522746348833, "grad_norm": 1.1798340188879644, "learning_rate": 1.8783347778403965e-05, "loss": 0.9262, "step": 959 }, { "epoch": 0.18417707858701649, "grad_norm": 1.2205476634534884, "learning_rate": 1.8780375111171765e-05, "loss": 0.7988, "step": 960 }, { "epoch": 0.1843689297105446, "grad_norm": 0.9661066638429588, "learning_rate": 1.8777399052606213e-05, "loss": 0.8453, "step": 961 }, { "epoch": 0.18456078083407276, "grad_norm": 1.0528952769117632, "learning_rate": 1.877441960385677e-05, "loss": 0.879, "step": 962 }, { "epoch": 0.1847526319576009, "grad_norm": 0.889347472722738, "learning_rate": 1.8771436766074232e-05, "loss": 0.8449, "step": 963 }, { "epoch": 0.18494448308112904, "grad_norm": 0.9028630760785543, "learning_rate": 1.876845054041068e-05, "loss": 0.9361, "step": 964 }, { "epoch": 0.1851363342046572, "grad_norm": 0.9475633804399629, "learning_rate": 1.8765460928019525e-05, "loss": 0.8127, "step": 965 }, { "epoch": 0.18532818532818532, "grad_norm": 0.9517612802548124, "learning_rate": 1.8762467930055463e-05, "loss": 0.8256, "step": 966 }, { "epoch": 0.18552003645171347, "grad_norm": 0.9782362908553554, "learning_rate": 1.875947154767452e-05, "loss": 0.8516, "step": 967 }, { "epoch": 0.18571188757524162, "grad_norm": 1.0654632714223182, "learning_rate": 1.8756471782034014e-05, "loss": 0.8774, "step": 968 }, { "epoch": 0.18590373869876975, "grad_norm": 0.9799949905992545, "learning_rate": 1.8753468634292577e-05, "loss": 0.8519, "step": 969 }, { "epoch": 0.1860955898222979, "grad_norm": 1.0179241200322338, "learning_rate": 1.8750462105610147e-05, "loss": 0.9003, "step": 970 }, { "epoch": 0.18628744094582603, "grad_norm": 1.1410598257406719, "learning_rate": 1.874745219714796e-05, "loss": 0.8314, "step": 971 }, { "epoch": 0.18647929206935418, "grad_norm": 0.771197707280329, "learning_rate": 1.8744438910068567e-05, "loss": 0.319, "step": 972 }, { "epoch": 0.18667114319288233, "grad_norm": 0.6836653668266268, "learning_rate": 1.8741422245535824e-05, "loss": 0.3084, "step": 973 }, { "epoch": 0.18686299431641046, "grad_norm": 0.9242599021375278, "learning_rate": 1.8738402204714882e-05, "loss": 0.8889, "step": 974 }, { "epoch": 0.1870548454399386, "grad_norm": 1.3327421678183249, "learning_rate": 1.873537878877221e-05, "loss": 0.8107, "step": 975 }, { "epoch": 0.18724669656346676, "grad_norm": 0.9584368120859077, "learning_rate": 1.873235199887556e-05, "loss": 0.8009, "step": 976 }, { "epoch": 0.1874385476869949, "grad_norm": 1.020192671807852, "learning_rate": 1.872932183619401e-05, "loss": 0.8069, "step": 977 }, { "epoch": 0.18763039881052304, "grad_norm": 1.036062039074708, "learning_rate": 1.872628830189793e-05, "loss": 0.768, "step": 978 }, { "epoch": 0.18782224993405117, "grad_norm": 1.0159213075217641, "learning_rate": 1.8723251397158987e-05, "loss": 0.8206, "step": 979 }, { "epoch": 0.18801410105757932, "grad_norm": 1.0562867807495488, "learning_rate": 1.872021112315016e-05, "loss": 0.9139, "step": 980 }, { "epoch": 0.18820595218110747, "grad_norm": 1.011602126526792, "learning_rate": 1.871716748104572e-05, "loss": 0.8797, "step": 981 }, { "epoch": 0.1883978033046356, "grad_norm": 0.9589750438752075, "learning_rate": 1.8714120472021252e-05, "loss": 0.8355, "step": 982 }, { "epoch": 0.18858965442816375, "grad_norm": 0.927311597134932, "learning_rate": 1.8711070097253625e-05, "loss": 0.8129, "step": 983 }, { "epoch": 0.18878150555169187, "grad_norm": 0.711696646409667, "learning_rate": 1.870801635792102e-05, "loss": 0.3402, "step": 984 }, { "epoch": 0.18897335667522003, "grad_norm": 1.0142029060487383, "learning_rate": 1.870495925520292e-05, "loss": 0.9006, "step": 985 }, { "epoch": 0.18916520779874818, "grad_norm": 0.9841971760990108, "learning_rate": 1.8701898790280086e-05, "loss": 0.7874, "step": 986 }, { "epoch": 0.1893570589222763, "grad_norm": 1.062748555745718, "learning_rate": 1.8698834964334604e-05, "loss": 0.9069, "step": 987 }, { "epoch": 0.18954891004580446, "grad_norm": 1.0388820791208024, "learning_rate": 1.8695767778549843e-05, "loss": 0.8631, "step": 988 }, { "epoch": 0.1897407611693326, "grad_norm": 1.03723187047132, "learning_rate": 1.8692697234110474e-05, "loss": 0.8796, "step": 989 }, { "epoch": 0.18993261229286074, "grad_norm": 0.9648900995827643, "learning_rate": 1.8689623332202464e-05, "loss": 0.8341, "step": 990 }, { "epoch": 0.1901244634163889, "grad_norm": 1.0304732962104146, "learning_rate": 1.868654607401308e-05, "loss": 0.8783, "step": 991 }, { "epoch": 0.19031631453991701, "grad_norm": 0.8516614594251256, "learning_rate": 1.8683465460730876e-05, "loss": 0.82, "step": 992 }, { "epoch": 0.19050816566344517, "grad_norm": 0.9314174163451617, "learning_rate": 1.8680381493545715e-05, "loss": 0.7638, "step": 993 }, { "epoch": 0.19070001678697332, "grad_norm": 1.0191480474678496, "learning_rate": 1.8677294173648746e-05, "loss": 0.8723, "step": 994 }, { "epoch": 0.19089186791050144, "grad_norm": 1.4194611616570563, "learning_rate": 1.8674203502232414e-05, "loss": 0.8544, "step": 995 }, { "epoch": 0.1910837190340296, "grad_norm": 1.0368638695760664, "learning_rate": 1.8671109480490467e-05, "loss": 0.8939, "step": 996 }, { "epoch": 0.19127557015755772, "grad_norm": 0.9966411765043484, "learning_rate": 1.8668012109617933e-05, "loss": 0.8373, "step": 997 }, { "epoch": 0.19146742128108588, "grad_norm": 1.120839336207458, "learning_rate": 1.8664911390811147e-05, "loss": 0.9121, "step": 998 }, { "epoch": 0.19165927240461403, "grad_norm": 0.7259576679848194, "learning_rate": 1.8661807325267727e-05, "loss": 0.3189, "step": 999 }, { "epoch": 0.19185112352814215, "grad_norm": 0.9918258868706631, "learning_rate": 1.8658699914186585e-05, "loss": 0.8499, "step": 1000 }, { "epoch": 0.1920429746516703, "grad_norm": 1.01974971027852, "learning_rate": 1.8655589158767938e-05, "loss": 0.8813, "step": 1001 }, { "epoch": 0.19223482577519846, "grad_norm": 1.0872060003864263, "learning_rate": 1.8652475060213275e-05, "loss": 0.8803, "step": 1002 }, { "epoch": 0.19242667689872658, "grad_norm": 1.1968673901089497, "learning_rate": 1.864935761972539e-05, "loss": 0.8506, "step": 1003 }, { "epoch": 0.19261852802225474, "grad_norm": 0.9163620664168098, "learning_rate": 1.8646236838508363e-05, "loss": 0.8443, "step": 1004 }, { "epoch": 0.19281037914578286, "grad_norm": 0.8880953140863282, "learning_rate": 1.8643112717767565e-05, "loss": 0.8549, "step": 1005 }, { "epoch": 0.19300223026931101, "grad_norm": 0.971419339347436, "learning_rate": 1.8639985258709656e-05, "loss": 0.8851, "step": 1006 }, { "epoch": 0.19319408139283917, "grad_norm": 1.0273173157796947, "learning_rate": 1.863685446254259e-05, "loss": 0.873, "step": 1007 }, { "epoch": 0.1933859325163673, "grad_norm": 0.9372891373760787, "learning_rate": 1.8633720330475602e-05, "loss": 0.8633, "step": 1008 }, { "epoch": 0.19357778363989545, "grad_norm": 0.9889523592763505, "learning_rate": 1.863058286371922e-05, "loss": 0.8958, "step": 1009 }, { "epoch": 0.19376963476342357, "grad_norm": 0.9521211298661042, "learning_rate": 1.862744206348526e-05, "loss": 0.8731, "step": 1010 }, { "epoch": 0.19396148588695172, "grad_norm": 1.0227098662363672, "learning_rate": 1.8624297930986827e-05, "loss": 0.8819, "step": 1011 }, { "epoch": 0.19415333701047988, "grad_norm": 1.0175453671240196, "learning_rate": 1.862115046743831e-05, "loss": 0.841, "step": 1012 }, { "epoch": 0.194345188134008, "grad_norm": 1.0235738915863675, "learning_rate": 1.8617999674055386e-05, "loss": 0.9087, "step": 1013 }, { "epoch": 0.19453703925753615, "grad_norm": 0.860659150517995, "learning_rate": 1.8614845552055017e-05, "loss": 0.9044, "step": 1014 }, { "epoch": 0.1947288903810643, "grad_norm": 0.8844020377463286, "learning_rate": 1.861168810265545e-05, "loss": 0.8604, "step": 1015 }, { "epoch": 0.19492074150459243, "grad_norm": 1.1052246871023634, "learning_rate": 1.8608527327076223e-05, "loss": 0.8457, "step": 1016 }, { "epoch": 0.19511259262812058, "grad_norm": 0.997944321520435, "learning_rate": 1.860536322653815e-05, "loss": 0.8378, "step": 1017 }, { "epoch": 0.1953044437516487, "grad_norm": 0.9177123438343545, "learning_rate": 1.8602195802263342e-05, "loss": 0.8354, "step": 1018 }, { "epoch": 0.19549629487517686, "grad_norm": 0.9581109569146887, "learning_rate": 1.8599025055475174e-05, "loss": 0.887, "step": 1019 }, { "epoch": 0.19568814599870502, "grad_norm": 0.9597099527212488, "learning_rate": 1.859585098739832e-05, "loss": 0.8485, "step": 1020 }, { "epoch": 0.19587999712223314, "grad_norm": 0.7168696692399915, "learning_rate": 1.8592673599258736e-05, "loss": 0.3031, "step": 1021 }, { "epoch": 0.1960718482457613, "grad_norm": 0.9853023113368657, "learning_rate": 1.8589492892283653e-05, "loss": 0.8593, "step": 1022 }, { "epoch": 0.19626369936928942, "grad_norm": 0.9622693072782171, "learning_rate": 1.8586308867701592e-05, "loss": 0.8058, "step": 1023 }, { "epoch": 0.19645555049281757, "grad_norm": 0.9129632531677976, "learning_rate": 1.8583121526742345e-05, "loss": 0.8436, "step": 1024 }, { "epoch": 0.19664740161634572, "grad_norm": 1.046600204387308, "learning_rate": 1.8579930870636994e-05, "loss": 0.9071, "step": 1025 }, { "epoch": 0.19683925273987385, "grad_norm": 0.9445792650613254, "learning_rate": 1.85767369006179e-05, "loss": 0.8902, "step": 1026 }, { "epoch": 0.197031103863402, "grad_norm": 1.0848289561589017, "learning_rate": 1.85735396179187e-05, "loss": 0.8004, "step": 1027 }, { "epoch": 0.19722295498693015, "grad_norm": 1.0113726304963533, "learning_rate": 1.8570339023774315e-05, "loss": 0.8387, "step": 1028 }, { "epoch": 0.19741480611045828, "grad_norm": 0.8804411165724526, "learning_rate": 1.8567135119420942e-05, "loss": 0.8083, "step": 1029 }, { "epoch": 0.19760665723398643, "grad_norm": 0.6731675945089165, "learning_rate": 1.856392790609606e-05, "loss": 0.3072, "step": 1030 }, { "epoch": 0.19779850835751456, "grad_norm": 1.0805643505823457, "learning_rate": 1.856071738503842e-05, "loss": 0.8429, "step": 1031 }, { "epoch": 0.1979903594810427, "grad_norm": 0.9900065004523522, "learning_rate": 1.855750355748805e-05, "loss": 0.8124, "step": 1032 }, { "epoch": 0.19818221060457086, "grad_norm": 1.0019671764244722, "learning_rate": 1.8554286424686268e-05, "loss": 0.869, "step": 1033 }, { "epoch": 0.198374061728099, "grad_norm": 0.9700062093671911, "learning_rate": 1.8551065987875655e-05, "loss": 0.8822, "step": 1034 }, { "epoch": 0.19856591285162714, "grad_norm": 1.1983663786663667, "learning_rate": 1.8547842248300075e-05, "loss": 0.8575, "step": 1035 }, { "epoch": 0.19875776397515527, "grad_norm": 1.0049024807155769, "learning_rate": 1.854461520720467e-05, "loss": 0.8048, "step": 1036 }, { "epoch": 0.19894961509868342, "grad_norm": 0.6514346400634056, "learning_rate": 1.854138486583584e-05, "loss": 0.2856, "step": 1037 }, { "epoch": 0.19914146622221157, "grad_norm": 0.9661917138302571, "learning_rate": 1.8538151225441282e-05, "loss": 0.8366, "step": 1038 }, { "epoch": 0.1993333173457397, "grad_norm": 1.1346685761717121, "learning_rate": 1.853491428726996e-05, "loss": 0.8592, "step": 1039 }, { "epoch": 0.19952516846926785, "grad_norm": 0.9586274054738024, "learning_rate": 1.8531674052572102e-05, "loss": 0.8215, "step": 1040 }, { "epoch": 0.199717019592796, "grad_norm": 1.161516338763645, "learning_rate": 1.852843052259922e-05, "loss": 0.8695, "step": 1041 }, { "epoch": 0.19990887071632413, "grad_norm": 0.9668852062396248, "learning_rate": 1.8525183698604098e-05, "loss": 0.8844, "step": 1042 }, { "epoch": 0.20010072183985228, "grad_norm": 0.9988708261854247, "learning_rate": 1.8521933581840782e-05, "loss": 0.7841, "step": 1043 }, { "epoch": 0.2002925729633804, "grad_norm": 0.9860442050085725, "learning_rate": 1.8518680173564608e-05, "loss": 0.8546, "step": 1044 }, { "epoch": 0.20048442408690856, "grad_norm": 0.885759447104051, "learning_rate": 1.8515423475032166e-05, "loss": 0.842, "step": 1045 }, { "epoch": 0.2006762752104367, "grad_norm": 1.20145933726807, "learning_rate": 1.8512163487501323e-05, "loss": 0.8126, "step": 1046 }, { "epoch": 0.20086812633396484, "grad_norm": 1.0404994332453357, "learning_rate": 1.8508900212231225e-05, "loss": 0.8534, "step": 1047 }, { "epoch": 0.201059977457493, "grad_norm": 1.033538510283685, "learning_rate": 1.850563365048227e-05, "loss": 0.906, "step": 1048 }, { "epoch": 0.2012518285810211, "grad_norm": 1.0936244383236222, "learning_rate": 1.8502363803516145e-05, "loss": 0.8087, "step": 1049 }, { "epoch": 0.20144367970454927, "grad_norm": 1.049133624015507, "learning_rate": 1.8499090672595785e-05, "loss": 0.8465, "step": 1050 }, { "epoch": 0.20163553082807742, "grad_norm": 0.699411185471206, "learning_rate": 1.8495814258985414e-05, "loss": 0.3376, "step": 1051 }, { "epoch": 0.20182738195160554, "grad_norm": 0.937221380938905, "learning_rate": 1.849253456395051e-05, "loss": 0.8848, "step": 1052 }, { "epoch": 0.2020192330751337, "grad_norm": 1.1521347438300586, "learning_rate": 1.8489251588757825e-05, "loss": 0.8272, "step": 1053 }, { "epoch": 0.20221108419866185, "grad_norm": 1.058277312552453, "learning_rate": 1.8485965334675372e-05, "loss": 0.8247, "step": 1054 }, { "epoch": 0.20240293532218998, "grad_norm": 0.93277878909176, "learning_rate": 1.8482675802972438e-05, "loss": 0.8219, "step": 1055 }, { "epoch": 0.20259478644571813, "grad_norm": 0.9521578839658791, "learning_rate": 1.8479382994919572e-05, "loss": 0.8016, "step": 1056 }, { "epoch": 0.20278663756924625, "grad_norm": 0.9255928493883685, "learning_rate": 1.8476086911788588e-05, "loss": 0.8455, "step": 1057 }, { "epoch": 0.2029784886927744, "grad_norm": 0.9226160385676792, "learning_rate": 1.8472787554852566e-05, "loss": 0.9204, "step": 1058 }, { "epoch": 0.20317033981630256, "grad_norm": 1.212964524879874, "learning_rate": 1.8469484925385846e-05, "loss": 0.7923, "step": 1059 }, { "epoch": 0.20336219093983068, "grad_norm": 0.983446100693176, "learning_rate": 1.8466179024664043e-05, "loss": 0.8899, "step": 1060 }, { "epoch": 0.20355404206335884, "grad_norm": 0.866584562402715, "learning_rate": 1.8462869853964022e-05, "loss": 0.8528, "step": 1061 }, { "epoch": 0.203745893186887, "grad_norm": 0.98305793449149, "learning_rate": 1.8459557414563926e-05, "loss": 0.9042, "step": 1062 }, { "epoch": 0.20393774431041511, "grad_norm": 1.0730907839589052, "learning_rate": 1.8456241707743143e-05, "loss": 0.8807, "step": 1063 }, { "epoch": 0.20412959543394327, "grad_norm": 0.9951743545712692, "learning_rate": 1.8452922734782336e-05, "loss": 0.8654, "step": 1064 }, { "epoch": 0.2043214465574714, "grad_norm": 1.0184664547997093, "learning_rate": 1.8449600496963426e-05, "loss": 0.9199, "step": 1065 }, { "epoch": 0.20451329768099955, "grad_norm": 0.9693423355427335, "learning_rate": 1.8446274995569596e-05, "loss": 0.8855, "step": 1066 }, { "epoch": 0.2047051488045277, "grad_norm": 0.8666010216839564, "learning_rate": 1.8442946231885283e-05, "loss": 0.8615, "step": 1067 }, { "epoch": 0.20489699992805582, "grad_norm": 0.6832862691591437, "learning_rate": 1.8439614207196192e-05, "loss": 0.3365, "step": 1068 }, { "epoch": 0.20508885105158398, "grad_norm": 1.011718152596057, "learning_rate": 1.843627892278929e-05, "loss": 0.8951, "step": 1069 }, { "epoch": 0.2052807021751121, "grad_norm": 0.9202066509911291, "learning_rate": 1.8432940379952787e-05, "loss": 0.8383, "step": 1070 }, { "epoch": 0.20547255329864025, "grad_norm": 0.8719404115691543, "learning_rate": 1.842959857997617e-05, "loss": 0.8628, "step": 1071 }, { "epoch": 0.2056644044221684, "grad_norm": 1.0336511982472596, "learning_rate": 1.8426253524150176e-05, "loss": 0.8048, "step": 1072 }, { "epoch": 0.20585625554569653, "grad_norm": 1.0029483981779983, "learning_rate": 1.84229052137668e-05, "loss": 0.7973, "step": 1073 }, { "epoch": 0.20604810666922468, "grad_norm": 1.047558868536764, "learning_rate": 1.841955365011929e-05, "loss": 0.8301, "step": 1074 }, { "epoch": 0.20623995779275284, "grad_norm": 1.0587418212116833, "learning_rate": 1.8416198834502155e-05, "loss": 0.8118, "step": 1075 }, { "epoch": 0.20643180891628096, "grad_norm": 0.9404847496136095, "learning_rate": 1.8412840768211162e-05, "loss": 0.868, "step": 1076 }, { "epoch": 0.20662366003980911, "grad_norm": 0.9813673610477712, "learning_rate": 1.840947945254333e-05, "loss": 0.8493, "step": 1077 }, { "epoch": 0.20681551116333724, "grad_norm": 0.9952711455050725, "learning_rate": 1.8406114888796938e-05, "loss": 0.8014, "step": 1078 }, { "epoch": 0.2070073622868654, "grad_norm": 0.993096504093702, "learning_rate": 1.8402747078271507e-05, "loss": 0.9046, "step": 1079 }, { "epoch": 0.20719921341039355, "grad_norm": 0.8687260735522269, "learning_rate": 1.839937602226783e-05, "loss": 0.9234, "step": 1080 }, { "epoch": 0.20739106453392167, "grad_norm": 1.0696675569321066, "learning_rate": 1.8396001722087935e-05, "loss": 0.8348, "step": 1081 }, { "epoch": 0.20758291565744982, "grad_norm": 1.286979732236771, "learning_rate": 1.8392624179035118e-05, "loss": 0.8422, "step": 1082 }, { "epoch": 0.20777476678097795, "grad_norm": 1.0400437860910798, "learning_rate": 1.838924339441392e-05, "loss": 0.8356, "step": 1083 }, { "epoch": 0.2079666179045061, "grad_norm": 0.955803684608253, "learning_rate": 1.838585936953014e-05, "loss": 0.8628, "step": 1084 }, { "epoch": 0.20815846902803425, "grad_norm": 0.6532840571690776, "learning_rate": 1.8382472105690815e-05, "loss": 0.3199, "step": 1085 }, { "epoch": 0.20835032015156238, "grad_norm": 1.0948362721146336, "learning_rate": 1.8379081604204247e-05, "loss": 0.8513, "step": 1086 }, { "epoch": 0.20854217127509053, "grad_norm": 0.8561145038879577, "learning_rate": 1.8375687866379988e-05, "loss": 0.8303, "step": 1087 }, { "epoch": 0.20873402239861868, "grad_norm": 1.0101196243885247, "learning_rate": 1.837229089352883e-05, "loss": 0.7575, "step": 1088 }, { "epoch": 0.2089258735221468, "grad_norm": 1.0553899074548296, "learning_rate": 1.8368890686962828e-05, "loss": 0.9032, "step": 1089 }, { "epoch": 0.20911772464567496, "grad_norm": 0.9460749297950166, "learning_rate": 1.836548724799527e-05, "loss": 0.9004, "step": 1090 }, { "epoch": 0.2093095757692031, "grad_norm": 1.062847856837082, "learning_rate": 1.83620805779407e-05, "loss": 0.8837, "step": 1091 }, { "epoch": 0.20950142689273124, "grad_norm": 1.0783994669071224, "learning_rate": 1.8358670678114915e-05, "loss": 0.8252, "step": 1092 }, { "epoch": 0.2096932780162594, "grad_norm": 0.9872638793861537, "learning_rate": 1.835525754983496e-05, "loss": 0.8686, "step": 1093 }, { "epoch": 0.20988512913978752, "grad_norm": 0.9597354348451395, "learning_rate": 1.835184119441911e-05, "loss": 0.9116, "step": 1094 }, { "epoch": 0.21007698026331567, "grad_norm": 1.053892327650084, "learning_rate": 1.834842161318691e-05, "loss": 0.9347, "step": 1095 }, { "epoch": 0.2102688313868438, "grad_norm": 0.9088567819999884, "learning_rate": 1.8344998807459133e-05, "loss": 0.7902, "step": 1096 }, { "epoch": 0.21046068251037195, "grad_norm": 0.9247524033369138, "learning_rate": 1.834157277855781e-05, "loss": 0.8506, "step": 1097 }, { "epoch": 0.2106525336339001, "grad_norm": 0.951949873710287, "learning_rate": 1.8338143527806203e-05, "loss": 0.8085, "step": 1098 }, { "epoch": 0.21084438475742823, "grad_norm": 0.8430692278079066, "learning_rate": 1.833471105652883e-05, "loss": 0.8318, "step": 1099 }, { "epoch": 0.21103623588095638, "grad_norm": 1.1405938366651243, "learning_rate": 1.8331275366051448e-05, "loss": 0.8529, "step": 1100 }, { "epoch": 0.21122808700448453, "grad_norm": 1.0214009110421742, "learning_rate": 1.832783645770106e-05, "loss": 0.8789, "step": 1101 }, { "epoch": 0.21141993812801266, "grad_norm": 1.0740018610910536, "learning_rate": 1.8324394332805913e-05, "loss": 0.7744, "step": 1102 }, { "epoch": 0.2116117892515408, "grad_norm": 1.0840246550971573, "learning_rate": 1.832094899269549e-05, "loss": 0.7956, "step": 1103 }, { "epoch": 0.21180364037506894, "grad_norm": 0.9456411450293118, "learning_rate": 1.831750043870052e-05, "loss": 0.7478, "step": 1104 }, { "epoch": 0.2119954914985971, "grad_norm": 0.9325496455123728, "learning_rate": 1.8314048672152976e-05, "loss": 0.8785, "step": 1105 }, { "epoch": 0.21218734262212524, "grad_norm": 1.0381884347197243, "learning_rate": 1.831059369438606e-05, "loss": 0.8278, "step": 1106 }, { "epoch": 0.21237919374565337, "grad_norm": 0.957493270080628, "learning_rate": 1.8307135506734237e-05, "loss": 0.9346, "step": 1107 }, { "epoch": 0.21257104486918152, "grad_norm": 0.94769801472417, "learning_rate": 1.830367411053319e-05, "loss": 0.8768, "step": 1108 }, { "epoch": 0.21276289599270964, "grad_norm": 1.0939502707529278, "learning_rate": 1.8300209507119845e-05, "loss": 0.9566, "step": 1109 }, { "epoch": 0.2129547471162378, "grad_norm": 1.0553099634341103, "learning_rate": 1.8296741697832382e-05, "loss": 0.8675, "step": 1110 }, { "epoch": 0.21314659823976595, "grad_norm": 1.0281357357679695, "learning_rate": 1.82932706840102e-05, "loss": 0.8303, "step": 1111 }, { "epoch": 0.21333844936329407, "grad_norm": 0.9654295156983638, "learning_rate": 1.828979646699394e-05, "loss": 0.7874, "step": 1112 }, { "epoch": 0.21353030048682223, "grad_norm": 1.0568462782773005, "learning_rate": 1.8286319048125495e-05, "loss": 0.8529, "step": 1113 }, { "epoch": 0.21372215161035038, "grad_norm": 0.984110738334508, "learning_rate": 1.828283842874798e-05, "loss": 0.8567, "step": 1114 }, { "epoch": 0.2139140027338785, "grad_norm": 0.9935115533124711, "learning_rate": 1.8279354610205744e-05, "loss": 0.8004, "step": 1115 }, { "epoch": 0.21410585385740666, "grad_norm": 0.9239972611349369, "learning_rate": 1.8275867593844386e-05, "loss": 0.8483, "step": 1116 }, { "epoch": 0.21429770498093478, "grad_norm": 1.0063748699785549, "learning_rate": 1.8272377381010726e-05, "loss": 0.8097, "step": 1117 }, { "epoch": 0.21448955610446294, "grad_norm": 0.8792003365932471, "learning_rate": 1.8268883973052826e-05, "loss": 0.7627, "step": 1118 }, { "epoch": 0.2146814072279911, "grad_norm": 0.9449758942582361, "learning_rate": 1.8265387371319983e-05, "loss": 0.8729, "step": 1119 }, { "epoch": 0.21487325835151921, "grad_norm": 0.9279414744020809, "learning_rate": 1.8261887577162717e-05, "loss": 0.86, "step": 1120 }, { "epoch": 0.21506510947504737, "grad_norm": 1.0011981228745415, "learning_rate": 1.82583845919328e-05, "loss": 0.9529, "step": 1121 }, { "epoch": 0.2152569605985755, "grad_norm": 0.7431497601466168, "learning_rate": 1.8254878416983217e-05, "loss": 0.3293, "step": 1122 }, { "epoch": 0.21544881172210364, "grad_norm": 0.9819187129967373, "learning_rate": 1.82513690536682e-05, "loss": 0.9191, "step": 1123 }, { "epoch": 0.2156406628456318, "grad_norm": 0.9755896342233258, "learning_rate": 1.8247856503343198e-05, "loss": 0.8489, "step": 1124 }, { "epoch": 0.21583251396915992, "grad_norm": 0.8604515687229202, "learning_rate": 1.8244340767364908e-05, "loss": 0.8632, "step": 1125 }, { "epoch": 0.21602436509268808, "grad_norm": 0.8834391526527703, "learning_rate": 1.8240821847091237e-05, "loss": 0.8258, "step": 1126 }, { "epoch": 0.21621621621621623, "grad_norm": 1.0147563860822777, "learning_rate": 1.823729974388135e-05, "loss": 0.8368, "step": 1127 }, { "epoch": 0.21640806733974435, "grad_norm": 0.9309150194137524, "learning_rate": 1.823377445909561e-05, "loss": 0.9273, "step": 1128 }, { "epoch": 0.2165999184632725, "grad_norm": 0.8906696541137533, "learning_rate": 1.8230245994095624e-05, "loss": 0.8152, "step": 1129 }, { "epoch": 0.21679176958680063, "grad_norm": 0.8600167480813835, "learning_rate": 1.8226714350244235e-05, "loss": 0.7684, "step": 1130 }, { "epoch": 0.21698362071032878, "grad_norm": 0.9533825567723881, "learning_rate": 1.82231795289055e-05, "loss": 0.8234, "step": 1131 }, { "epoch": 0.21717547183385694, "grad_norm": 0.8996127663690817, "learning_rate": 1.8219641531444713e-05, "loss": 0.8715, "step": 1132 }, { "epoch": 0.21736732295738506, "grad_norm": 0.9620513686971373, "learning_rate": 1.8216100359228387e-05, "loss": 0.837, "step": 1133 }, { "epoch": 0.21755917408091321, "grad_norm": 1.09887768483636, "learning_rate": 1.821255601362427e-05, "loss": 0.7879, "step": 1134 }, { "epoch": 0.21775102520444134, "grad_norm": 0.700588370652876, "learning_rate": 1.820900849600132e-05, "loss": 0.3384, "step": 1135 }, { "epoch": 0.2179428763279695, "grad_norm": 0.9391808038279654, "learning_rate": 1.8205457807729744e-05, "loss": 0.806, "step": 1136 }, { "epoch": 0.21813472745149765, "grad_norm": 1.0462386221281181, "learning_rate": 1.820190395018095e-05, "loss": 0.9058, "step": 1137 }, { "epoch": 0.21832657857502577, "grad_norm": 0.987818668672192, "learning_rate": 1.8198346924727587e-05, "loss": 0.8049, "step": 1138 }, { "epoch": 0.21851842969855392, "grad_norm": 1.037475552950344, "learning_rate": 1.8194786732743516e-05, "loss": 0.7626, "step": 1139 }, { "epoch": 0.21871028082208208, "grad_norm": 1.168128433603557, "learning_rate": 1.8191223375603827e-05, "loss": 0.7966, "step": 1140 }, { "epoch": 0.2189021319456102, "grad_norm": 0.9101142991468723, "learning_rate": 1.8187656854684838e-05, "loss": 0.7863, "step": 1141 }, { "epoch": 0.21909398306913835, "grad_norm": 0.9053923468832186, "learning_rate": 1.8184087171364074e-05, "loss": 0.8349, "step": 1142 }, { "epoch": 0.21928583419266648, "grad_norm": 0.6550389456808744, "learning_rate": 1.8180514327020293e-05, "loss": 0.3407, "step": 1143 }, { "epoch": 0.21947768531619463, "grad_norm": 1.236999101954372, "learning_rate": 1.817693832303347e-05, "loss": 0.7547, "step": 1144 }, { "epoch": 0.21966953643972278, "grad_norm": 0.9058843927449503, "learning_rate": 1.8173359160784806e-05, "loss": 0.78, "step": 1145 }, { "epoch": 0.2198613875632509, "grad_norm": 1.0178628229807938, "learning_rate": 1.8169776841656715e-05, "loss": 0.7892, "step": 1146 }, { "epoch": 0.22005323868677906, "grad_norm": 0.9372085377284595, "learning_rate": 1.8166191367032828e-05, "loss": 0.8596, "step": 1147 }, { "epoch": 0.22024508981030722, "grad_norm": 0.936370511640048, "learning_rate": 1.8162602738298006e-05, "loss": 0.8765, "step": 1148 }, { "epoch": 0.22043694093383534, "grad_norm": 0.8856214266235881, "learning_rate": 1.8159010956838315e-05, "loss": 0.8288, "step": 1149 }, { "epoch": 0.2206287920573635, "grad_norm": 1.0107153369062576, "learning_rate": 1.815541602404105e-05, "loss": 0.8585, "step": 1150 }, { "epoch": 0.22082064318089162, "grad_norm": 0.9361899655896608, "learning_rate": 1.8151817941294715e-05, "loss": 0.8065, "step": 1151 }, { "epoch": 0.22101249430441977, "grad_norm": 1.1527948687757534, "learning_rate": 1.8148216709989034e-05, "loss": 0.8615, "step": 1152 }, { "epoch": 0.22120434542794792, "grad_norm": 1.0138852717332987, "learning_rate": 1.8144612331514952e-05, "loss": 0.8414, "step": 1153 }, { "epoch": 0.22139619655147605, "grad_norm": 0.8839398839848258, "learning_rate": 1.814100480726462e-05, "loss": 0.8129, "step": 1154 }, { "epoch": 0.2215880476750042, "grad_norm": 0.9719594149470215, "learning_rate": 1.8137394138631413e-05, "loss": 0.8072, "step": 1155 }, { "epoch": 0.22177989879853233, "grad_norm": 0.9366863599260402, "learning_rate": 1.813378032700991e-05, "loss": 0.759, "step": 1156 }, { "epoch": 0.22197174992206048, "grad_norm": 1.0742544820871147, "learning_rate": 1.8130163373795918e-05, "loss": 0.8098, "step": 1157 }, { "epoch": 0.22216360104558863, "grad_norm": 0.8567488295107807, "learning_rate": 1.812654328038644e-05, "loss": 0.3341, "step": 1158 }, { "epoch": 0.22235545216911676, "grad_norm": 1.1361073040426457, "learning_rate": 1.8122920048179705e-05, "loss": 0.8379, "step": 1159 }, { "epoch": 0.2225473032926449, "grad_norm": 1.1878040575834097, "learning_rate": 1.8119293678575155e-05, "loss": 0.7971, "step": 1160 }, { "epoch": 0.22273915441617306, "grad_norm": 0.8807382833227798, "learning_rate": 1.8115664172973437e-05, "loss": 0.8734, "step": 1161 }, { "epoch": 0.2229310055397012, "grad_norm": 0.9485347919903245, "learning_rate": 1.811203153277641e-05, "loss": 0.852, "step": 1162 }, { "epoch": 0.22312285666322934, "grad_norm": 0.9522367240980859, "learning_rate": 1.810839575938715e-05, "loss": 0.8903, "step": 1163 }, { "epoch": 0.22331470778675747, "grad_norm": 0.8888128788409757, "learning_rate": 1.810475685420993e-05, "loss": 0.807, "step": 1164 }, { "epoch": 0.22350655891028562, "grad_norm": 0.9866492668338288, "learning_rate": 1.8101114818650252e-05, "loss": 0.8192, "step": 1165 }, { "epoch": 0.22369841003381377, "grad_norm": 0.9562782033334595, "learning_rate": 1.8097469654114806e-05, "loss": 0.8715, "step": 1166 }, { "epoch": 0.2238902611573419, "grad_norm": 0.9551869837034348, "learning_rate": 1.809382136201151e-05, "loss": 0.8565, "step": 1167 }, { "epoch": 0.22408211228087005, "grad_norm": 0.8804730203832379, "learning_rate": 1.8090169943749477e-05, "loss": 0.827, "step": 1168 }, { "epoch": 0.22427396340439817, "grad_norm": 1.100169929876388, "learning_rate": 1.8086515400739027e-05, "loss": 0.8553, "step": 1169 }, { "epoch": 0.22446581452792633, "grad_norm": 0.9884452191156432, "learning_rate": 1.80828577343917e-05, "loss": 0.8675, "step": 1170 }, { "epoch": 0.22465766565145448, "grad_norm": 0.679559731852617, "learning_rate": 1.8079196946120227e-05, "loss": 0.3415, "step": 1171 }, { "epoch": 0.2248495167749826, "grad_norm": 0.7238121251053861, "learning_rate": 1.8075533037338554e-05, "loss": 0.3418, "step": 1172 }, { "epoch": 0.22504136789851076, "grad_norm": 1.081826411984708, "learning_rate": 1.8071866009461828e-05, "loss": 0.7824, "step": 1173 }, { "epoch": 0.2252332190220389, "grad_norm": 1.0438950692987266, "learning_rate": 1.80681958639064e-05, "loss": 0.7858, "step": 1174 }, { "epoch": 0.22542507014556704, "grad_norm": 1.019300000348257, "learning_rate": 1.8064522602089836e-05, "loss": 0.854, "step": 1175 }, { "epoch": 0.2256169212690952, "grad_norm": 0.9967268861909538, "learning_rate": 1.806084622543089e-05, "loss": 0.8078, "step": 1176 }, { "epoch": 0.2258087723926233, "grad_norm": 1.147775721826465, "learning_rate": 1.8057166735349533e-05, "loss": 0.8542, "step": 1177 }, { "epoch": 0.22600062351615147, "grad_norm": 0.88045681132894, "learning_rate": 1.805348413326692e-05, "loss": 0.8357, "step": 1178 }, { "epoch": 0.22619247463967962, "grad_norm": 0.9348239615812476, "learning_rate": 1.804979842060543e-05, "loss": 0.8285, "step": 1179 }, { "epoch": 0.22638432576320774, "grad_norm": 1.0644548017988207, "learning_rate": 1.8046109598788632e-05, "loss": 0.8543, "step": 1180 }, { "epoch": 0.2265761768867359, "grad_norm": 0.9638285142129052, "learning_rate": 1.8042417669241296e-05, "loss": 0.8315, "step": 1181 }, { "epoch": 0.22676802801026402, "grad_norm": 0.8813068135392144, "learning_rate": 1.803872263338939e-05, "loss": 0.7932, "step": 1182 }, { "epoch": 0.22695987913379218, "grad_norm": 1.0230282173318905, "learning_rate": 1.8035024492660086e-05, "loss": 0.8384, "step": 1183 }, { "epoch": 0.22715173025732033, "grad_norm": 0.8526950742323578, "learning_rate": 1.8031323248481758e-05, "loss": 0.85, "step": 1184 }, { "epoch": 0.22734358138084845, "grad_norm": 0.9218179871280444, "learning_rate": 1.802761890228397e-05, "loss": 0.8866, "step": 1185 }, { "epoch": 0.2275354325043766, "grad_norm": 0.9393396098076607, "learning_rate": 1.80239114554975e-05, "loss": 0.7833, "step": 1186 }, { "epoch": 0.22772728362790476, "grad_norm": 0.8282915018120786, "learning_rate": 1.80202009095543e-05, "loss": 0.8794, "step": 1187 }, { "epoch": 0.22791913475143288, "grad_norm": 1.1260800955814783, "learning_rate": 1.8016487265887536e-05, "loss": 0.8854, "step": 1188 }, { "epoch": 0.22811098587496104, "grad_norm": 0.9204963879671411, "learning_rate": 1.8012770525931572e-05, "loss": 0.7457, "step": 1189 }, { "epoch": 0.22830283699848916, "grad_norm": 1.0397428286674903, "learning_rate": 1.8009050691121954e-05, "loss": 0.8153, "step": 1190 }, { "epoch": 0.22849468812201731, "grad_norm": 1.0084548998697052, "learning_rate": 1.8005327762895436e-05, "loss": 0.8417, "step": 1191 }, { "epoch": 0.22868653924554547, "grad_norm": 0.977923967265507, "learning_rate": 1.800160174268996e-05, "loss": 0.8492, "step": 1192 }, { "epoch": 0.2288783903690736, "grad_norm": 0.9956674645938409, "learning_rate": 1.7997872631944667e-05, "loss": 0.8044, "step": 1193 }, { "epoch": 0.22907024149260174, "grad_norm": 1.0827992163100921, "learning_rate": 1.799414043209989e-05, "loss": 0.9355, "step": 1194 }, { "epoch": 0.22926209261612987, "grad_norm": 0.9562921850680146, "learning_rate": 1.799040514459715e-05, "loss": 0.8549, "step": 1195 }, { "epoch": 0.22945394373965802, "grad_norm": 1.0663952336839557, "learning_rate": 1.7986666770879168e-05, "loss": 0.8271, "step": 1196 }, { "epoch": 0.22964579486318618, "grad_norm": 0.968127445412265, "learning_rate": 1.7982925312389856e-05, "loss": 0.8101, "step": 1197 }, { "epoch": 0.2298376459867143, "grad_norm": 1.0013773289040278, "learning_rate": 1.7979180770574313e-05, "loss": 0.8112, "step": 1198 }, { "epoch": 0.23002949711024245, "grad_norm": 0.9134113704728177, "learning_rate": 1.7975433146878828e-05, "loss": 0.8599, "step": 1199 }, { "epoch": 0.2302213482337706, "grad_norm": 0.8079099782107642, "learning_rate": 1.7971682442750888e-05, "loss": 0.3342, "step": 1200 }, { "epoch": 0.23041319935729873, "grad_norm": 1.009267889411519, "learning_rate": 1.7967928659639167e-05, "loss": 0.8801, "step": 1201 }, { "epoch": 0.23060505048082688, "grad_norm": 1.0405695730488531, "learning_rate": 1.7964171798993525e-05, "loss": 0.8189, "step": 1202 }, { "epoch": 0.230796901604355, "grad_norm": 1.0368375518865662, "learning_rate": 1.7960411862265007e-05, "loss": 0.7621, "step": 1203 }, { "epoch": 0.23098875272788316, "grad_norm": 1.018591369972708, "learning_rate": 1.795664885090586e-05, "loss": 0.9022, "step": 1204 }, { "epoch": 0.23118060385141131, "grad_norm": 1.2944042065173107, "learning_rate": 1.7952882766369508e-05, "loss": 0.753, "step": 1205 }, { "epoch": 0.23137245497493944, "grad_norm": 1.138443624400879, "learning_rate": 1.794911361011056e-05, "loss": 0.8241, "step": 1206 }, { "epoch": 0.2315643060984676, "grad_norm": 0.8530536030723568, "learning_rate": 1.7945341383584818e-05, "loss": 0.838, "step": 1207 }, { "epoch": 0.23175615722199572, "grad_norm": 1.0183654058269853, "learning_rate": 1.794156608824927e-05, "loss": 0.8274, "step": 1208 }, { "epoch": 0.23194800834552387, "grad_norm": 0.8931832961773531, "learning_rate": 1.793778772556208e-05, "loss": 0.8655, "step": 1209 }, { "epoch": 0.23213985946905202, "grad_norm": 1.0974876030279648, "learning_rate": 1.793400629698261e-05, "loss": 0.8214, "step": 1210 }, { "epoch": 0.23233171059258015, "grad_norm": 0.9032037694365653, "learning_rate": 1.7930221803971395e-05, "loss": 0.829, "step": 1211 }, { "epoch": 0.2325235617161083, "grad_norm": 1.1525216778087435, "learning_rate": 1.792643424799016e-05, "loss": 0.8463, "step": 1212 }, { "epoch": 0.23271541283963645, "grad_norm": 0.9919072332168437, "learning_rate": 1.792264363050181e-05, "loss": 0.8105, "step": 1213 }, { "epoch": 0.23290726396316458, "grad_norm": 0.8845591041578695, "learning_rate": 1.791884995297044e-05, "loss": 0.8416, "step": 1214 }, { "epoch": 0.23309911508669273, "grad_norm": 0.7031742249199783, "learning_rate": 1.7915053216861307e-05, "loss": 0.3351, "step": 1215 }, { "epoch": 0.23329096621022086, "grad_norm": 1.1606038610216833, "learning_rate": 1.7911253423640876e-05, "loss": 0.7793, "step": 1216 }, { "epoch": 0.233482817333749, "grad_norm": 1.0519159950554597, "learning_rate": 1.7907450574776776e-05, "loss": 0.8561, "step": 1217 }, { "epoch": 0.23367466845727716, "grad_norm": 0.9192333446232672, "learning_rate": 1.7903644671737818e-05, "loss": 0.8555, "step": 1218 }, { "epoch": 0.2338665195808053, "grad_norm": 0.9822704783379911, "learning_rate": 1.7899835715993993e-05, "loss": 0.8453, "step": 1219 }, { "epoch": 0.23405837070433344, "grad_norm": 0.6317929241749435, "learning_rate": 1.7896023709016476e-05, "loss": 0.2971, "step": 1220 }, { "epoch": 0.23425022182786157, "grad_norm": 1.1705492652695857, "learning_rate": 1.789220865227762e-05, "loss": 0.9152, "step": 1221 }, { "epoch": 0.23444207295138972, "grad_norm": 0.9594004571701524, "learning_rate": 1.7888390547250944e-05, "loss": 0.8779, "step": 1222 }, { "epoch": 0.23463392407491787, "grad_norm": 0.9338334216100931, "learning_rate": 1.7884569395411158e-05, "loss": 0.838, "step": 1223 }, { "epoch": 0.234825775198446, "grad_norm": 1.056469037334628, "learning_rate": 1.7880745198234146e-05, "loss": 0.8358, "step": 1224 }, { "epoch": 0.23501762632197415, "grad_norm": 0.8715372751951921, "learning_rate": 1.7876917957196966e-05, "loss": 0.8056, "step": 1225 }, { "epoch": 0.2352094774455023, "grad_norm": 1.052114265295728, "learning_rate": 1.787308767377785e-05, "loss": 0.8306, "step": 1226 }, { "epoch": 0.23540132856903043, "grad_norm": 0.9644677244109211, "learning_rate": 1.786925434945621e-05, "loss": 0.8443, "step": 1227 }, { "epoch": 0.23559317969255858, "grad_norm": 0.8992256987722074, "learning_rate": 1.786541798571263e-05, "loss": 0.8343, "step": 1228 }, { "epoch": 0.2357850308160867, "grad_norm": 0.9733404639439774, "learning_rate": 1.7861578584028863e-05, "loss": 0.907, "step": 1229 }, { "epoch": 0.23597688193961486, "grad_norm": 0.9382922257853881, "learning_rate": 1.7857736145887844e-05, "loss": 0.8418, "step": 1230 }, { "epoch": 0.236168733063143, "grad_norm": 0.9042238492974061, "learning_rate": 1.7853890672773677e-05, "loss": 0.8095, "step": 1231 }, { "epoch": 0.23636058418667114, "grad_norm": 1.044223372683393, "learning_rate": 1.7850042166171637e-05, "loss": 0.7893, "step": 1232 }, { "epoch": 0.2365524353101993, "grad_norm": 0.8878816976836736, "learning_rate": 1.784619062756817e-05, "loss": 0.8185, "step": 1233 }, { "epoch": 0.23674428643372744, "grad_norm": 0.9044900134917526, "learning_rate": 1.7842336058450896e-05, "loss": 0.8498, "step": 1234 }, { "epoch": 0.23693613755725557, "grad_norm": 0.9797790914613652, "learning_rate": 1.783847846030861e-05, "loss": 0.859, "step": 1235 }, { "epoch": 0.23712798868078372, "grad_norm": 0.8413181052104647, "learning_rate": 1.783461783463126e-05, "loss": 0.8253, "step": 1236 }, { "epoch": 0.23731983980431184, "grad_norm": 0.8682185971567347, "learning_rate": 1.7830754182909985e-05, "loss": 0.8136, "step": 1237 }, { "epoch": 0.23751169092784, "grad_norm": 1.0530162387018722, "learning_rate": 1.7826887506637075e-05, "loss": 0.7557, "step": 1238 }, { "epoch": 0.23770354205136815, "grad_norm": 0.9185138768894286, "learning_rate": 1.7823017807306003e-05, "loss": 0.8313, "step": 1239 }, { "epoch": 0.23789539317489627, "grad_norm": 0.9163520392398489, "learning_rate": 1.7819145086411394e-05, "loss": 0.8661, "step": 1240 }, { "epoch": 0.23808724429842443, "grad_norm": 0.9533648094397468, "learning_rate": 1.7815269345449056e-05, "loss": 0.7832, "step": 1241 }, { "epoch": 0.23827909542195255, "grad_norm": 0.8105846698535564, "learning_rate": 1.781139058591595e-05, "loss": 0.3401, "step": 1242 }, { "epoch": 0.2384709465454807, "grad_norm": 0.8964154840993139, "learning_rate": 1.7807508809310213e-05, "loss": 0.8324, "step": 1243 }, { "epoch": 0.23866279766900886, "grad_norm": 0.9525247293673481, "learning_rate": 1.780362401713114e-05, "loss": 0.8812, "step": 1244 }, { "epoch": 0.23885464879253698, "grad_norm": 0.9028432490701536, "learning_rate": 1.779973621087919e-05, "loss": 0.8609, "step": 1245 }, { "epoch": 0.23904649991606514, "grad_norm": 1.1646135036533032, "learning_rate": 1.7795845392055996e-05, "loss": 0.8114, "step": 1246 }, { "epoch": 0.2392383510395933, "grad_norm": 0.6753025939086377, "learning_rate": 1.779195156216435e-05, "loss": 0.3181, "step": 1247 }, { "epoch": 0.23943020216312141, "grad_norm": 0.9926250634300243, "learning_rate": 1.77880547227082e-05, "loss": 0.7965, "step": 1248 }, { "epoch": 0.23962205328664957, "grad_norm": 0.8745398746309847, "learning_rate": 1.7784154875192665e-05, "loss": 0.8281, "step": 1249 }, { "epoch": 0.2398139044101777, "grad_norm": 1.0186023746576083, "learning_rate": 1.778025202112402e-05, "loss": 0.817, "step": 1250 }, { "epoch": 0.24000575553370584, "grad_norm": 1.0531631177872434, "learning_rate": 1.777634616200971e-05, "loss": 0.8186, "step": 1251 }, { "epoch": 0.240197606657234, "grad_norm": 1.1286237447462284, "learning_rate": 1.7772437299358324e-05, "loss": 0.8423, "step": 1252 }, { "epoch": 0.24038945778076212, "grad_norm": 0.9059039191657495, "learning_rate": 1.7768525434679632e-05, "loss": 0.8687, "step": 1253 }, { "epoch": 0.24058130890429028, "grad_norm": 0.9230045022394993, "learning_rate": 1.7764610569484547e-05, "loss": 0.761, "step": 1254 }, { "epoch": 0.2407731600278184, "grad_norm": 0.9574405459639874, "learning_rate": 1.776069270528515e-05, "loss": 0.8405, "step": 1255 }, { "epoch": 0.24096501115134655, "grad_norm": 0.7160557059707698, "learning_rate": 1.7756771843594674e-05, "loss": 0.3287, "step": 1256 }, { "epoch": 0.2411568622748747, "grad_norm": 0.9464674897194901, "learning_rate": 1.775284798592752e-05, "loss": 0.7822, "step": 1257 }, { "epoch": 0.24134871339840283, "grad_norm": 0.9255263772716076, "learning_rate": 1.774892113379923e-05, "loss": 0.8468, "step": 1258 }, { "epoch": 0.24154056452193098, "grad_norm": 1.0203742117024515, "learning_rate": 1.774499128872652e-05, "loss": 0.9123, "step": 1259 }, { "epoch": 0.24173241564545914, "grad_norm": 0.8493184242871302, "learning_rate": 1.7741058452227252e-05, "loss": 0.7499, "step": 1260 }, { "epoch": 0.24192426676898726, "grad_norm": 0.9841918937471741, "learning_rate": 1.7737122625820444e-05, "loss": 0.8428, "step": 1261 }, { "epoch": 0.24211611789251541, "grad_norm": 0.8192206760448135, "learning_rate": 1.7733183811026266e-05, "loss": 0.8309, "step": 1262 }, { "epoch": 0.24230796901604354, "grad_norm": 0.9758712604471754, "learning_rate": 1.7729242009366058e-05, "loss": 0.7725, "step": 1263 }, { "epoch": 0.2424998201395717, "grad_norm": 1.1076291623796242, "learning_rate": 1.772529722236229e-05, "loss": 0.8719, "step": 1264 }, { "epoch": 0.24269167126309985, "grad_norm": 0.9340235834696159, "learning_rate": 1.7721349451538604e-05, "loss": 0.8441, "step": 1265 }, { "epoch": 0.24288352238662797, "grad_norm": 0.6540203641250129, "learning_rate": 1.7717398698419782e-05, "loss": 0.2905, "step": 1266 }, { "epoch": 0.24307537351015612, "grad_norm": 0.9572852530116597, "learning_rate": 1.771344496453177e-05, "loss": 0.8628, "step": 1267 }, { "epoch": 0.24326722463368425, "grad_norm": 0.9807837280256235, "learning_rate": 1.770948825140166e-05, "loss": 0.8598, "step": 1268 }, { "epoch": 0.2434590757572124, "grad_norm": 1.1850787794872737, "learning_rate": 1.7705528560557682e-05, "loss": 0.9375, "step": 1269 }, { "epoch": 0.24365092688074055, "grad_norm": 1.0017125343481, "learning_rate": 1.7701565893529236e-05, "loss": 0.8446, "step": 1270 }, { "epoch": 0.24384277800426868, "grad_norm": 0.9790445421151409, "learning_rate": 1.7697600251846868e-05, "loss": 0.8254, "step": 1271 }, { "epoch": 0.24403462912779683, "grad_norm": 1.1217216099823923, "learning_rate": 1.7693631637042256e-05, "loss": 0.8483, "step": 1272 }, { "epoch": 0.24422648025132498, "grad_norm": 0.9350790681286946, "learning_rate": 1.7689660050648245e-05, "loss": 0.8412, "step": 1273 }, { "epoch": 0.2444183313748531, "grad_norm": 0.9493678594596762, "learning_rate": 1.768568549419882e-05, "loss": 0.8464, "step": 1274 }, { "epoch": 0.24461018249838126, "grad_norm": 1.016400265713324, "learning_rate": 1.7681707969229115e-05, "loss": 0.8497, "step": 1275 }, { "epoch": 0.2448020336219094, "grad_norm": 0.9956613018075066, "learning_rate": 1.7677727477275414e-05, "loss": 0.8449, "step": 1276 }, { "epoch": 0.24499388474543754, "grad_norm": 1.0026923553542209, "learning_rate": 1.767374401987513e-05, "loss": 0.8221, "step": 1277 }, { "epoch": 0.2451857358689657, "grad_norm": 1.0060527391294276, "learning_rate": 1.7669757598566848e-05, "loss": 0.7961, "step": 1278 }, { "epoch": 0.24537758699249382, "grad_norm": 0.9939465899301274, "learning_rate": 1.766576821489028e-05, "loss": 0.7936, "step": 1279 }, { "epoch": 0.24556943811602197, "grad_norm": 1.1086315922991021, "learning_rate": 1.766177587038628e-05, "loss": 0.8814, "step": 1280 }, { "epoch": 0.2457612892395501, "grad_norm": 1.0833848411834437, "learning_rate": 1.7657780566596856e-05, "loss": 0.7786, "step": 1281 }, { "epoch": 0.24595314036307825, "grad_norm": 0.9433614702642914, "learning_rate": 1.7653782305065158e-05, "loss": 0.809, "step": 1282 }, { "epoch": 0.2461449914866064, "grad_norm": 1.006732159432956, "learning_rate": 1.764978108733547e-05, "loss": 0.8485, "step": 1283 }, { "epoch": 0.24633684261013453, "grad_norm": 1.016060315428831, "learning_rate": 1.7645776914953227e-05, "loss": 0.7574, "step": 1284 }, { "epoch": 0.24652869373366268, "grad_norm": 0.9442779883153882, "learning_rate": 1.7641769789464996e-05, "loss": 0.7639, "step": 1285 }, { "epoch": 0.24672054485719083, "grad_norm": 1.1640547451691539, "learning_rate": 1.7637759712418497e-05, "loss": 0.8812, "step": 1286 }, { "epoch": 0.24691239598071896, "grad_norm": 0.8586137800414664, "learning_rate": 1.7633746685362573e-05, "loss": 0.8065, "step": 1287 }, { "epoch": 0.2471042471042471, "grad_norm": 0.9673659229758236, "learning_rate": 1.7629730709847227e-05, "loss": 0.8699, "step": 1288 }, { "epoch": 0.24729609822777524, "grad_norm": 0.951064822989284, "learning_rate": 1.7625711787423583e-05, "loss": 0.824, "step": 1289 }, { "epoch": 0.2474879493513034, "grad_norm": 1.158635745527694, "learning_rate": 1.762168991964391e-05, "loss": 0.7764, "step": 1290 }, { "epoch": 0.24767980047483154, "grad_norm": 0.9136058380488091, "learning_rate": 1.7617665108061617e-05, "loss": 0.8334, "step": 1291 }, { "epoch": 0.24787165159835967, "grad_norm": 0.7383417887459998, "learning_rate": 1.761363735423125e-05, "loss": 0.331, "step": 1292 }, { "epoch": 0.24806350272188782, "grad_norm": 0.9065026102816078, "learning_rate": 1.7609606659708482e-05, "loss": 0.8488, "step": 1293 }, { "epoch": 0.24825535384541594, "grad_norm": 0.9489169877555388, "learning_rate": 1.7605573026050138e-05, "loss": 0.751, "step": 1294 }, { "epoch": 0.2484472049689441, "grad_norm": 0.8868794058533002, "learning_rate": 1.7601536454814164e-05, "loss": 0.7905, "step": 1295 }, { "epoch": 0.24863905609247225, "grad_norm": 1.014634320777516, "learning_rate": 1.7597496947559645e-05, "loss": 0.8598, "step": 1296 }, { "epoch": 0.24883090721600037, "grad_norm": 1.0365407111858451, "learning_rate": 1.7593454505846807e-05, "loss": 0.777, "step": 1297 }, { "epoch": 0.24902275833952853, "grad_norm": 0.9515706575101429, "learning_rate": 1.7589409131236994e-05, "loss": 0.8025, "step": 1298 }, { "epoch": 0.24921460946305668, "grad_norm": 0.6282649976116996, "learning_rate": 1.75853608252927e-05, "loss": 0.3324, "step": 1299 }, { "epoch": 0.2494064605865848, "grad_norm": 0.9435927564605338, "learning_rate": 1.758130958957754e-05, "loss": 0.883, "step": 1300 }, { "epoch": 0.24959831171011296, "grad_norm": 1.193448314057325, "learning_rate": 1.757725542565626e-05, "loss": 0.8382, "step": 1301 }, { "epoch": 0.24979016283364108, "grad_norm": 1.0401688923332673, "learning_rate": 1.7573198335094753e-05, "loss": 0.8664, "step": 1302 }, { "epoch": 0.24998201395716924, "grad_norm": 0.9556172444168193, "learning_rate": 1.7569138319460016e-05, "loss": 0.88, "step": 1303 }, { "epoch": 0.2501738650806974, "grad_norm": 0.9279439234831469, "learning_rate": 1.75650753803202e-05, "loss": 0.8241, "step": 1304 }, { "epoch": 0.25036571620422554, "grad_norm": 0.9310122074567229, "learning_rate": 1.7561009519244568e-05, "loss": 0.8429, "step": 1305 }, { "epoch": 0.25055756732775364, "grad_norm": 0.9831804901937574, "learning_rate": 1.7556940737803523e-05, "loss": 0.8513, "step": 1306 }, { "epoch": 0.2507494184512818, "grad_norm": 0.8818173610054637, "learning_rate": 1.7552869037568592e-05, "loss": 0.8409, "step": 1307 }, { "epoch": 0.25094126957480994, "grad_norm": 1.0225250000070054, "learning_rate": 1.754879442011243e-05, "loss": 0.8459, "step": 1308 }, { "epoch": 0.2511331206983381, "grad_norm": 1.0802874251008738, "learning_rate": 1.754471688700881e-05, "loss": 0.8623, "step": 1309 }, { "epoch": 0.25132497182186625, "grad_norm": 0.8795036483656965, "learning_rate": 1.754063643983265e-05, "loss": 0.7766, "step": 1310 }, { "epoch": 0.25151682294539435, "grad_norm": 0.9878669469478885, "learning_rate": 1.7536553080159978e-05, "loss": 0.8562, "step": 1311 }, { "epoch": 0.2517086740689225, "grad_norm": 1.106319181096172, "learning_rate": 1.753246680956795e-05, "loss": 0.7967, "step": 1312 }, { "epoch": 0.25190052519245065, "grad_norm": 0.9414360098448206, "learning_rate": 1.7528377629634847e-05, "loss": 0.8766, "step": 1313 }, { "epoch": 0.2520923763159788, "grad_norm": 0.9229320768186036, "learning_rate": 1.7524285541940075e-05, "loss": 0.8485, "step": 1314 }, { "epoch": 0.25228422743950696, "grad_norm": 0.9193628784503964, "learning_rate": 1.7520190548064166e-05, "loss": 0.8603, "step": 1315 }, { "epoch": 0.2524760785630351, "grad_norm": 1.009860305329254, "learning_rate": 1.751609264958877e-05, "loss": 0.8106, "step": 1316 }, { "epoch": 0.2526679296865632, "grad_norm": 0.8883348923890543, "learning_rate": 1.7511991848096658e-05, "loss": 0.7702, "step": 1317 }, { "epoch": 0.25285978081009136, "grad_norm": 0.7182858426669789, "learning_rate": 1.750788814517172e-05, "loss": 0.307, "step": 1318 }, { "epoch": 0.2530516319336195, "grad_norm": 0.8894140594035685, "learning_rate": 1.750378154239898e-05, "loss": 0.8066, "step": 1319 }, { "epoch": 0.25324348305714767, "grad_norm": 0.649449798082214, "learning_rate": 1.7499672041364567e-05, "loss": 0.3169, "step": 1320 }, { "epoch": 0.2534353341806758, "grad_norm": 0.8285225415669357, "learning_rate": 1.7495559643655736e-05, "loss": 0.7411, "step": 1321 }, { "epoch": 0.2536271853042039, "grad_norm": 0.8997542655756666, "learning_rate": 1.749144435086086e-05, "loss": 0.799, "step": 1322 }, { "epoch": 0.25381903642773207, "grad_norm": 0.8450985765719915, "learning_rate": 1.7487326164569434e-05, "loss": 0.8488, "step": 1323 }, { "epoch": 0.2540108875512602, "grad_norm": 0.8872474677717753, "learning_rate": 1.7483205086372064e-05, "loss": 0.861, "step": 1324 }, { "epoch": 0.2542027386747884, "grad_norm": 0.9067163601750808, "learning_rate": 1.7479081117860475e-05, "loss": 0.8652, "step": 1325 }, { "epoch": 0.25439458979831653, "grad_norm": 0.9851063633668732, "learning_rate": 1.7474954260627507e-05, "loss": 0.8124, "step": 1326 }, { "epoch": 0.2545864409218446, "grad_norm": 0.8710071171570741, "learning_rate": 1.7470824516267125e-05, "loss": 0.8924, "step": 1327 }, { "epoch": 0.2547782920453728, "grad_norm": 0.9344477589163906, "learning_rate": 1.7466691886374394e-05, "loss": 0.8602, "step": 1328 }, { "epoch": 0.25497014316890093, "grad_norm": 1.186137897567125, "learning_rate": 1.7462556372545508e-05, "loss": 0.8391, "step": 1329 }, { "epoch": 0.2551619942924291, "grad_norm": 0.9276102781930952, "learning_rate": 1.7458417976377764e-05, "loss": 0.8516, "step": 1330 }, { "epoch": 0.25535384541595724, "grad_norm": 0.9903367068992426, "learning_rate": 1.745427669946958e-05, "loss": 0.8693, "step": 1331 }, { "epoch": 0.25554569653948533, "grad_norm": 0.9387579998221237, "learning_rate": 1.745013254342048e-05, "loss": 0.9034, "step": 1332 }, { "epoch": 0.2557375476630135, "grad_norm": 0.9408187500676166, "learning_rate": 1.744598550983111e-05, "loss": 0.7978, "step": 1333 }, { "epoch": 0.25592939878654164, "grad_norm": 1.069057915319122, "learning_rate": 1.7441835600303216e-05, "loss": 0.8565, "step": 1334 }, { "epoch": 0.2561212499100698, "grad_norm": 0.9498075776445468, "learning_rate": 1.7437682816439654e-05, "loss": 0.7562, "step": 1335 }, { "epoch": 0.25631310103359795, "grad_norm": 0.9752894000736566, "learning_rate": 1.7433527159844406e-05, "loss": 0.872, "step": 1336 }, { "epoch": 0.25650495215712604, "grad_norm": 0.9865977977612248, "learning_rate": 1.742936863212255e-05, "loss": 0.8291, "step": 1337 }, { "epoch": 0.2566968032806542, "grad_norm": 1.0431123841278227, "learning_rate": 1.7425207234880278e-05, "loss": 0.8328, "step": 1338 }, { "epoch": 0.25688865440418235, "grad_norm": 1.0079295116172462, "learning_rate": 1.742104296972488e-05, "loss": 0.8254, "step": 1339 }, { "epoch": 0.2570805055277105, "grad_norm": 0.954880307451445, "learning_rate": 1.741687583826477e-05, "loss": 0.8161, "step": 1340 }, { "epoch": 0.25727235665123865, "grad_norm": 1.2489582591763024, "learning_rate": 1.7412705842109464e-05, "loss": 0.7952, "step": 1341 }, { "epoch": 0.2574642077747668, "grad_norm": 1.1052694599704578, "learning_rate": 1.7408532982869573e-05, "loss": 0.8083, "step": 1342 }, { "epoch": 0.2576560588982949, "grad_norm": 0.8991060250086897, "learning_rate": 1.7404357262156834e-05, "loss": 0.3081, "step": 1343 }, { "epoch": 0.25784791002182306, "grad_norm": 0.9531628625775975, "learning_rate": 1.7400178681584065e-05, "loss": 0.8193, "step": 1344 }, { "epoch": 0.2580397611453512, "grad_norm": 0.9662787877966802, "learning_rate": 1.7395997242765215e-05, "loss": 0.8722, "step": 1345 }, { "epoch": 0.25823161226887936, "grad_norm": 1.0865186735860695, "learning_rate": 1.739181294731531e-05, "loss": 0.8313, "step": 1346 }, { "epoch": 0.2584234633924075, "grad_norm": 0.6891328851968169, "learning_rate": 1.738762579685051e-05, "loss": 0.2997, "step": 1347 }, { "epoch": 0.2586153145159356, "grad_norm": 1.1275976792253912, "learning_rate": 1.738343579298804e-05, "loss": 0.7949, "step": 1348 }, { "epoch": 0.25880716563946377, "grad_norm": 0.9027561289687388, "learning_rate": 1.7379242937346263e-05, "loss": 0.8692, "step": 1349 }, { "epoch": 0.2589990167629919, "grad_norm": 0.977805344963415, "learning_rate": 1.7375047231544622e-05, "loss": 0.8527, "step": 1350 }, { "epoch": 0.25919086788652007, "grad_norm": 1.0044992134173576, "learning_rate": 1.7370848677203666e-05, "loss": 0.8696, "step": 1351 }, { "epoch": 0.2593827190100482, "grad_norm": 0.6719479022616396, "learning_rate": 1.736664727594505e-05, "loss": 0.3545, "step": 1352 }, { "epoch": 0.2595745701335763, "grad_norm": 1.126901092890855, "learning_rate": 1.7362443029391524e-05, "loss": 0.8355, "step": 1353 }, { "epoch": 0.2597664212571045, "grad_norm": 1.0415796250755582, "learning_rate": 1.7358235939166928e-05, "loss": 0.8475, "step": 1354 }, { "epoch": 0.2599582723806326, "grad_norm": 0.9253520941949013, "learning_rate": 1.7354026006896222e-05, "loss": 0.8658, "step": 1355 }, { "epoch": 0.2601501235041608, "grad_norm": 1.0141750763962405, "learning_rate": 1.734981323420544e-05, "loss": 0.8265, "step": 1356 }, { "epoch": 0.26034197462768893, "grad_norm": 0.8785396037452271, "learning_rate": 1.7345597622721727e-05, "loss": 0.7648, "step": 1357 }, { "epoch": 0.26053382575121703, "grad_norm": 1.0347105247662023, "learning_rate": 1.7341379174073327e-05, "loss": 0.9315, "step": 1358 }, { "epoch": 0.2607256768747452, "grad_norm": 0.8218597373907672, "learning_rate": 1.7337157889889565e-05, "loss": 0.7806, "step": 1359 }, { "epoch": 0.26091752799827334, "grad_norm": 1.1153332896751373, "learning_rate": 1.733293377180088e-05, "loss": 0.8307, "step": 1360 }, { "epoch": 0.2611093791218015, "grad_norm": 0.9170844897816823, "learning_rate": 1.7328706821438785e-05, "loss": 0.7994, "step": 1361 }, { "epoch": 0.26130123024532964, "grad_norm": 0.9033400008800083, "learning_rate": 1.7324477040435906e-05, "loss": 0.8668, "step": 1362 }, { "epoch": 0.2614930813688578, "grad_norm": 0.9585809595996299, "learning_rate": 1.7320244430425953e-05, "loss": 0.8648, "step": 1363 }, { "epoch": 0.2616849324923859, "grad_norm": 0.9489108096979231, "learning_rate": 1.731600899304373e-05, "loss": 0.843, "step": 1364 }, { "epoch": 0.26187678361591404, "grad_norm": 0.8761052231950027, "learning_rate": 1.7311770729925134e-05, "loss": 0.7818, "step": 1365 }, { "epoch": 0.2620686347394422, "grad_norm": 0.9321082470986658, "learning_rate": 1.730752964270715e-05, "loss": 0.8435, "step": 1366 }, { "epoch": 0.26226048586297035, "grad_norm": 0.842951683711304, "learning_rate": 1.7303285733027852e-05, "loss": 0.8804, "step": 1367 }, { "epoch": 0.2624523369864985, "grad_norm": 0.9843386993664349, "learning_rate": 1.7299039002526413e-05, "loss": 0.8428, "step": 1368 }, { "epoch": 0.2626441881100266, "grad_norm": 0.9547192673546875, "learning_rate": 1.7294789452843095e-05, "loss": 0.9154, "step": 1369 }, { "epoch": 0.26283603923355475, "grad_norm": 1.0919800365982792, "learning_rate": 1.729053708561924e-05, "loss": 0.8486, "step": 1370 }, { "epoch": 0.2630278903570829, "grad_norm": 0.9579883086165809, "learning_rate": 1.728628190249728e-05, "loss": 0.8185, "step": 1371 }, { "epoch": 0.26321974148061106, "grad_norm": 1.040649975887473, "learning_rate": 1.7282023905120743e-05, "loss": 0.8272, "step": 1372 }, { "epoch": 0.2634115926041392, "grad_norm": 0.9784258441104656, "learning_rate": 1.7277763095134236e-05, "loss": 0.8462, "step": 1373 }, { "epoch": 0.2636034437276673, "grad_norm": 0.8948167861312603, "learning_rate": 1.7273499474183454e-05, "loss": 0.8368, "step": 1374 }, { "epoch": 0.26379529485119546, "grad_norm": 0.9581261687520009, "learning_rate": 1.726923304391518e-05, "loss": 0.8035, "step": 1375 }, { "epoch": 0.2639871459747236, "grad_norm": 1.0047592291317438, "learning_rate": 1.726496380597728e-05, "loss": 0.8626, "step": 1376 }, { "epoch": 0.26417899709825177, "grad_norm": 0.9270748643281302, "learning_rate": 1.7260691762018708e-05, "loss": 0.8265, "step": 1377 }, { "epoch": 0.2643708482217799, "grad_norm": 0.665210052322458, "learning_rate": 1.7256416913689497e-05, "loss": 0.335, "step": 1378 }, { "epoch": 0.264562699345308, "grad_norm": 0.9202949514338167, "learning_rate": 1.725213926264076e-05, "loss": 0.8064, "step": 1379 }, { "epoch": 0.26475455046883617, "grad_norm": 0.6919473927331417, "learning_rate": 1.7247858810524706e-05, "loss": 0.3192, "step": 1380 }, { "epoch": 0.2649464015923643, "grad_norm": 0.9771517860906013, "learning_rate": 1.7243575558994613e-05, "loss": 0.8191, "step": 1381 }, { "epoch": 0.2651382527158925, "grad_norm": 0.8670781632472606, "learning_rate": 1.7239289509704843e-05, "loss": 0.8736, "step": 1382 }, { "epoch": 0.26533010383942063, "grad_norm": 0.929096907049196, "learning_rate": 1.7235000664310844e-05, "loss": 0.8568, "step": 1383 }, { "epoch": 0.2655219549629487, "grad_norm": 0.7207101023520229, "learning_rate": 1.7230709024469137e-05, "loss": 0.3345, "step": 1384 }, { "epoch": 0.2657138060864769, "grad_norm": 0.9701229510232892, "learning_rate": 1.722641459183733e-05, "loss": 0.8619, "step": 1385 }, { "epoch": 0.26590565721000503, "grad_norm": 0.8676470191902736, "learning_rate": 1.72221173680741e-05, "loss": 0.8033, "step": 1386 }, { "epoch": 0.2660975083335332, "grad_norm": 0.9685776156903025, "learning_rate": 1.721781735483921e-05, "loss": 0.8411, "step": 1387 }, { "epoch": 0.26628935945706134, "grad_norm": 1.1488539638414925, "learning_rate": 1.72135145537935e-05, "loss": 0.8388, "step": 1388 }, { "epoch": 0.2664812105805895, "grad_norm": 1.0549910857490419, "learning_rate": 1.720920896659888e-05, "loss": 0.7777, "step": 1389 }, { "epoch": 0.2666730617041176, "grad_norm": 0.9300318923816852, "learning_rate": 1.7204900594918346e-05, "loss": 0.7715, "step": 1390 }, { "epoch": 0.26686491282764574, "grad_norm": 0.9197583148164895, "learning_rate": 1.720058944041596e-05, "loss": 0.8813, "step": 1391 }, { "epoch": 0.2670567639511739, "grad_norm": 0.8737944944272352, "learning_rate": 1.7196275504756862e-05, "loss": 0.8266, "step": 1392 }, { "epoch": 0.26724861507470205, "grad_norm": 0.8232546613847596, "learning_rate": 1.7191958789607267e-05, "loss": 0.799, "step": 1393 }, { "epoch": 0.2674404661982302, "grad_norm": 0.8999130060842907, "learning_rate": 1.718763929663447e-05, "loss": 0.8198, "step": 1394 }, { "epoch": 0.2676323173217583, "grad_norm": 0.9600762677909199, "learning_rate": 1.7183317027506828e-05, "loss": 0.8547, "step": 1395 }, { "epoch": 0.26782416844528645, "grad_norm": 0.8926050359080735, "learning_rate": 1.717899198389377e-05, "loss": 0.8005, "step": 1396 }, { "epoch": 0.2680160195688146, "grad_norm": 1.0352555178365999, "learning_rate": 1.717466416746581e-05, "loss": 0.8541, "step": 1397 }, { "epoch": 0.26820787069234275, "grad_norm": 0.922234195343884, "learning_rate": 1.717033357989452e-05, "loss": 0.8454, "step": 1398 }, { "epoch": 0.2683997218158709, "grad_norm": 0.9386699686875235, "learning_rate": 1.7166000222852554e-05, "loss": 0.8509, "step": 1399 }, { "epoch": 0.268591572939399, "grad_norm": 0.9038846993853782, "learning_rate": 1.716166409801361e-05, "loss": 0.8461, "step": 1400 }, { "epoch": 0.26878342406292716, "grad_norm": 0.9539480271639862, "learning_rate": 1.71573252070525e-05, "loss": 0.7801, "step": 1401 }, { "epoch": 0.2689752751864553, "grad_norm": 0.8805267186890716, "learning_rate": 1.7152983551645054e-05, "loss": 0.8557, "step": 1402 }, { "epoch": 0.26916712630998346, "grad_norm": 1.0437863046496543, "learning_rate": 1.7148639133468205e-05, "loss": 0.8046, "step": 1403 }, { "epoch": 0.2693589774335116, "grad_norm": 0.9646031751322436, "learning_rate": 1.7144291954199942e-05, "loss": 0.8941, "step": 1404 }, { "epoch": 0.2695508285570397, "grad_norm": 0.9896342132215638, "learning_rate": 1.7139942015519314e-05, "loss": 0.8218, "step": 1405 }, { "epoch": 0.26974267968056787, "grad_norm": 1.1336656003534136, "learning_rate": 1.7135589319106448e-05, "loss": 0.8132, "step": 1406 }, { "epoch": 0.269934530804096, "grad_norm": 0.9508616842876194, "learning_rate": 1.713123386664253e-05, "loss": 0.8782, "step": 1407 }, { "epoch": 0.27012638192762417, "grad_norm": 0.9874442137606183, "learning_rate": 1.7126875659809808e-05, "loss": 0.8839, "step": 1408 }, { "epoch": 0.2703182330511523, "grad_norm": 0.9785389868343273, "learning_rate": 1.7122514700291593e-05, "loss": 0.8332, "step": 1409 }, { "epoch": 0.2705100841746804, "grad_norm": 1.02068456970419, "learning_rate": 1.711815098977227e-05, "loss": 0.8476, "step": 1410 }, { "epoch": 0.2707019352982086, "grad_norm": 0.8435121520044636, "learning_rate": 1.7113784529937278e-05, "loss": 0.8456, "step": 1411 }, { "epoch": 0.2708937864217367, "grad_norm": 0.8187584882923575, "learning_rate": 1.7109415322473117e-05, "loss": 0.8213, "step": 1412 }, { "epoch": 0.2710856375452649, "grad_norm": 0.9106285211218945, "learning_rate": 1.7105043369067347e-05, "loss": 0.8568, "step": 1413 }, { "epoch": 0.27127748866879303, "grad_norm": 1.1108846813948923, "learning_rate": 1.7100668671408597e-05, "loss": 0.8101, "step": 1414 }, { "epoch": 0.2714693397923212, "grad_norm": 1.055928477186934, "learning_rate": 1.709629123118655e-05, "loss": 0.846, "step": 1415 }, { "epoch": 0.2716611909158493, "grad_norm": 0.9885211351103778, "learning_rate": 1.7091911050091944e-05, "loss": 0.7781, "step": 1416 }, { "epoch": 0.27185304203937743, "grad_norm": 0.890931507288958, "learning_rate": 1.708752812981659e-05, "loss": 0.7932, "step": 1417 }, { "epoch": 0.2720448931629056, "grad_norm": 0.8437933375293767, "learning_rate": 1.7083142472053343e-05, "loss": 0.7961, "step": 1418 }, { "epoch": 0.27223674428643374, "grad_norm": 0.9076952374655919, "learning_rate": 1.7078754078496116e-05, "loss": 0.819, "step": 1419 }, { "epoch": 0.2724285954099619, "grad_norm": 0.9423197036912089, "learning_rate": 1.7074362950839887e-05, "loss": 0.818, "step": 1420 }, { "epoch": 0.27262044653349, "grad_norm": 1.034290182211249, "learning_rate": 1.7069969090780688e-05, "loss": 0.8767, "step": 1421 }, { "epoch": 0.27281229765701814, "grad_norm": 1.493489898650462, "learning_rate": 1.7065572500015602e-05, "loss": 0.8367, "step": 1422 }, { "epoch": 0.2730041487805463, "grad_norm": 0.7701197312756776, "learning_rate": 1.7061173180242768e-05, "loss": 0.3315, "step": 1423 }, { "epoch": 0.27319599990407445, "grad_norm": 0.9552051759821065, "learning_rate": 1.7056771133161382e-05, "loss": 0.8207, "step": 1424 }, { "epoch": 0.2733878510276026, "grad_norm": 1.018499052810061, "learning_rate": 1.7052366360471686e-05, "loss": 0.8828, "step": 1425 }, { "epoch": 0.2735797021511307, "grad_norm": 1.007135405678107, "learning_rate": 1.7047958863874986e-05, "loss": 0.8423, "step": 1426 }, { "epoch": 0.27377155327465885, "grad_norm": 1.2719379060233793, "learning_rate": 1.704354864507363e-05, "loss": 0.8332, "step": 1427 }, { "epoch": 0.273963404398187, "grad_norm": 1.057798768887862, "learning_rate": 1.7039135705771023e-05, "loss": 0.8814, "step": 1428 }, { "epoch": 0.27415525552171516, "grad_norm": 0.9364752858232194, "learning_rate": 1.703472004767162e-05, "loss": 0.8661, "step": 1429 }, { "epoch": 0.2743471066452433, "grad_norm": 0.7167692771788899, "learning_rate": 1.7030301672480925e-05, "loss": 0.3096, "step": 1430 }, { "epoch": 0.2745389577687714, "grad_norm": 1.0120710478858734, "learning_rate": 1.7025880581905493e-05, "loss": 0.7826, "step": 1431 }, { "epoch": 0.27473080889229956, "grad_norm": 0.8655284166746341, "learning_rate": 1.702145677765293e-05, "loss": 0.8232, "step": 1432 }, { "epoch": 0.2749226600158277, "grad_norm": 1.0542614164700719, "learning_rate": 1.7017030261431876e-05, "loss": 0.8435, "step": 1433 }, { "epoch": 0.27511451113935587, "grad_norm": 0.9221312217544312, "learning_rate": 1.701260103495204e-05, "loss": 0.881, "step": 1434 }, { "epoch": 0.275306362262884, "grad_norm": 0.8925319387937322, "learning_rate": 1.7008169099924158e-05, "loss": 0.7868, "step": 1435 }, { "epoch": 0.27549821338641217, "grad_norm": 1.118201510358532, "learning_rate": 1.700373445806003e-05, "loss": 0.8031, "step": 1436 }, { "epoch": 0.27569006450994027, "grad_norm": 0.9824877321647344, "learning_rate": 1.6999297111072488e-05, "loss": 0.806, "step": 1437 }, { "epoch": 0.2758819156334684, "grad_norm": 1.243987952366177, "learning_rate": 1.6994857060675414e-05, "loss": 0.7491, "step": 1438 }, { "epoch": 0.2760737667569966, "grad_norm": 1.013225702750421, "learning_rate": 1.6990414308583733e-05, "loss": 0.912, "step": 1439 }, { "epoch": 0.2762656178805247, "grad_norm": 0.939675965363694, "learning_rate": 1.6985968856513417e-05, "loss": 0.7831, "step": 1440 }, { "epoch": 0.2764574690040529, "grad_norm": 0.9922666022511945, "learning_rate": 1.6981520706181476e-05, "loss": 0.8362, "step": 1441 }, { "epoch": 0.276649320127581, "grad_norm": 0.9569529775576868, "learning_rate": 1.6977069859305966e-05, "loss": 0.8584, "step": 1442 }, { "epoch": 0.27684117125110913, "grad_norm": 1.0418327591078993, "learning_rate": 1.6972616317605976e-05, "loss": 0.8468, "step": 1443 }, { "epoch": 0.2770330223746373, "grad_norm": 0.9235455833212498, "learning_rate": 1.6968160082801653e-05, "loss": 0.8273, "step": 1444 }, { "epoch": 0.27722487349816544, "grad_norm": 0.9022118371921809, "learning_rate": 1.6963701156614167e-05, "loss": 0.8074, "step": 1445 }, { "epoch": 0.2774167246216936, "grad_norm": 1.0595189056080365, "learning_rate": 1.6959239540765734e-05, "loss": 0.8287, "step": 1446 }, { "epoch": 0.2776085757452217, "grad_norm": 0.9719625292011463, "learning_rate": 1.6954775236979616e-05, "loss": 0.805, "step": 1447 }, { "epoch": 0.27780042686874984, "grad_norm": 1.0788733116299454, "learning_rate": 1.6950308246980097e-05, "loss": 0.8052, "step": 1448 }, { "epoch": 0.277992277992278, "grad_norm": 0.9270583800956604, "learning_rate": 1.6945838572492513e-05, "loss": 0.7728, "step": 1449 }, { "epoch": 0.27818412911580614, "grad_norm": 0.8900812862317217, "learning_rate": 1.6941366215243232e-05, "loss": 0.7719, "step": 1450 }, { "epoch": 0.2783759802393343, "grad_norm": 0.9342963783715752, "learning_rate": 1.6936891176959656e-05, "loss": 0.8359, "step": 1451 }, { "epoch": 0.2785678313628624, "grad_norm": 0.9493411283471548, "learning_rate": 1.6932413459370226e-05, "loss": 0.8669, "step": 1452 }, { "epoch": 0.27875968248639055, "grad_norm": 1.0157698572128562, "learning_rate": 1.6927933064204414e-05, "loss": 0.8845, "step": 1453 }, { "epoch": 0.2789515336099187, "grad_norm": 1.0355286672215005, "learning_rate": 1.692344999319273e-05, "loss": 0.7708, "step": 1454 }, { "epoch": 0.27914338473344685, "grad_norm": 1.0808793685314715, "learning_rate": 1.691896424806672e-05, "loss": 0.8758, "step": 1455 }, { "epoch": 0.279335235856975, "grad_norm": 0.7667239632225041, "learning_rate": 1.6914475830558953e-05, "loss": 0.3135, "step": 1456 }, { "epoch": 0.2795270869805031, "grad_norm": 1.154867860655671, "learning_rate": 1.690998474240304e-05, "loss": 0.8741, "step": 1457 }, { "epoch": 0.27971893810403126, "grad_norm": 0.9573954537567437, "learning_rate": 1.6905490985333612e-05, "loss": 0.8521, "step": 1458 }, { "epoch": 0.2799107892275594, "grad_norm": 1.1095427871591734, "learning_rate": 1.6900994561086347e-05, "loss": 0.8451, "step": 1459 }, { "epoch": 0.28010264035108756, "grad_norm": 0.915750113166336, "learning_rate": 1.689649547139794e-05, "loss": 0.7971, "step": 1460 }, { "epoch": 0.2802944914746157, "grad_norm": 0.9316275215553278, "learning_rate": 1.6891993718006127e-05, "loss": 0.8125, "step": 1461 }, { "epoch": 0.28048634259814387, "grad_norm": 0.8873042521979847, "learning_rate": 1.6887489302649657e-05, "loss": 0.777, "step": 1462 }, { "epoch": 0.28067819372167196, "grad_norm": 1.0165562350641402, "learning_rate": 1.6882982227068318e-05, "loss": 0.8737, "step": 1463 }, { "epoch": 0.2808700448452001, "grad_norm": 0.8567602902469212, "learning_rate": 1.6878472493002928e-05, "loss": 0.8186, "step": 1464 }, { "epoch": 0.28106189596872827, "grad_norm": 0.8971673233325645, "learning_rate": 1.687396010219532e-05, "loss": 0.8397, "step": 1465 }, { "epoch": 0.2812537470922564, "grad_norm": 1.0616151898616333, "learning_rate": 1.6869445056388367e-05, "loss": 0.8231, "step": 1466 }, { "epoch": 0.2814455982157846, "grad_norm": 0.836028396985879, "learning_rate": 1.686492735732596e-05, "loss": 0.8464, "step": 1467 }, { "epoch": 0.2816374493393127, "grad_norm": 0.9750752270688922, "learning_rate": 1.6860407006753012e-05, "loss": 0.827, "step": 1468 }, { "epoch": 0.2818293004628408, "grad_norm": 0.8877189701980254, "learning_rate": 1.6855884006415467e-05, "loss": 0.8383, "step": 1469 }, { "epoch": 0.282021151586369, "grad_norm": 0.9868833568069134, "learning_rate": 1.685135835806029e-05, "loss": 0.8719, "step": 1470 }, { "epoch": 0.28221300270989713, "grad_norm": 0.857342353828964, "learning_rate": 1.6846830063435468e-05, "loss": 0.7047, "step": 1471 }, { "epoch": 0.2824048538334253, "grad_norm": 0.8860495365498481, "learning_rate": 1.6842299124290003e-05, "loss": 0.8014, "step": 1472 }, { "epoch": 0.2825967049569534, "grad_norm": 0.8814248145380305, "learning_rate": 1.6837765542373937e-05, "loss": 0.8018, "step": 1473 }, { "epoch": 0.28278855608048153, "grad_norm": 0.9179764026142265, "learning_rate": 1.6833229319438316e-05, "loss": 0.8676, "step": 1474 }, { "epoch": 0.2829804072040097, "grad_norm": 1.08938246699311, "learning_rate": 1.6828690457235208e-05, "loss": 0.8208, "step": 1475 }, { "epoch": 0.28317225832753784, "grad_norm": 1.0637761071474288, "learning_rate": 1.682414895751771e-05, "loss": 0.8267, "step": 1476 }, { "epoch": 0.283364109451066, "grad_norm": 0.92136682596585, "learning_rate": 1.6819604822039924e-05, "loss": 0.7997, "step": 1477 }, { "epoch": 0.2835559605745941, "grad_norm": 1.0220104968191166, "learning_rate": 1.6815058052556988e-05, "loss": 0.8042, "step": 1478 }, { "epoch": 0.28374781169812224, "grad_norm": 1.036894394380365, "learning_rate": 1.681050865082504e-05, "loss": 0.8086, "step": 1479 }, { "epoch": 0.2839396628216504, "grad_norm": 0.8861017131038796, "learning_rate": 1.680595661860124e-05, "loss": 0.8224, "step": 1480 }, { "epoch": 0.28413151394517855, "grad_norm": 1.0189593749764416, "learning_rate": 1.680140195764377e-05, "loss": 0.8444, "step": 1481 }, { "epoch": 0.2843233650687067, "grad_norm": 1.0008452198637718, "learning_rate": 1.679684466971182e-05, "loss": 0.8049, "step": 1482 }, { "epoch": 0.2845152161922348, "grad_norm": 0.8719941498500676, "learning_rate": 1.6792284756565598e-05, "loss": 0.7998, "step": 1483 }, { "epoch": 0.28470706731576295, "grad_norm": 1.078822408825672, "learning_rate": 1.6787722219966327e-05, "loss": 0.8368, "step": 1484 }, { "epoch": 0.2848989184392911, "grad_norm": 0.9127368496943258, "learning_rate": 1.678315706167624e-05, "loss": 0.7794, "step": 1485 }, { "epoch": 0.28509076956281926, "grad_norm": 1.0081727296338598, "learning_rate": 1.6778589283458585e-05, "loss": 0.7761, "step": 1486 }, { "epoch": 0.2852826206863474, "grad_norm": 0.9537424453632422, "learning_rate": 1.6774018887077624e-05, "loss": 0.7808, "step": 1487 }, { "epoch": 0.28547447180987556, "grad_norm": 0.8889117446632666, "learning_rate": 1.6769445874298625e-05, "loss": 0.8015, "step": 1488 }, { "epoch": 0.28566632293340366, "grad_norm": 1.0168989720352488, "learning_rate": 1.676487024688787e-05, "loss": 0.813, "step": 1489 }, { "epoch": 0.2858581740569318, "grad_norm": 0.9677450986511748, "learning_rate": 1.6760292006612645e-05, "loss": 0.8239, "step": 1490 }, { "epoch": 0.28605002518045997, "grad_norm": 1.0331034657957678, "learning_rate": 1.6755711155241257e-05, "loss": 0.7276, "step": 1491 }, { "epoch": 0.2862418763039881, "grad_norm": 1.0077798667865936, "learning_rate": 1.6751127694543012e-05, "loss": 0.7627, "step": 1492 }, { "epoch": 0.28643372742751627, "grad_norm": 0.9130131445997544, "learning_rate": 1.6746541626288228e-05, "loss": 0.884, "step": 1493 }, { "epoch": 0.28662557855104437, "grad_norm": 0.9149165154380353, "learning_rate": 1.6741952952248228e-05, "loss": 0.7975, "step": 1494 }, { "epoch": 0.2868174296745725, "grad_norm": 0.929210294351959, "learning_rate": 1.673736167419534e-05, "loss": 0.8518, "step": 1495 }, { "epoch": 0.2870092807981007, "grad_norm": 1.0150063963500506, "learning_rate": 1.6732767793902904e-05, "loss": 0.8144, "step": 1496 }, { "epoch": 0.2872011319216288, "grad_norm": 1.033682039627521, "learning_rate": 1.6728171313145252e-05, "loss": 0.8051, "step": 1497 }, { "epoch": 0.287392983045157, "grad_norm": 0.973834946711255, "learning_rate": 1.6723572233697742e-05, "loss": 0.8203, "step": 1498 }, { "epoch": 0.2875848341686851, "grad_norm": 0.9981450652167332, "learning_rate": 1.6718970557336717e-05, "loss": 0.8204, "step": 1499 }, { "epoch": 0.28777668529221323, "grad_norm": 0.9856553869071647, "learning_rate": 1.6714366285839526e-05, "loss": 0.8705, "step": 1500 }, { "epoch": 0.2879685364157414, "grad_norm": 1.2031903082197677, "learning_rate": 1.6709759420984528e-05, "loss": 0.7018, "step": 1501 }, { "epoch": 0.28816038753926954, "grad_norm": 0.9399375904434141, "learning_rate": 1.670514996455108e-05, "loss": 0.8066, "step": 1502 }, { "epoch": 0.2883522386627977, "grad_norm": 0.9272508093632691, "learning_rate": 1.6700537918319533e-05, "loss": 0.8297, "step": 1503 }, { "epoch": 0.2885440897863258, "grad_norm": 0.9931213180545814, "learning_rate": 1.669592328407125e-05, "loss": 0.7991, "step": 1504 }, { "epoch": 0.28873594090985394, "grad_norm": 1.058200914278031, "learning_rate": 1.6691306063588583e-05, "loss": 0.8581, "step": 1505 }, { "epoch": 0.2889277920333821, "grad_norm": 0.911356851972104, "learning_rate": 1.6686686258654896e-05, "loss": 0.8384, "step": 1506 }, { "epoch": 0.28911964315691024, "grad_norm": 0.9656855480286468, "learning_rate": 1.6682063871054534e-05, "loss": 0.8136, "step": 1507 }, { "epoch": 0.2893114942804384, "grad_norm": 0.9401384609297091, "learning_rate": 1.6677438902572855e-05, "loss": 0.7861, "step": 1508 }, { "epoch": 0.2895033454039665, "grad_norm": 0.8680359466860573, "learning_rate": 1.6672811354996205e-05, "loss": 0.8327, "step": 1509 }, { "epoch": 0.28969519652749465, "grad_norm": 0.9076532572594107, "learning_rate": 1.6668181230111932e-05, "loss": 0.8537, "step": 1510 }, { "epoch": 0.2898870476510228, "grad_norm": 0.9395053331821702, "learning_rate": 1.666354852970837e-05, "loss": 0.8588, "step": 1511 }, { "epoch": 0.29007889877455095, "grad_norm": 0.8624556436873551, "learning_rate": 1.6658913255574858e-05, "loss": 0.8638, "step": 1512 }, { "epoch": 0.2902707498980791, "grad_norm": 0.9716079828785394, "learning_rate": 1.6654275409501726e-05, "loss": 0.8254, "step": 1513 }, { "epoch": 0.29046260102160726, "grad_norm": 0.9180344963094601, "learning_rate": 1.664963499328029e-05, "loss": 0.7865, "step": 1514 }, { "epoch": 0.29065445214513536, "grad_norm": 0.9790565090408505, "learning_rate": 1.6644992008702873e-05, "loss": 0.8238, "step": 1515 }, { "epoch": 0.2908463032686635, "grad_norm": 1.052196632742177, "learning_rate": 1.664034645756278e-05, "loss": 0.8276, "step": 1516 }, { "epoch": 0.29103815439219166, "grad_norm": 0.9583412981172171, "learning_rate": 1.6635698341654305e-05, "loss": 0.731, "step": 1517 }, { "epoch": 0.2912300055157198, "grad_norm": 0.8384529748365595, "learning_rate": 1.663104766277274e-05, "loss": 0.349, "step": 1518 }, { "epoch": 0.29142185663924797, "grad_norm": 0.9462520868212168, "learning_rate": 1.662639442271436e-05, "loss": 0.7938, "step": 1519 }, { "epoch": 0.29161370776277606, "grad_norm": 0.86517700065956, "learning_rate": 1.6621738623276442e-05, "loss": 0.8689, "step": 1520 }, { "epoch": 0.2918055588863042, "grad_norm": 0.6532327468433734, "learning_rate": 1.6617080266257234e-05, "loss": 0.3295, "step": 1521 }, { "epoch": 0.29199741000983237, "grad_norm": 0.9405145502848022, "learning_rate": 1.661241935345599e-05, "loss": 0.8531, "step": 1522 }, { "epoch": 0.2921892611333605, "grad_norm": 0.8840813060849477, "learning_rate": 1.6607755886672928e-05, "loss": 0.7961, "step": 1523 }, { "epoch": 0.2923811122568887, "grad_norm": 1.0571732814911499, "learning_rate": 1.660308986770928e-05, "loss": 0.7106, "step": 1524 }, { "epoch": 0.2925729633804168, "grad_norm": 0.9639909001938909, "learning_rate": 1.659842129836724e-05, "loss": 0.8043, "step": 1525 }, { "epoch": 0.2927648145039449, "grad_norm": 0.9807123916337369, "learning_rate": 1.659375018045e-05, "loss": 0.8523, "step": 1526 }, { "epoch": 0.2929566656274731, "grad_norm": 1.0014874039167392, "learning_rate": 1.6589076515761734e-05, "loss": 0.8874, "step": 1527 }, { "epoch": 0.29314851675100123, "grad_norm": 0.9937439873183328, "learning_rate": 1.65844003061076e-05, "loss": 0.7448, "step": 1528 }, { "epoch": 0.2933403678745294, "grad_norm": 1.0703689030274874, "learning_rate": 1.6579721553293736e-05, "loss": 0.8332, "step": 1529 }, { "epoch": 0.2935322189980575, "grad_norm": 0.8836682905744333, "learning_rate": 1.6575040259127263e-05, "loss": 0.8581, "step": 1530 }, { "epoch": 0.29372407012158563, "grad_norm": 1.0598258606830218, "learning_rate": 1.6570356425416287e-05, "loss": 0.8152, "step": 1531 }, { "epoch": 0.2939159212451138, "grad_norm": 1.1779036645059366, "learning_rate": 1.656567005396989e-05, "loss": 0.8776, "step": 1532 }, { "epoch": 0.29410777236864194, "grad_norm": 0.9686689969078348, "learning_rate": 1.6560981146598145e-05, "loss": 0.8572, "step": 1533 }, { "epoch": 0.2942996234921701, "grad_norm": 0.9566244442946576, "learning_rate": 1.6556289705112085e-05, "loss": 0.8924, "step": 1534 }, { "epoch": 0.29449147461569825, "grad_norm": 0.9525230780180596, "learning_rate": 1.655159573132374e-05, "loss": 0.7695, "step": 1535 }, { "epoch": 0.29468332573922634, "grad_norm": 0.9852758176168301, "learning_rate": 1.6546899227046107e-05, "loss": 0.3122, "step": 1536 }, { "epoch": 0.2948751768627545, "grad_norm": 0.9035726698018673, "learning_rate": 1.654220019409317e-05, "loss": 0.8293, "step": 1537 }, { "epoch": 0.29506702798628265, "grad_norm": 1.058597780955452, "learning_rate": 1.6537498634279877e-05, "loss": 0.835, "step": 1538 }, { "epoch": 0.2952588791098108, "grad_norm": 1.0589253113380477, "learning_rate": 1.6532794549422164e-05, "loss": 0.81, "step": 1539 }, { "epoch": 0.29545073023333895, "grad_norm": 0.8956496749671884, "learning_rate": 1.652808794133694e-05, "loss": 0.8271, "step": 1540 }, { "epoch": 0.29564258135686705, "grad_norm": 0.6738330920332147, "learning_rate": 1.6523378811842077e-05, "loss": 0.3049, "step": 1541 }, { "epoch": 0.2958344324803952, "grad_norm": 0.9696353107527925, "learning_rate": 1.651866716275644e-05, "loss": 0.7777, "step": 1542 }, { "epoch": 0.29602628360392336, "grad_norm": 0.9046772297773298, "learning_rate": 1.651395299589985e-05, "loss": 0.8297, "step": 1543 }, { "epoch": 0.2962181347274515, "grad_norm": 1.0014337076324407, "learning_rate": 1.6509236313093108e-05, "loss": 0.8539, "step": 1544 }, { "epoch": 0.29640998585097966, "grad_norm": 1.0186295737533435, "learning_rate": 1.6504517116157993e-05, "loss": 0.8691, "step": 1545 }, { "epoch": 0.29660183697450776, "grad_norm": 0.8738085008005928, "learning_rate": 1.6499795406917237e-05, "loss": 0.8377, "step": 1546 }, { "epoch": 0.2967936880980359, "grad_norm": 0.9734484368994005, "learning_rate": 1.6495071187194564e-05, "loss": 0.7846, "step": 1547 }, { "epoch": 0.29698553922156407, "grad_norm": 0.9505225603596377, "learning_rate": 1.6490344458814655e-05, "loss": 0.7819, "step": 1548 }, { "epoch": 0.2971773903450922, "grad_norm": 1.131022609213219, "learning_rate": 1.6485615223603157e-05, "loss": 0.854, "step": 1549 }, { "epoch": 0.29736924146862037, "grad_norm": 1.0173877052566915, "learning_rate": 1.6480883483386697e-05, "loss": 0.8005, "step": 1550 }, { "epoch": 0.29756109259214847, "grad_norm": 0.9796119236156676, "learning_rate": 1.6476149239992857e-05, "loss": 0.7427, "step": 1551 }, { "epoch": 0.2977529437156766, "grad_norm": 0.6952027088775811, "learning_rate": 1.6471412495250195e-05, "loss": 0.3569, "step": 1552 }, { "epoch": 0.2979447948392048, "grad_norm": 0.82636553292449, "learning_rate": 1.6466673250988235e-05, "loss": 0.7949, "step": 1553 }, { "epoch": 0.2981366459627329, "grad_norm": 0.9143895762880334, "learning_rate": 1.6461931509037457e-05, "loss": 0.8138, "step": 1554 }, { "epoch": 0.2983284970862611, "grad_norm": 0.947452035800288, "learning_rate": 1.6457187271229318e-05, "loss": 0.8143, "step": 1555 }, { "epoch": 0.2985203482097892, "grad_norm": 0.9420839564372412, "learning_rate": 1.645244053939623e-05, "loss": 0.8428, "step": 1556 }, { "epoch": 0.29871219933331733, "grad_norm": 1.039977480041524, "learning_rate": 1.644769131537157e-05, "loss": 0.7634, "step": 1557 }, { "epoch": 0.2989040504568455, "grad_norm": 0.984219595445929, "learning_rate": 1.644293960098968e-05, "loss": 0.8335, "step": 1558 }, { "epoch": 0.29909590158037364, "grad_norm": 0.9683846433632756, "learning_rate": 1.6438185398085868e-05, "loss": 0.7968, "step": 1559 }, { "epoch": 0.2992877527039018, "grad_norm": 0.9284656144256723, "learning_rate": 1.6433428708496393e-05, "loss": 0.7567, "step": 1560 }, { "epoch": 0.29947960382742994, "grad_norm": 0.64762286561599, "learning_rate": 1.642866953405848e-05, "loss": 0.3377, "step": 1561 }, { "epoch": 0.29967145495095804, "grad_norm": 0.9016130445507848, "learning_rate": 1.642390787661031e-05, "loss": 0.7861, "step": 1562 }, { "epoch": 0.2998633060744862, "grad_norm": 0.9637878549451843, "learning_rate": 1.6419143737991033e-05, "loss": 0.824, "step": 1563 }, { "epoch": 0.30005515719801434, "grad_norm": 1.0090936523241334, "learning_rate": 1.6414377120040747e-05, "loss": 0.7566, "step": 1564 }, { "epoch": 0.3002470083215425, "grad_norm": 0.9434921051196383, "learning_rate": 1.6409608024600508e-05, "loss": 0.8272, "step": 1565 }, { "epoch": 0.30043885944507065, "grad_norm": 0.8953901508382491, "learning_rate": 1.640483645351234e-05, "loss": 0.8673, "step": 1566 }, { "epoch": 0.30063071056859875, "grad_norm": 0.9483904646287536, "learning_rate": 1.640006240861921e-05, "loss": 0.8093, "step": 1567 }, { "epoch": 0.3008225616921269, "grad_norm": 1.0306453007953944, "learning_rate": 1.6395285891765046e-05, "loss": 0.8159, "step": 1568 }, { "epoch": 0.30101441281565505, "grad_norm": 0.6840380520025414, "learning_rate": 1.639050690479473e-05, "loss": 0.2951, "step": 1569 }, { "epoch": 0.3012062639391832, "grad_norm": 0.7147414556702629, "learning_rate": 1.6385725449554104e-05, "loss": 0.3418, "step": 1570 }, { "epoch": 0.30139811506271136, "grad_norm": 0.707952897238313, "learning_rate": 1.638094152788995e-05, "loss": 0.3526, "step": 1571 }, { "epoch": 0.30158996618623946, "grad_norm": 1.000662791475811, "learning_rate": 1.6376155141650015e-05, "loss": 0.817, "step": 1572 }, { "epoch": 0.3017818173097676, "grad_norm": 0.9247095133815016, "learning_rate": 1.6371366292682993e-05, "loss": 0.8475, "step": 1573 }, { "epoch": 0.30197366843329576, "grad_norm": 0.9337111115531147, "learning_rate": 1.636657498283853e-05, "loss": 0.7548, "step": 1574 }, { "epoch": 0.3021655195568239, "grad_norm": 0.7972130510153839, "learning_rate": 1.6361781213967222e-05, "loss": 0.7858, "step": 1575 }, { "epoch": 0.30235737068035207, "grad_norm": 0.9741981708078354, "learning_rate": 1.6356984987920617e-05, "loss": 0.8904, "step": 1576 }, { "epoch": 0.30254922180388016, "grad_norm": 0.8823512084255166, "learning_rate": 1.6352186306551206e-05, "loss": 0.8576, "step": 1577 }, { "epoch": 0.3027410729274083, "grad_norm": 0.8525811274396139, "learning_rate": 1.6347385171712434e-05, "loss": 0.8209, "step": 1578 }, { "epoch": 0.30293292405093647, "grad_norm": 0.9168688173484897, "learning_rate": 1.6342581585258695e-05, "loss": 0.8171, "step": 1579 }, { "epoch": 0.3031247751744646, "grad_norm": 0.9628048705091979, "learning_rate": 1.633777554904532e-05, "loss": 0.8221, "step": 1580 }, { "epoch": 0.3033166262979928, "grad_norm": 0.9237255891488634, "learning_rate": 1.63329670649286e-05, "loss": 0.7915, "step": 1581 }, { "epoch": 0.3035084774215209, "grad_norm": 1.0055497104655835, "learning_rate": 1.632815613476576e-05, "loss": 0.8902, "step": 1582 }, { "epoch": 0.303700328545049, "grad_norm": 0.948990749560267, "learning_rate": 1.632334276041498e-05, "loss": 0.8352, "step": 1583 }, { "epoch": 0.3038921796685772, "grad_norm": 0.9076444574230044, "learning_rate": 1.6318526943735373e-05, "loss": 0.7808, "step": 1584 }, { "epoch": 0.30408403079210533, "grad_norm": 0.9497833315400902, "learning_rate": 1.6313708686587002e-05, "loss": 0.8429, "step": 1585 }, { "epoch": 0.3042758819156335, "grad_norm": 0.9667716068279422, "learning_rate": 1.630888799083087e-05, "loss": 0.8476, "step": 1586 }, { "epoch": 0.30446773303916164, "grad_norm": 0.8738785358136284, "learning_rate": 1.6304064858328928e-05, "loss": 0.8168, "step": 1587 }, { "epoch": 0.30465958416268973, "grad_norm": 0.9751949261448503, "learning_rate": 1.6299239290944062e-05, "loss": 0.8962, "step": 1588 }, { "epoch": 0.3048514352862179, "grad_norm": 0.9994749391442251, "learning_rate": 1.629441129054009e-05, "loss": 0.8759, "step": 1589 }, { "epoch": 0.30504328640974604, "grad_norm": 1.0775623773431846, "learning_rate": 1.6289580858981796e-05, "loss": 0.8179, "step": 1590 }, { "epoch": 0.3052351375332742, "grad_norm": 0.9827386437318009, "learning_rate": 1.628474799813488e-05, "loss": 0.8744, "step": 1591 }, { "epoch": 0.30542698865680235, "grad_norm": 0.9980058269539916, "learning_rate": 1.627991270986598e-05, "loss": 0.7878, "step": 1592 }, { "epoch": 0.30561883978033044, "grad_norm": 0.9365296414939692, "learning_rate": 1.6275074996042687e-05, "loss": 0.7439, "step": 1593 }, { "epoch": 0.3058106909038586, "grad_norm": 1.1083421222170036, "learning_rate": 1.6270234858533512e-05, "loss": 0.8233, "step": 1594 }, { "epoch": 0.30600254202738675, "grad_norm": 1.0010842310884507, "learning_rate": 1.626539229920792e-05, "loss": 0.3837, "step": 1595 }, { "epoch": 0.3061943931509149, "grad_norm": 0.9813924846142904, "learning_rate": 1.6260547319936297e-05, "loss": 0.8471, "step": 1596 }, { "epoch": 0.30638624427444305, "grad_norm": 1.1127074972144824, "learning_rate": 1.6255699922589968e-05, "loss": 0.874, "step": 1597 }, { "epoch": 0.30657809539797115, "grad_norm": 0.9340123874497167, "learning_rate": 1.6250850109041194e-05, "loss": 0.7891, "step": 1598 }, { "epoch": 0.3067699465214993, "grad_norm": 0.8784267046447931, "learning_rate": 1.6245997881163166e-05, "loss": 0.8482, "step": 1599 }, { "epoch": 0.30696179764502746, "grad_norm": 1.1221368647275711, "learning_rate": 1.6241143240830014e-05, "loss": 0.8084, "step": 1600 }, { "epoch": 0.3071536487685556, "grad_norm": 0.840458083369212, "learning_rate": 1.6236286189916785e-05, "loss": 0.7946, "step": 1601 }, { "epoch": 0.30734549989208376, "grad_norm": 1.0454129315774787, "learning_rate": 1.6231426730299474e-05, "loss": 0.8429, "step": 1602 }, { "epoch": 0.30753735101561186, "grad_norm": 0.9671509062637034, "learning_rate": 1.6226564863855004e-05, "loss": 0.8376, "step": 1603 }, { "epoch": 0.30772920213914, "grad_norm": 1.127123061870388, "learning_rate": 1.6221700592461214e-05, "loss": 0.7637, "step": 1604 }, { "epoch": 0.30792105326266817, "grad_norm": 0.9001096167288029, "learning_rate": 1.6216833917996884e-05, "loss": 0.849, "step": 1605 }, { "epoch": 0.3081129043861963, "grad_norm": 0.8935846732619905, "learning_rate": 1.621196484234172e-05, "loss": 0.8558, "step": 1606 }, { "epoch": 0.30830475550972447, "grad_norm": 0.8759945384921592, "learning_rate": 1.6207093367376353e-05, "loss": 0.8267, "step": 1607 }, { "epoch": 0.3084966066332526, "grad_norm": 0.9828564906211124, "learning_rate": 1.620221949498235e-05, "loss": 0.7928, "step": 1608 }, { "epoch": 0.3086884577567807, "grad_norm": 1.0087079232820704, "learning_rate": 1.619734322704218e-05, "loss": 0.9158, "step": 1609 }, { "epoch": 0.3088803088803089, "grad_norm": 0.9543001062198215, "learning_rate": 1.619246456543927e-05, "loss": 0.8821, "step": 1610 }, { "epoch": 0.309072160003837, "grad_norm": 0.8509487692283784, "learning_rate": 1.618758351205794e-05, "loss": 0.8118, "step": 1611 }, { "epoch": 0.3092640111273652, "grad_norm": 0.8960730316871166, "learning_rate": 1.6182700068783463e-05, "loss": 0.7971, "step": 1612 }, { "epoch": 0.30945586225089333, "grad_norm": 0.908405365323263, "learning_rate": 1.6177814237502012e-05, "loss": 0.8531, "step": 1613 }, { "epoch": 0.30964771337442143, "grad_norm": 0.9698322349867115, "learning_rate": 1.6172926020100695e-05, "loss": 0.786, "step": 1614 }, { "epoch": 0.3098395644979496, "grad_norm": 0.8312077017221788, "learning_rate": 1.6168035418467535e-05, "loss": 0.7856, "step": 1615 }, { "epoch": 0.31003141562147774, "grad_norm": 0.8974249101294401, "learning_rate": 1.616314243449148e-05, "loss": 0.8437, "step": 1616 }, { "epoch": 0.3102232667450059, "grad_norm": 0.9725460754279407, "learning_rate": 1.6158247070062397e-05, "loss": 0.7852, "step": 1617 }, { "epoch": 0.31041511786853404, "grad_norm": 0.9779044996617474, "learning_rate": 1.6153349327071074e-05, "loss": 0.8519, "step": 1618 }, { "epoch": 0.31060696899206214, "grad_norm": 0.8453069434762399, "learning_rate": 1.614844920740921e-05, "loss": 0.8238, "step": 1619 }, { "epoch": 0.3107988201155903, "grad_norm": 0.9551535920852301, "learning_rate": 1.6143546712969436e-05, "loss": 0.8227, "step": 1620 }, { "epoch": 0.31099067123911844, "grad_norm": 0.8909154269879743, "learning_rate": 1.6138641845645288e-05, "loss": 0.8083, "step": 1621 }, { "epoch": 0.3111825223626466, "grad_norm": 0.8224365220981588, "learning_rate": 1.6133734607331223e-05, "loss": 0.7851, "step": 1622 }, { "epoch": 0.31137437348617475, "grad_norm": 1.060985599438847, "learning_rate": 1.612882499992261e-05, "loss": 0.8587, "step": 1623 }, { "epoch": 0.31156622460970285, "grad_norm": 0.9700731020661918, "learning_rate": 1.6123913025315743e-05, "loss": 0.799, "step": 1624 }, { "epoch": 0.311758075733231, "grad_norm": 0.8906722065726919, "learning_rate": 1.611899868540782e-05, "loss": 0.3461, "step": 1625 }, { "epoch": 0.31194992685675915, "grad_norm": 0.952399338774699, "learning_rate": 1.6114081982096955e-05, "loss": 0.8406, "step": 1626 }, { "epoch": 0.3121417779802873, "grad_norm": 1.057358017343743, "learning_rate": 1.610916291728218e-05, "loss": 0.7834, "step": 1627 }, { "epoch": 0.31233362910381546, "grad_norm": 1.0391516409768282, "learning_rate": 1.6104241492863437e-05, "loss": 0.8585, "step": 1628 }, { "epoch": 0.31252548022734356, "grad_norm": 0.8789994009788589, "learning_rate": 1.6099317710741565e-05, "loss": 0.7871, "step": 1629 }, { "epoch": 0.3127173313508717, "grad_norm": 1.0778026385848787, "learning_rate": 1.6094391572818345e-05, "loss": 0.9095, "step": 1630 }, { "epoch": 0.31290918247439986, "grad_norm": 1.0202620840655603, "learning_rate": 1.6089463080996432e-05, "loss": 0.8314, "step": 1631 }, { "epoch": 0.313101033597928, "grad_norm": 1.1040448919673582, "learning_rate": 1.608453223717942e-05, "loss": 0.8314, "step": 1632 }, { "epoch": 0.31329288472145617, "grad_norm": 0.975733128449985, "learning_rate": 1.607959904327179e-05, "loss": 0.7856, "step": 1633 }, { "epoch": 0.3134847358449843, "grad_norm": 1.1209942053834092, "learning_rate": 1.607466350117895e-05, "loss": 0.8368, "step": 1634 }, { "epoch": 0.3136765869685124, "grad_norm": 1.6010547799898918, "learning_rate": 1.6069725612807193e-05, "loss": 0.9173, "step": 1635 }, { "epoch": 0.31386843809204057, "grad_norm": 0.9959690371355304, "learning_rate": 1.6064785380063736e-05, "loss": 0.821, "step": 1636 }, { "epoch": 0.3140602892155687, "grad_norm": 1.0409779250151259, "learning_rate": 1.6059842804856693e-05, "loss": 0.8734, "step": 1637 }, { "epoch": 0.3142521403390969, "grad_norm": 1.088773119367386, "learning_rate": 1.605489788909509e-05, "loss": 0.9053, "step": 1638 }, { "epoch": 0.314443991462625, "grad_norm": 0.9681063297511199, "learning_rate": 1.6049950634688842e-05, "loss": 0.7979, "step": 1639 }, { "epoch": 0.3146358425861531, "grad_norm": 0.9848334486412698, "learning_rate": 1.6045001043548785e-05, "loss": 0.8606, "step": 1640 }, { "epoch": 0.3148276937096813, "grad_norm": 0.9417876166135545, "learning_rate": 1.6040049117586648e-05, "loss": 0.8884, "step": 1641 }, { "epoch": 0.31501954483320943, "grad_norm": 0.8386791412706713, "learning_rate": 1.6035094858715065e-05, "loss": 0.848, "step": 1642 }, { "epoch": 0.3152113959567376, "grad_norm": 0.9665030907503288, "learning_rate": 1.6030138268847564e-05, "loss": 0.7868, "step": 1643 }, { "epoch": 0.31540324708026574, "grad_norm": 0.9984664045161847, "learning_rate": 1.6025179349898587e-05, "loss": 0.8538, "step": 1644 }, { "epoch": 0.31559509820379383, "grad_norm": 1.0761502545883959, "learning_rate": 1.6020218103783463e-05, "loss": 0.7845, "step": 1645 }, { "epoch": 0.315786949327322, "grad_norm": 0.9544552907201299, "learning_rate": 1.6015254532418425e-05, "loss": 0.8767, "step": 1646 }, { "epoch": 0.31597880045085014, "grad_norm": 1.034382849383631, "learning_rate": 1.6010288637720605e-05, "loss": 0.8246, "step": 1647 }, { "epoch": 0.3161706515743783, "grad_norm": 0.9866233430991767, "learning_rate": 1.6005320421608022e-05, "loss": 0.8765, "step": 1648 }, { "epoch": 0.31636250269790644, "grad_norm": 1.1323455564112712, "learning_rate": 1.6000349885999618e-05, "loss": 0.8086, "step": 1649 }, { "epoch": 0.31655435382143454, "grad_norm": 0.8622851462410567, "learning_rate": 1.59953770328152e-05, "loss": 0.8025, "step": 1650 }, { "epoch": 0.3167462049449627, "grad_norm": 0.9505047745980733, "learning_rate": 1.5990401863975488e-05, "loss": 0.8287, "step": 1651 }, { "epoch": 0.31693805606849085, "grad_norm": 0.9713130571604524, "learning_rate": 1.5985424381402087e-05, "loss": 0.8673, "step": 1652 }, { "epoch": 0.317129907192019, "grad_norm": 1.0426479114796847, "learning_rate": 1.5980444587017506e-05, "loss": 0.7698, "step": 1653 }, { "epoch": 0.31732175831554715, "grad_norm": 1.100328122871799, "learning_rate": 1.5975462482745143e-05, "loss": 0.7926, "step": 1654 }, { "epoch": 0.31751360943907525, "grad_norm": 0.9079371732165704, "learning_rate": 1.5970478070509278e-05, "loss": 0.838, "step": 1655 }, { "epoch": 0.3177054605626034, "grad_norm": 0.8689832302939504, "learning_rate": 1.5965491352235104e-05, "loss": 0.8519, "step": 1656 }, { "epoch": 0.31789731168613156, "grad_norm": 1.0186223622491102, "learning_rate": 1.5960502329848683e-05, "loss": 0.8528, "step": 1657 }, { "epoch": 0.3180891628096597, "grad_norm": 0.9186542993032573, "learning_rate": 1.5955511005276975e-05, "loss": 0.8202, "step": 1658 }, { "epoch": 0.31828101393318786, "grad_norm": 0.9638840879384364, "learning_rate": 1.595051738044783e-05, "loss": 0.7228, "step": 1659 }, { "epoch": 0.318472865056716, "grad_norm": 1.0182964127187046, "learning_rate": 1.594552145728999e-05, "loss": 0.8129, "step": 1660 }, { "epoch": 0.3186647161802441, "grad_norm": 0.967434266395649, "learning_rate": 1.5940523237733082e-05, "loss": 0.7444, "step": 1661 }, { "epoch": 0.31885656730377226, "grad_norm": 0.9234518091255398, "learning_rate": 1.5935522723707617e-05, "loss": 0.826, "step": 1662 }, { "epoch": 0.3190484184273004, "grad_norm": 0.9287953904059193, "learning_rate": 1.593051991714499e-05, "loss": 0.8504, "step": 1663 }, { "epoch": 0.31924026955082857, "grad_norm": 1.0065854667498544, "learning_rate": 1.5925514819977495e-05, "loss": 0.8002, "step": 1664 }, { "epoch": 0.3194321206743567, "grad_norm": 0.8303622843107318, "learning_rate": 1.5920507434138294e-05, "loss": 0.8589, "step": 1665 }, { "epoch": 0.3196239717978848, "grad_norm": 0.8760325617841693, "learning_rate": 1.5915497761561443e-05, "loss": 0.7964, "step": 1666 }, { "epoch": 0.319815822921413, "grad_norm": 0.9346979636484212, "learning_rate": 1.5910485804181878e-05, "loss": 0.7983, "step": 1667 }, { "epoch": 0.3200076740449411, "grad_norm": 0.94759333306239, "learning_rate": 1.5905471563935416e-05, "loss": 0.7765, "step": 1668 }, { "epoch": 0.3201995251684693, "grad_norm": 0.9030629703032186, "learning_rate": 1.5900455042758763e-05, "loss": 0.8008, "step": 1669 }, { "epoch": 0.32039137629199743, "grad_norm": 1.0508757993014086, "learning_rate": 1.58954362425895e-05, "loss": 0.7857, "step": 1670 }, { "epoch": 0.32058322741552553, "grad_norm": 0.9924208152128723, "learning_rate": 1.5890415165366084e-05, "loss": 0.8466, "step": 1671 }, { "epoch": 0.3207750785390537, "grad_norm": 0.9283654912962085, "learning_rate": 1.588539181302786e-05, "loss": 0.8383, "step": 1672 }, { "epoch": 0.32096692966258183, "grad_norm": 0.8580955040517579, "learning_rate": 1.588036618751505e-05, "loss": 0.8052, "step": 1673 }, { "epoch": 0.32115878078611, "grad_norm": 0.8499985106486015, "learning_rate": 1.587533829076875e-05, "loss": 0.8616, "step": 1674 }, { "epoch": 0.32135063190963814, "grad_norm": 0.9182726298482699, "learning_rate": 1.5870308124730936e-05, "loss": 0.8119, "step": 1675 }, { "epoch": 0.32154248303316624, "grad_norm": 0.9286550343031877, "learning_rate": 1.586527569134446e-05, "loss": 0.8159, "step": 1676 }, { "epoch": 0.3217343341566944, "grad_norm": 0.937047444154367, "learning_rate": 1.5860240992553052e-05, "loss": 0.7918, "step": 1677 }, { "epoch": 0.32192618528022254, "grad_norm": 0.8904518179679317, "learning_rate": 1.585520403030131e-05, "loss": 0.8018, "step": 1678 }, { "epoch": 0.3221180364037507, "grad_norm": 0.9326289168726303, "learning_rate": 1.5850164806534714e-05, "loss": 0.8548, "step": 1679 }, { "epoch": 0.32230988752727885, "grad_norm": 0.8852388236869053, "learning_rate": 1.5845123323199615e-05, "loss": 0.7389, "step": 1680 }, { "epoch": 0.32250173865080695, "grad_norm": 0.8714981147194086, "learning_rate": 1.5840079582243235e-05, "loss": 0.8335, "step": 1681 }, { "epoch": 0.3226935897743351, "grad_norm": 0.9778286206155632, "learning_rate": 1.583503358561367e-05, "loss": 0.8717, "step": 1682 }, { "epoch": 0.32288544089786325, "grad_norm": 1.1557781778453304, "learning_rate": 1.5829985335259887e-05, "loss": 0.8636, "step": 1683 }, { "epoch": 0.3230772920213914, "grad_norm": 0.916542221353148, "learning_rate": 1.582493483313172e-05, "loss": 0.8158, "step": 1684 }, { "epoch": 0.32326914314491956, "grad_norm": 0.8538689391450779, "learning_rate": 1.5819882081179883e-05, "loss": 0.3036, "step": 1685 }, { "epoch": 0.3234609942684477, "grad_norm": 0.8559513749907014, "learning_rate": 1.5814827081355943e-05, "loss": 0.7937, "step": 1686 }, { "epoch": 0.3236528453919758, "grad_norm": 1.0211954523989824, "learning_rate": 1.580976983561235e-05, "loss": 0.7782, "step": 1687 }, { "epoch": 0.32384469651550396, "grad_norm": 1.0142976979375173, "learning_rate": 1.5804710345902406e-05, "loss": 0.8576, "step": 1688 }, { "epoch": 0.3240365476390321, "grad_norm": 1.0071044594875604, "learning_rate": 1.5799648614180302e-05, "loss": 0.7639, "step": 1689 }, { "epoch": 0.32422839876256027, "grad_norm": 1.0950985345017394, "learning_rate": 1.5794584642401076e-05, "loss": 0.8703, "step": 1690 }, { "epoch": 0.3244202498860884, "grad_norm": 1.04162185345971, "learning_rate": 1.5789518432520635e-05, "loss": 0.81, "step": 1691 }, { "epoch": 0.3246121010096165, "grad_norm": 1.0165485973487176, "learning_rate": 1.5784449986495755e-05, "loss": 0.8046, "step": 1692 }, { "epoch": 0.32480395213314467, "grad_norm": 0.9411344841269614, "learning_rate": 1.5779379306284074e-05, "loss": 0.8344, "step": 1693 }, { "epoch": 0.3249958032566728, "grad_norm": 1.1208215965162414, "learning_rate": 1.577430639384409e-05, "loss": 0.7935, "step": 1694 }, { "epoch": 0.325187654380201, "grad_norm": 0.9767268402654011, "learning_rate": 1.5769231251135165e-05, "loss": 0.8302, "step": 1695 }, { "epoch": 0.3253795055037291, "grad_norm": 0.8377652073512112, "learning_rate": 1.5764153880117528e-05, "loss": 0.7946, "step": 1696 }, { "epoch": 0.3255713566272572, "grad_norm": 0.8421480388103907, "learning_rate": 1.5759074282752254e-05, "loss": 0.8565, "step": 1697 }, { "epoch": 0.3257632077507854, "grad_norm": 0.9430754053056242, "learning_rate": 1.57539924610013e-05, "loss": 0.8436, "step": 1698 }, { "epoch": 0.32595505887431353, "grad_norm": 0.90904868431038, "learning_rate": 1.574890841682746e-05, "loss": 0.8392, "step": 1699 }, { "epoch": 0.3261469099978417, "grad_norm": 0.7466859052456158, "learning_rate": 1.5743822152194402e-05, "loss": 0.3258, "step": 1700 }, { "epoch": 0.32633876112136984, "grad_norm": 0.8540246891324551, "learning_rate": 1.5738733669066642e-05, "loss": 0.7588, "step": 1701 }, { "epoch": 0.32653061224489793, "grad_norm": 0.9620815006507262, "learning_rate": 1.5733642969409553e-05, "loss": 0.8081, "step": 1702 }, { "epoch": 0.3267224633684261, "grad_norm": 0.921437882312454, "learning_rate": 1.5728550055189374e-05, "loss": 0.8023, "step": 1703 }, { "epoch": 0.32691431449195424, "grad_norm": 0.8797632851313713, "learning_rate": 1.572345492837319e-05, "loss": 0.8542, "step": 1704 }, { "epoch": 0.3271061656154824, "grad_norm": 0.896476140629798, "learning_rate": 1.5718357590928945e-05, "loss": 0.7949, "step": 1705 }, { "epoch": 0.32729801673901054, "grad_norm": 0.9914408850287956, "learning_rate": 1.5713258044825437e-05, "loss": 0.821, "step": 1706 }, { "epoch": 0.3274898678625387, "grad_norm": 0.9425352621385975, "learning_rate": 1.5708156292032308e-05, "loss": 0.7888, "step": 1707 }, { "epoch": 0.3276817189860668, "grad_norm": 1.1608314276391487, "learning_rate": 1.5703052334520068e-05, "loss": 0.8235, "step": 1708 }, { "epoch": 0.32787357010959495, "grad_norm": 0.89907480368359, "learning_rate": 1.5697946174260065e-05, "loss": 0.8622, "step": 1709 }, { "epoch": 0.3280654212331231, "grad_norm": 1.0349545435746097, "learning_rate": 1.5692837813224502e-05, "loss": 0.8255, "step": 1710 }, { "epoch": 0.32825727235665125, "grad_norm": 0.9618725374469226, "learning_rate": 1.5687727253386443e-05, "loss": 0.8125, "step": 1711 }, { "epoch": 0.3284491234801794, "grad_norm": 0.950821581167267, "learning_rate": 1.568261449671978e-05, "loss": 0.8292, "step": 1712 }, { "epoch": 0.3286409746037075, "grad_norm": 0.9282194785785218, "learning_rate": 1.567749954519927e-05, "loss": 0.8057, "step": 1713 }, { "epoch": 0.32883282572723566, "grad_norm": 0.9097039097000623, "learning_rate": 1.5672382400800514e-05, "loss": 0.8831, "step": 1714 }, { "epoch": 0.3290246768507638, "grad_norm": 0.9091283968571121, "learning_rate": 1.5667263065499954e-05, "loss": 0.8612, "step": 1715 }, { "epoch": 0.32921652797429196, "grad_norm": 0.919144406691309, "learning_rate": 1.5662141541274886e-05, "loss": 0.8114, "step": 1716 }, { "epoch": 0.3294083790978201, "grad_norm": 0.8604609821899977, "learning_rate": 1.5657017830103448e-05, "loss": 0.7884, "step": 1717 }, { "epoch": 0.3296002302213482, "grad_norm": 0.6802375417125089, "learning_rate": 1.5651891933964623e-05, "loss": 0.3401, "step": 1718 }, { "epoch": 0.32979208134487636, "grad_norm": 1.014334211626853, "learning_rate": 1.5646763854838233e-05, "loss": 0.7628, "step": 1719 }, { "epoch": 0.3299839324684045, "grad_norm": 0.9647134612147358, "learning_rate": 1.564163359470496e-05, "loss": 0.829, "step": 1720 }, { "epoch": 0.33017578359193267, "grad_norm": 1.1173773972955354, "learning_rate": 1.5636501155546303e-05, "loss": 0.865, "step": 1721 }, { "epoch": 0.3303676347154608, "grad_norm": 1.0138284264030561, "learning_rate": 1.5631366539344622e-05, "loss": 0.8126, "step": 1722 }, { "epoch": 0.3305594858389889, "grad_norm": 1.0855193310482611, "learning_rate": 1.5626229748083116e-05, "loss": 0.8268, "step": 1723 }, { "epoch": 0.3307513369625171, "grad_norm": 0.9155328880465308, "learning_rate": 1.562109078374581e-05, "loss": 0.7453, "step": 1724 }, { "epoch": 0.3309431880860452, "grad_norm": 1.0371791477334875, "learning_rate": 1.561594964831759e-05, "loss": 0.8923, "step": 1725 }, { "epoch": 0.3311350392095734, "grad_norm": 0.8547599675620361, "learning_rate": 1.561080634378416e-05, "loss": 0.8227, "step": 1726 }, { "epoch": 0.33132689033310153, "grad_norm": 0.8726974137977447, "learning_rate": 1.5605660872132074e-05, "loss": 0.8218, "step": 1727 }, { "epoch": 0.33151874145662963, "grad_norm": 0.9246052127994246, "learning_rate": 1.560051323534872e-05, "loss": 0.8168, "step": 1728 }, { "epoch": 0.3317105925801578, "grad_norm": 0.8874937829196134, "learning_rate": 1.5595363435422323e-05, "loss": 0.8419, "step": 1729 }, { "epoch": 0.33190244370368593, "grad_norm": 0.9280130310612493, "learning_rate": 1.559021147434194e-05, "loss": 0.7524, "step": 1730 }, { "epoch": 0.3320942948272141, "grad_norm": 0.8160501557599952, "learning_rate": 1.5585057354097465e-05, "loss": 0.7609, "step": 1731 }, { "epoch": 0.33228614595074224, "grad_norm": 0.9925777936705177, "learning_rate": 1.5579901076679625e-05, "loss": 0.8466, "step": 1732 }, { "epoch": 0.3324779970742704, "grad_norm": 0.9821945856612877, "learning_rate": 1.5574742644079984e-05, "loss": 0.8248, "step": 1733 }, { "epoch": 0.3326698481977985, "grad_norm": 0.8685298420784827, "learning_rate": 1.556958205829094e-05, "loss": 0.8106, "step": 1734 }, { "epoch": 0.33286169932132664, "grad_norm": 1.0185388114034277, "learning_rate": 1.556441932130571e-05, "loss": 0.8644, "step": 1735 }, { "epoch": 0.3330535504448548, "grad_norm": 0.8942169264085459, "learning_rate": 1.5559254435118354e-05, "loss": 0.864, "step": 1736 }, { "epoch": 0.33324540156838295, "grad_norm": 1.0017834588969143, "learning_rate": 1.5554087401723758e-05, "loss": 0.7721, "step": 1737 }, { "epoch": 0.3334372526919111, "grad_norm": 0.9159209325315378, "learning_rate": 1.5548918223117642e-05, "loss": 0.8408, "step": 1738 }, { "epoch": 0.3336291038154392, "grad_norm": 0.9614099130334467, "learning_rate": 1.5543746901296545e-05, "loss": 0.8098, "step": 1739 }, { "epoch": 0.33382095493896735, "grad_norm": 0.8879917550917132, "learning_rate": 1.553857343825784e-05, "loss": 0.8803, "step": 1740 }, { "epoch": 0.3340128060624955, "grad_norm": 0.8571970373460563, "learning_rate": 1.5533397835999724e-05, "loss": 0.816, "step": 1741 }, { "epoch": 0.33420465718602366, "grad_norm": 1.1790235761618455, "learning_rate": 1.5528220096521227e-05, "loss": 0.8374, "step": 1742 }, { "epoch": 0.3343965083095518, "grad_norm": 1.1312538021581078, "learning_rate": 1.5523040221822197e-05, "loss": 0.7701, "step": 1743 }, { "epoch": 0.3345883594330799, "grad_norm": 0.8406504287582202, "learning_rate": 1.551785821390331e-05, "loss": 0.7632, "step": 1744 }, { "epoch": 0.33478021055660806, "grad_norm": 0.941754464474301, "learning_rate": 1.551267407476607e-05, "loss": 0.8694, "step": 1745 }, { "epoch": 0.3349720616801362, "grad_norm": 0.9367444867967479, "learning_rate": 1.5507487806412792e-05, "loss": 0.787, "step": 1746 }, { "epoch": 0.33516391280366437, "grad_norm": 1.1173331105502122, "learning_rate": 1.5502299410846626e-05, "loss": 0.8555, "step": 1747 }, { "epoch": 0.3353557639271925, "grad_norm": 0.8949060100784766, "learning_rate": 1.5497108890071535e-05, "loss": 0.8413, "step": 1748 }, { "epoch": 0.3355476150507206, "grad_norm": 0.8809570734453649, "learning_rate": 1.549191624609231e-05, "loss": 0.8056, "step": 1749 }, { "epoch": 0.33573946617424877, "grad_norm": 0.8850993345080131, "learning_rate": 1.5486721480914556e-05, "loss": 0.8029, "step": 1750 }, { "epoch": 0.3359313172977769, "grad_norm": 0.9847292874430438, "learning_rate": 1.5481524596544705e-05, "loss": 0.8659, "step": 1751 }, { "epoch": 0.3361231684213051, "grad_norm": 0.9782975371298501, "learning_rate": 1.5476325594989998e-05, "loss": 0.8583, "step": 1752 }, { "epoch": 0.3363150195448332, "grad_norm": 0.8284985606584923, "learning_rate": 1.5471124478258496e-05, "loss": 0.7331, "step": 1753 }, { "epoch": 0.3365068706683613, "grad_norm": 0.9209463947578574, "learning_rate": 1.546592124835908e-05, "loss": 0.7948, "step": 1754 }, { "epoch": 0.3366987217918895, "grad_norm": 0.8203822114394956, "learning_rate": 1.5460715907301453e-05, "loss": 0.7597, "step": 1755 }, { "epoch": 0.33689057291541763, "grad_norm": 0.980799718631655, "learning_rate": 1.545550845709612e-05, "loss": 0.8577, "step": 1756 }, { "epoch": 0.3370824240389458, "grad_norm": 0.9679377367580555, "learning_rate": 1.5450298899754407e-05, "loss": 0.803, "step": 1757 }, { "epoch": 0.33727427516247394, "grad_norm": 0.6980882751443537, "learning_rate": 1.5445087237288457e-05, "loss": 0.2863, "step": 1758 }, { "epoch": 0.3374661262860021, "grad_norm": 0.8916650650944302, "learning_rate": 1.5439873471711222e-05, "loss": 0.7495, "step": 1759 }, { "epoch": 0.3376579774095302, "grad_norm": 0.9259128926798883, "learning_rate": 1.543465760503647e-05, "loss": 0.7798, "step": 1760 }, { "epoch": 0.33784982853305834, "grad_norm": 0.9166488359585525, "learning_rate": 1.5429439639278772e-05, "loss": 0.8578, "step": 1761 }, { "epoch": 0.3380416796565865, "grad_norm": 1.0427018334531486, "learning_rate": 1.5424219576453526e-05, "loss": 0.8463, "step": 1762 }, { "epoch": 0.33823353078011464, "grad_norm": 0.8830733972549358, "learning_rate": 1.541899741857692e-05, "loss": 0.8111, "step": 1763 }, { "epoch": 0.3384253819036428, "grad_norm": 0.9653559572863338, "learning_rate": 1.5413773167665968e-05, "loss": 0.8133, "step": 1764 }, { "epoch": 0.3386172330271709, "grad_norm": 0.9799092684014963, "learning_rate": 1.5408546825738477e-05, "loss": 0.8286, "step": 1765 }, { "epoch": 0.33880908415069905, "grad_norm": 0.9219133851891725, "learning_rate": 1.540331839481308e-05, "loss": 0.8201, "step": 1766 }, { "epoch": 0.3390009352742272, "grad_norm": 0.9262820779112575, "learning_rate": 1.53980878769092e-05, "loss": 0.8176, "step": 1767 }, { "epoch": 0.33919278639775535, "grad_norm": 0.6981702173567524, "learning_rate": 1.5392855274047074e-05, "loss": 0.3204, "step": 1768 }, { "epoch": 0.3393846375212835, "grad_norm": 0.9837612105779967, "learning_rate": 1.5387620588247752e-05, "loss": 0.8609, "step": 1769 }, { "epoch": 0.3395764886448116, "grad_norm": 0.9700096337456979, "learning_rate": 1.538238382153306e-05, "loss": 0.8734, "step": 1770 }, { "epoch": 0.33976833976833976, "grad_norm": 1.0063242556490248, "learning_rate": 1.5377144975925672e-05, "loss": 0.8976, "step": 1771 }, { "epoch": 0.3399601908918679, "grad_norm": 0.852882133430345, "learning_rate": 1.537190405344902e-05, "loss": 0.8188, "step": 1772 }, { "epoch": 0.34015204201539606, "grad_norm": 0.9963545633363715, "learning_rate": 1.536666105612737e-05, "loss": 0.817, "step": 1773 }, { "epoch": 0.3403438931389242, "grad_norm": 1.1177395003615187, "learning_rate": 1.5361415985985778e-05, "loss": 0.836, "step": 1774 }, { "epoch": 0.3405357442624523, "grad_norm": 1.0064296517398297, "learning_rate": 1.5356168845050094e-05, "loss": 0.7768, "step": 1775 }, { "epoch": 0.34072759538598046, "grad_norm": 0.8446814855427456, "learning_rate": 1.535091963534698e-05, "loss": 0.8121, "step": 1776 }, { "epoch": 0.3409194465095086, "grad_norm": 0.9954007852591016, "learning_rate": 1.5345668358903886e-05, "loss": 0.7908, "step": 1777 }, { "epoch": 0.34111129763303677, "grad_norm": 0.9500584689610135, "learning_rate": 1.534041501774907e-05, "loss": 0.8557, "step": 1778 }, { "epoch": 0.3413031487565649, "grad_norm": 0.9392979828862563, "learning_rate": 1.5335159613911587e-05, "loss": 0.8089, "step": 1779 }, { "epoch": 0.3414949998800931, "grad_norm": 0.8793810348803452, "learning_rate": 1.5329902149421277e-05, "loss": 0.8271, "step": 1780 }, { "epoch": 0.3416868510036212, "grad_norm": 0.8187026941516966, "learning_rate": 1.532464262630879e-05, "loss": 0.8342, "step": 1781 }, { "epoch": 0.3418787021271493, "grad_norm": 0.8593042889760686, "learning_rate": 1.531938104660556e-05, "loss": 0.8535, "step": 1782 }, { "epoch": 0.3420705532506775, "grad_norm": 0.9162836049753801, "learning_rate": 1.5314117412343824e-05, "loss": 0.8004, "step": 1783 }, { "epoch": 0.34226240437420563, "grad_norm": 0.9750328815833348, "learning_rate": 1.5308851725556606e-05, "loss": 0.8244, "step": 1784 }, { "epoch": 0.3424542554977338, "grad_norm": 0.8484866334184726, "learning_rate": 1.530358398827773e-05, "loss": 0.833, "step": 1785 }, { "epoch": 0.3426461066212619, "grad_norm": 0.9484786438504035, "learning_rate": 1.5298314202541807e-05, "loss": 0.803, "step": 1786 }, { "epoch": 0.34283795774479003, "grad_norm": 0.9447654572219323, "learning_rate": 1.529304237038423e-05, "loss": 0.8157, "step": 1787 }, { "epoch": 0.3430298088683182, "grad_norm": 1.1051337877285994, "learning_rate": 1.5287768493841207e-05, "loss": 0.849, "step": 1788 }, { "epoch": 0.34322165999184634, "grad_norm": 1.0712122788996614, "learning_rate": 1.528249257494971e-05, "loss": 0.8384, "step": 1789 }, { "epoch": 0.3434135111153745, "grad_norm": 0.9934819386427871, "learning_rate": 1.5277214615747516e-05, "loss": 0.7805, "step": 1790 }, { "epoch": 0.3436053622389026, "grad_norm": 0.9126709490635213, "learning_rate": 1.5271934618273182e-05, "loss": 0.7709, "step": 1791 }, { "epoch": 0.34379721336243074, "grad_norm": 1.0307184129231437, "learning_rate": 1.5266652584566056e-05, "loss": 0.8878, "step": 1792 }, { "epoch": 0.3439890644859589, "grad_norm": 0.9295959803203753, "learning_rate": 1.5261368516666274e-05, "loss": 0.8454, "step": 1793 }, { "epoch": 0.34418091560948705, "grad_norm": 0.9323875118016187, "learning_rate": 1.525608241661475e-05, "loss": 0.7722, "step": 1794 }, { "epoch": 0.3443727667330152, "grad_norm": 0.9439212959223138, "learning_rate": 1.5250794286453192e-05, "loss": 0.8681, "step": 1795 }, { "epoch": 0.3445646178565433, "grad_norm": 0.8297951518302856, "learning_rate": 1.5245504128224089e-05, "loss": 0.8358, "step": 1796 }, { "epoch": 0.34475646898007145, "grad_norm": 0.9133897817586447, "learning_rate": 1.5240211943970708e-05, "loss": 0.8153, "step": 1797 }, { "epoch": 0.3449483201035996, "grad_norm": 0.954140515890628, "learning_rate": 1.5234917735737108e-05, "loss": 0.8362, "step": 1798 }, { "epoch": 0.34514017122712776, "grad_norm": 1.038892932275658, "learning_rate": 1.5229621505568118e-05, "loss": 0.7504, "step": 1799 }, { "epoch": 0.3453320223506559, "grad_norm": 0.8364521931445027, "learning_rate": 1.5224323255509361e-05, "loss": 0.8445, "step": 1800 }, { "epoch": 0.345523873474184, "grad_norm": 1.0185078752955312, "learning_rate": 1.5219022987607233e-05, "loss": 0.8794, "step": 1801 }, { "epoch": 0.34571572459771216, "grad_norm": 0.8669026750796328, "learning_rate": 1.5213720703908905e-05, "loss": 0.7393, "step": 1802 }, { "epoch": 0.3459075757212403, "grad_norm": 1.0276150458278377, "learning_rate": 1.5208416406462341e-05, "loss": 0.8148, "step": 1803 }, { "epoch": 0.34609942684476847, "grad_norm": 0.9957774534472383, "learning_rate": 1.5203110097316267e-05, "loss": 0.8556, "step": 1804 }, { "epoch": 0.3462912779682966, "grad_norm": 1.0331346796673573, "learning_rate": 1.5197801778520194e-05, "loss": 0.8026, "step": 1805 }, { "epoch": 0.34648312909182477, "grad_norm": 0.7064594275973419, "learning_rate": 1.5192491452124408e-05, "loss": 0.3511, "step": 1806 }, { "epoch": 0.34667498021535287, "grad_norm": 1.0872286373269133, "learning_rate": 1.5187179120179969e-05, "loss": 0.7777, "step": 1807 }, { "epoch": 0.346866831338881, "grad_norm": 0.8380594802005882, "learning_rate": 1.5181864784738713e-05, "loss": 0.8077, "step": 1808 }, { "epoch": 0.3470586824624092, "grad_norm": 0.9901978758453309, "learning_rate": 1.5176548447853252e-05, "loss": 0.8382, "step": 1809 }, { "epoch": 0.3472505335859373, "grad_norm": 0.9799510431322562, "learning_rate": 1.5171230111576969e-05, "loss": 0.8252, "step": 1810 }, { "epoch": 0.3474423847094655, "grad_norm": 1.0739598742468208, "learning_rate": 1.5165909777964017e-05, "loss": 0.8569, "step": 1811 }, { "epoch": 0.3476342358329936, "grad_norm": 0.8484099685896265, "learning_rate": 1.5160587449069321e-05, "loss": 0.7798, "step": 1812 }, { "epoch": 0.34782608695652173, "grad_norm": 0.9312112818366945, "learning_rate": 1.5155263126948583e-05, "loss": 0.9012, "step": 1813 }, { "epoch": 0.3480179380800499, "grad_norm": 0.9753375972612769, "learning_rate": 1.5149936813658264e-05, "loss": 0.7929, "step": 1814 }, { "epoch": 0.34820978920357804, "grad_norm": 0.9549507032573142, "learning_rate": 1.5144608511255605e-05, "loss": 0.8003, "step": 1815 }, { "epoch": 0.3484016403271062, "grad_norm": 0.9595189921755302, "learning_rate": 1.5139278221798605e-05, "loss": 0.8583, "step": 1816 }, { "epoch": 0.3485934914506343, "grad_norm": 0.914675357814637, "learning_rate": 1.513394594734604e-05, "loss": 0.8011, "step": 1817 }, { "epoch": 0.34878534257416244, "grad_norm": 0.9551133535289449, "learning_rate": 1.5128611689957451e-05, "loss": 0.7314, "step": 1818 }, { "epoch": 0.3489771936976906, "grad_norm": 0.9431934996770213, "learning_rate": 1.5123275451693136e-05, "loss": 0.8679, "step": 1819 }, { "epoch": 0.34916904482121874, "grad_norm": 1.0735693160336537, "learning_rate": 1.5117937234614167e-05, "loss": 0.8307, "step": 1820 }, { "epoch": 0.3493608959447469, "grad_norm": 0.8733606460402723, "learning_rate": 1.5112597040782377e-05, "loss": 0.8299, "step": 1821 }, { "epoch": 0.349552747068275, "grad_norm": 0.9812893539213337, "learning_rate": 1.5107254872260366e-05, "loss": 0.8501, "step": 1822 }, { "epoch": 0.34974459819180315, "grad_norm": 1.1037146755393783, "learning_rate": 1.510191073111149e-05, "loss": 0.8148, "step": 1823 }, { "epoch": 0.3499364493153313, "grad_norm": 1.0692925686103474, "learning_rate": 1.5096564619399877e-05, "loss": 0.896, "step": 1824 }, { "epoch": 0.35012830043885945, "grad_norm": 0.74315320137781, "learning_rate": 1.5091216539190402e-05, "loss": 0.3057, "step": 1825 }, { "epoch": 0.3503201515623876, "grad_norm": 1.0242624637079651, "learning_rate": 1.5085866492548715e-05, "loss": 0.8056, "step": 1826 }, { "epoch": 0.3505120026859157, "grad_norm": 1.0523818490446903, "learning_rate": 1.5080514481541214e-05, "loss": 0.7802, "step": 1827 }, { "epoch": 0.35070385380944386, "grad_norm": 0.9617213918729521, "learning_rate": 1.5075160508235062e-05, "loss": 0.8135, "step": 1828 }, { "epoch": 0.350895704932972, "grad_norm": 0.9415930695029963, "learning_rate": 1.5069804574698182e-05, "loss": 0.8284, "step": 1829 }, { "epoch": 0.35108755605650016, "grad_norm": 1.0980724185525461, "learning_rate": 1.5064446682999245e-05, "loss": 0.8023, "step": 1830 }, { "epoch": 0.3512794071800283, "grad_norm": 0.9326454624488397, "learning_rate": 1.5059086835207688e-05, "loss": 0.7704, "step": 1831 }, { "epoch": 0.35147125830355647, "grad_norm": 1.2360495231186275, "learning_rate": 1.5053725033393698e-05, "loss": 0.7165, "step": 1832 }, { "epoch": 0.35166310942708456, "grad_norm": 1.0301550483160944, "learning_rate": 1.5048361279628217e-05, "loss": 0.8001, "step": 1833 }, { "epoch": 0.3518549605506127, "grad_norm": 0.9150661889852102, "learning_rate": 1.5042995575982944e-05, "loss": 0.7873, "step": 1834 }, { "epoch": 0.35204681167414087, "grad_norm": 1.0807691477640329, "learning_rate": 1.5037627924530326e-05, "loss": 0.7955, "step": 1835 }, { "epoch": 0.352238662797669, "grad_norm": 0.9983796708672572, "learning_rate": 1.5032258327343567e-05, "loss": 0.7998, "step": 1836 }, { "epoch": 0.3524305139211972, "grad_norm": 0.8795645292627977, "learning_rate": 1.5026886786496624e-05, "loss": 0.8319, "step": 1837 }, { "epoch": 0.35262236504472527, "grad_norm": 0.912885274415649, "learning_rate": 1.5021513304064197e-05, "loss": 0.8552, "step": 1838 }, { "epoch": 0.3528142161682534, "grad_norm": 0.9713412915674005, "learning_rate": 1.5016137882121746e-05, "loss": 0.8612, "step": 1839 }, { "epoch": 0.3530060672917816, "grad_norm": 0.9556324229256726, "learning_rate": 1.501076052274547e-05, "loss": 0.7816, "step": 1840 }, { "epoch": 0.35319791841530973, "grad_norm": 0.9503675733639274, "learning_rate": 1.5005381228012321e-05, "loss": 0.8297, "step": 1841 }, { "epoch": 0.3533897695388379, "grad_norm": 0.9347977588967998, "learning_rate": 1.5000000000000002e-05, "loss": 0.8371, "step": 1842 }, { "epoch": 0.353581620662366, "grad_norm": 1.0856654600274325, "learning_rate": 1.4994616840786956e-05, "loss": 0.8238, "step": 1843 }, { "epoch": 0.35377347178589413, "grad_norm": 0.903695250306737, "learning_rate": 1.4989231752452378e-05, "loss": 0.8579, "step": 1844 }, { "epoch": 0.3539653229094223, "grad_norm": 0.9038609110560083, "learning_rate": 1.4983844737076205e-05, "loss": 0.8666, "step": 1845 }, { "epoch": 0.35415717403295044, "grad_norm": 1.0124678728936778, "learning_rate": 1.4978455796739117e-05, "loss": 0.8287, "step": 1846 }, { "epoch": 0.3543490251564786, "grad_norm": 0.8801782965660486, "learning_rate": 1.497306493352254e-05, "loss": 0.7707, "step": 1847 }, { "epoch": 0.3545408762800067, "grad_norm": 0.9252365523852486, "learning_rate": 1.4967672149508641e-05, "loss": 0.8537, "step": 1848 }, { "epoch": 0.35473272740353484, "grad_norm": 0.8244219013063886, "learning_rate": 1.496227744678033e-05, "loss": 0.7761, "step": 1849 }, { "epoch": 0.354924578527063, "grad_norm": 0.8289196388639058, "learning_rate": 1.4956880827421256e-05, "loss": 0.3326, "step": 1850 }, { "epoch": 0.35511642965059115, "grad_norm": 0.8784249705188392, "learning_rate": 1.4951482293515815e-05, "loss": 0.8044, "step": 1851 }, { "epoch": 0.3553082807741193, "grad_norm": 1.0029441561685002, "learning_rate": 1.4946081847149134e-05, "loss": 0.8785, "step": 1852 }, { "epoch": 0.3555001318976474, "grad_norm": 0.8611114172310798, "learning_rate": 1.4940679490407084e-05, "loss": 0.7523, "step": 1853 }, { "epoch": 0.35569198302117555, "grad_norm": 0.6802597682619211, "learning_rate": 1.493527522537627e-05, "loss": 0.3017, "step": 1854 }, { "epoch": 0.3558838341447037, "grad_norm": 0.9856083589722174, "learning_rate": 1.4929869054144033e-05, "loss": 0.8441, "step": 1855 }, { "epoch": 0.35607568526823186, "grad_norm": 0.942996580067082, "learning_rate": 1.4924460978798463e-05, "loss": 0.8073, "step": 1856 }, { "epoch": 0.35626753639176, "grad_norm": 0.863803078690689, "learning_rate": 1.4919051001428369e-05, "loss": 0.8415, "step": 1857 }, { "epoch": 0.35645938751528816, "grad_norm": 0.9298465021945095, "learning_rate": 1.4913639124123297e-05, "loss": 0.8873, "step": 1858 }, { "epoch": 0.35665123863881626, "grad_norm": 0.6984131573874174, "learning_rate": 1.4908225348973541e-05, "loss": 0.3009, "step": 1859 }, { "epoch": 0.3568430897623444, "grad_norm": 0.954947341247497, "learning_rate": 1.4902809678070115e-05, "loss": 0.8696, "step": 1860 }, { "epoch": 0.35703494088587256, "grad_norm": 0.9426024602905133, "learning_rate": 1.4897392113504767e-05, "loss": 0.8141, "step": 1861 }, { "epoch": 0.3572267920094007, "grad_norm": 1.0554995385782335, "learning_rate": 1.4891972657369977e-05, "loss": 0.8522, "step": 1862 }, { "epoch": 0.35741864313292887, "grad_norm": 0.8638240007816956, "learning_rate": 1.4886551311758962e-05, "loss": 0.8166, "step": 1863 }, { "epoch": 0.35761049425645697, "grad_norm": 0.9344818932412334, "learning_rate": 1.4881128078765654e-05, "loss": 0.8523, "step": 1864 }, { "epoch": 0.3578023453799851, "grad_norm": 0.9869063451276119, "learning_rate": 1.4875702960484731e-05, "loss": 0.7998, "step": 1865 }, { "epoch": 0.3579941965035133, "grad_norm": 1.1071878759344356, "learning_rate": 1.4870275959011591e-05, "loss": 0.7815, "step": 1866 }, { "epoch": 0.3581860476270414, "grad_norm": 0.9649703867231231, "learning_rate": 1.4864847076442358e-05, "loss": 0.8862, "step": 1867 }, { "epoch": 0.3583778987505696, "grad_norm": 0.9572593084076303, "learning_rate": 1.4859416314873882e-05, "loss": 0.8834, "step": 1868 }, { "epoch": 0.3585697498740977, "grad_norm": 0.9894203310559407, "learning_rate": 1.4853983676403743e-05, "loss": 0.8141, "step": 1869 }, { "epoch": 0.35876160099762583, "grad_norm": 0.8745799945234893, "learning_rate": 1.4848549163130246e-05, "loss": 0.7669, "step": 1870 }, { "epoch": 0.358953452121154, "grad_norm": 1.0299007724397253, "learning_rate": 1.4843112777152415e-05, "loss": 0.8225, "step": 1871 }, { "epoch": 0.35914530324468213, "grad_norm": 0.8974660828790392, "learning_rate": 1.4837674520570003e-05, "loss": 0.8273, "step": 1872 }, { "epoch": 0.3593371543682103, "grad_norm": 0.9294397518120737, "learning_rate": 1.4832234395483484e-05, "loss": 0.8473, "step": 1873 }, { "epoch": 0.3595290054917384, "grad_norm": 1.1719942778043304, "learning_rate": 1.482679240399405e-05, "loss": 0.8616, "step": 1874 }, { "epoch": 0.35972085661526654, "grad_norm": 0.9123561427815164, "learning_rate": 1.482134854820362e-05, "loss": 0.8735, "step": 1875 }, { "epoch": 0.3599127077387947, "grad_norm": 0.8483803188503528, "learning_rate": 1.4815902830214826e-05, "loss": 0.9034, "step": 1876 }, { "epoch": 0.36010455886232284, "grad_norm": 0.9279391280325768, "learning_rate": 1.4810455252131019e-05, "loss": 0.874, "step": 1877 }, { "epoch": 0.360296409985851, "grad_norm": 0.9659084924659627, "learning_rate": 1.4805005816056283e-05, "loss": 0.7874, "step": 1878 }, { "epoch": 0.36048826110937915, "grad_norm": 0.9101903617238557, "learning_rate": 1.47995545240954e-05, "loss": 0.8406, "step": 1879 }, { "epoch": 0.36068011223290725, "grad_norm": 0.910508423362423, "learning_rate": 1.4794101378353884e-05, "loss": 0.8227, "step": 1880 }, { "epoch": 0.3608719633564354, "grad_norm": 1.0082777051055345, "learning_rate": 1.4788646380937957e-05, "loss": 0.7859, "step": 1881 }, { "epoch": 0.36106381447996355, "grad_norm": 0.7075572201066844, "learning_rate": 1.4783189533954555e-05, "loss": 0.3218, "step": 1882 }, { "epoch": 0.3612556656034917, "grad_norm": 0.8513830480816659, "learning_rate": 1.4777730839511335e-05, "loss": 0.8821, "step": 1883 }, { "epoch": 0.36144751672701986, "grad_norm": 0.9962784249878166, "learning_rate": 1.4772270299716658e-05, "loss": 0.8168, "step": 1884 }, { "epoch": 0.36163936785054795, "grad_norm": 0.8430071965461178, "learning_rate": 1.4766807916679613e-05, "loss": 0.8099, "step": 1885 }, { "epoch": 0.3618312189740761, "grad_norm": 1.069505233816428, "learning_rate": 1.4761343692509983e-05, "loss": 0.8258, "step": 1886 }, { "epoch": 0.36202307009760426, "grad_norm": 1.1671843193793512, "learning_rate": 1.4755877629318276e-05, "loss": 0.8615, "step": 1887 }, { "epoch": 0.3622149212211324, "grad_norm": 0.9485803589414636, "learning_rate": 1.4750409729215704e-05, "loss": 0.7943, "step": 1888 }, { "epoch": 0.36240677234466057, "grad_norm": 0.6050181655613456, "learning_rate": 1.474493999431419e-05, "loss": 0.3162, "step": 1889 }, { "epoch": 0.36259862346818866, "grad_norm": 0.8647031149709498, "learning_rate": 1.4739468426726365e-05, "loss": 0.8877, "step": 1890 }, { "epoch": 0.3627904745917168, "grad_norm": 0.9143090418818226, "learning_rate": 1.4733995028565564e-05, "loss": 0.7974, "step": 1891 }, { "epoch": 0.36298232571524497, "grad_norm": 0.91787023576503, "learning_rate": 1.4728519801945836e-05, "loss": 0.8444, "step": 1892 }, { "epoch": 0.3631741768387731, "grad_norm": 0.9107513335499653, "learning_rate": 1.4723042748981936e-05, "loss": 0.8514, "step": 1893 }, { "epoch": 0.3633660279623013, "grad_norm": 0.9820614993238028, "learning_rate": 1.4717563871789318e-05, "loss": 0.841, "step": 1894 }, { "epoch": 0.36355787908582937, "grad_norm": 1.1916033007330171, "learning_rate": 1.4712083172484146e-05, "loss": 0.8263, "step": 1895 }, { "epoch": 0.3637497302093575, "grad_norm": 0.9573035677954027, "learning_rate": 1.4706600653183287e-05, "loss": 0.775, "step": 1896 }, { "epoch": 0.3639415813328857, "grad_norm": 0.8887835712668439, "learning_rate": 1.4701116316004307e-05, "loss": 0.8158, "step": 1897 }, { "epoch": 0.36413343245641383, "grad_norm": 0.7186015543075173, "learning_rate": 1.469563016306548e-05, "loss": 0.3489, "step": 1898 }, { "epoch": 0.364325283579942, "grad_norm": 0.9291526335587842, "learning_rate": 1.4690142196485775e-05, "loss": 0.8508, "step": 1899 }, { "epoch": 0.3645171347034701, "grad_norm": 0.868372321089526, "learning_rate": 1.468465241838487e-05, "loss": 0.8807, "step": 1900 }, { "epoch": 0.36470898582699823, "grad_norm": 0.935583933934316, "learning_rate": 1.4679160830883135e-05, "loss": 0.8468, "step": 1901 }, { "epoch": 0.3649008369505264, "grad_norm": 1.04057805891294, "learning_rate": 1.4673667436101641e-05, "loss": 0.7661, "step": 1902 }, { "epoch": 0.36509268807405454, "grad_norm": 0.8925935434281479, "learning_rate": 1.4668172236162162e-05, "loss": 0.8056, "step": 1903 }, { "epoch": 0.3652845391975827, "grad_norm": 0.8497652907974886, "learning_rate": 1.4662675233187158e-05, "loss": 0.7966, "step": 1904 }, { "epoch": 0.36547639032111084, "grad_norm": 0.9434761504948651, "learning_rate": 1.4657176429299798e-05, "loss": 0.8572, "step": 1905 }, { "epoch": 0.36566824144463894, "grad_norm": 0.8576601531674329, "learning_rate": 1.4651675826623933e-05, "loss": 0.7998, "step": 1906 }, { "epoch": 0.3658600925681671, "grad_norm": 1.3615571086223117, "learning_rate": 1.4646173427284125e-05, "loss": 0.7642, "step": 1907 }, { "epoch": 0.36605194369169525, "grad_norm": 1.01960522354446, "learning_rate": 1.464066923340562e-05, "loss": 0.7677, "step": 1908 }, { "epoch": 0.3662437948152234, "grad_norm": 0.8809990463212201, "learning_rate": 1.4635163247114355e-05, "loss": 0.837, "step": 1909 }, { "epoch": 0.36643564593875155, "grad_norm": 0.9540414041183881, "learning_rate": 1.4629655470536965e-05, "loss": 0.9046, "step": 1910 }, { "epoch": 0.36662749706227965, "grad_norm": 1.0062150463703812, "learning_rate": 1.4624145905800773e-05, "loss": 0.8816, "step": 1911 }, { "epoch": 0.3668193481858078, "grad_norm": 0.9613863154623848, "learning_rate": 1.46186345550338e-05, "loss": 0.769, "step": 1912 }, { "epoch": 0.36701119930933596, "grad_norm": 0.870968452070177, "learning_rate": 1.4613121420364742e-05, "loss": 0.7301, "step": 1913 }, { "epoch": 0.3672030504328641, "grad_norm": 1.0355731491853246, "learning_rate": 1.4607606503922998e-05, "loss": 0.7565, "step": 1914 }, { "epoch": 0.36739490155639226, "grad_norm": 0.8947718078296832, "learning_rate": 1.4602089807838651e-05, "loss": 0.8642, "step": 1915 }, { "epoch": 0.36758675267992036, "grad_norm": 0.8744486487306051, "learning_rate": 1.4596571334242468e-05, "loss": 0.7894, "step": 1916 }, { "epoch": 0.3677786038034485, "grad_norm": 0.86683132788447, "learning_rate": 1.4591051085265907e-05, "loss": 0.8055, "step": 1917 }, { "epoch": 0.36797045492697666, "grad_norm": 0.9485046043111642, "learning_rate": 1.4585529063041107e-05, "loss": 0.797, "step": 1918 }, { "epoch": 0.3681623060505048, "grad_norm": 0.9829492448883178, "learning_rate": 1.4580005269700896e-05, "loss": 0.7781, "step": 1919 }, { "epoch": 0.36835415717403297, "grad_norm": 0.8866776152217881, "learning_rate": 1.4574479707378788e-05, "loss": 0.8368, "step": 1920 }, { "epoch": 0.36854600829756107, "grad_norm": 1.0078983219342352, "learning_rate": 1.4568952378208972e-05, "loss": 0.8239, "step": 1921 }, { "epoch": 0.3687378594210892, "grad_norm": 1.0040602862399155, "learning_rate": 1.4563423284326326e-05, "loss": 0.7637, "step": 1922 }, { "epoch": 0.3689297105446174, "grad_norm": 1.012171499253599, "learning_rate": 1.455789242786641e-05, "loss": 0.8514, "step": 1923 }, { "epoch": 0.3691215616681455, "grad_norm": 0.9517067990679101, "learning_rate": 1.455235981096546e-05, "loss": 0.846, "step": 1924 }, { "epoch": 0.3693134127916737, "grad_norm": 0.7383343179779923, "learning_rate": 1.4546825435760399e-05, "loss": 0.3511, "step": 1925 }, { "epoch": 0.3695052639152018, "grad_norm": 1.1543214329892215, "learning_rate": 1.4541289304388819e-05, "loss": 0.8342, "step": 1926 }, { "epoch": 0.36969711503872993, "grad_norm": 1.241943204915414, "learning_rate": 1.4535751418989e-05, "loss": 0.8575, "step": 1927 }, { "epoch": 0.3698889661622581, "grad_norm": 0.9047339377337937, "learning_rate": 1.4530211781699892e-05, "loss": 0.7825, "step": 1928 }, { "epoch": 0.37008081728578623, "grad_norm": 1.0011699788759425, "learning_rate": 1.4524670394661134e-05, "loss": 0.7482, "step": 1929 }, { "epoch": 0.3702726684093144, "grad_norm": 0.8441158871345358, "learning_rate": 1.4519127260013025e-05, "loss": 0.8195, "step": 1930 }, { "epoch": 0.37046451953284254, "grad_norm": 0.9208427011871475, "learning_rate": 1.4513582379896545e-05, "loss": 0.7965, "step": 1931 }, { "epoch": 0.37065637065637064, "grad_norm": 1.2987214139391032, "learning_rate": 1.4508035756453351e-05, "loss": 0.7762, "step": 1932 }, { "epoch": 0.3708482217798988, "grad_norm": 0.956022675695025, "learning_rate": 1.4502487391825773e-05, "loss": 0.7427, "step": 1933 }, { "epoch": 0.37104007290342694, "grad_norm": 0.7768378693580981, "learning_rate": 1.4496937288156814e-05, "loss": 0.7756, "step": 1934 }, { "epoch": 0.3712319240269551, "grad_norm": 0.94927192516387, "learning_rate": 1.4491385447590143e-05, "loss": 0.7809, "step": 1935 }, { "epoch": 0.37142377515048325, "grad_norm": 0.9258638644582284, "learning_rate": 1.4485831872270103e-05, "loss": 0.8213, "step": 1936 }, { "epoch": 0.37161562627401135, "grad_norm": 0.9006529312432199, "learning_rate": 1.4480276564341714e-05, "loss": 0.8525, "step": 1937 }, { "epoch": 0.3718074773975395, "grad_norm": 0.9158227778081405, "learning_rate": 1.4474719525950654e-05, "loss": 0.8234, "step": 1938 }, { "epoch": 0.37199932852106765, "grad_norm": 0.884358761506917, "learning_rate": 1.4469160759243272e-05, "loss": 0.778, "step": 1939 }, { "epoch": 0.3721911796445958, "grad_norm": 0.6734593508899414, "learning_rate": 1.4463600266366591e-05, "loss": 0.3674, "step": 1940 }, { "epoch": 0.37238303076812396, "grad_norm": 0.8878648057275191, "learning_rate": 1.4458038049468295e-05, "loss": 0.8499, "step": 1941 }, { "epoch": 0.37257488189165205, "grad_norm": 0.6796954190622916, "learning_rate": 1.4452474110696738e-05, "loss": 0.2911, "step": 1942 }, { "epoch": 0.3727667330151802, "grad_norm": 0.8347288779211952, "learning_rate": 1.4446908452200932e-05, "loss": 0.7353, "step": 1943 }, { "epoch": 0.37295858413870836, "grad_norm": 0.8715633327089441, "learning_rate": 1.4441341076130565e-05, "loss": 0.827, "step": 1944 }, { "epoch": 0.3731504352622365, "grad_norm": 0.9544797419515827, "learning_rate": 1.4435771984635971e-05, "loss": 0.7319, "step": 1945 }, { "epoch": 0.37334228638576467, "grad_norm": 0.8244164545788669, "learning_rate": 1.4430201179868167e-05, "loss": 0.8202, "step": 1946 }, { "epoch": 0.37353413750929276, "grad_norm": 0.9208953175479542, "learning_rate": 1.4424628663978813e-05, "loss": 0.8515, "step": 1947 }, { "epoch": 0.3737259886328209, "grad_norm": 0.8646758017114649, "learning_rate": 1.441905443912024e-05, "loss": 0.7639, "step": 1948 }, { "epoch": 0.37391783975634907, "grad_norm": 0.9511474311149491, "learning_rate": 1.4413478507445441e-05, "loss": 0.8296, "step": 1949 }, { "epoch": 0.3741096908798772, "grad_norm": 1.0281332027062717, "learning_rate": 1.4407900871108064e-05, "loss": 0.8324, "step": 1950 }, { "epoch": 0.3743015420034054, "grad_norm": 1.0416792418504346, "learning_rate": 1.4402321532262416e-05, "loss": 0.7823, "step": 1951 }, { "epoch": 0.3744933931269335, "grad_norm": 0.8873655566766727, "learning_rate": 1.4396740493063459e-05, "loss": 0.8152, "step": 1952 }, { "epoch": 0.3746852442504616, "grad_norm": 0.9389233465830512, "learning_rate": 1.4391157755666818e-05, "loss": 0.7555, "step": 1953 }, { "epoch": 0.3748770953739898, "grad_norm": 0.9885660156102912, "learning_rate": 1.4385573322228768e-05, "loss": 0.7437, "step": 1954 }, { "epoch": 0.37506894649751793, "grad_norm": 1.0701561445833598, "learning_rate": 1.4379987194906237e-05, "loss": 0.7868, "step": 1955 }, { "epoch": 0.3752607976210461, "grad_norm": 0.8920386112580588, "learning_rate": 1.4374399375856823e-05, "loss": 0.8491, "step": 1956 }, { "epoch": 0.37545264874457424, "grad_norm": 0.8702245141545434, "learning_rate": 1.4368809867238754e-05, "loss": 0.3011, "step": 1957 }, { "epoch": 0.37564449986810233, "grad_norm": 0.925398506849586, "learning_rate": 1.436321867121093e-05, "loss": 0.766, "step": 1958 }, { "epoch": 0.3758363509916305, "grad_norm": 0.8844864890265578, "learning_rate": 1.4357625789932894e-05, "loss": 0.697, "step": 1959 }, { "epoch": 0.37602820211515864, "grad_norm": 0.8846099120517038, "learning_rate": 1.4352031225564842e-05, "loss": 0.7529, "step": 1960 }, { "epoch": 0.3762200532386868, "grad_norm": 1.155919971533601, "learning_rate": 1.4346434980267616e-05, "loss": 0.8675, "step": 1961 }, { "epoch": 0.37641190436221494, "grad_norm": 0.6836180855525898, "learning_rate": 1.4340837056202712e-05, "loss": 0.3435, "step": 1962 }, { "epoch": 0.37660375548574304, "grad_norm": 1.0985556231701719, "learning_rate": 1.4335237455532277e-05, "loss": 0.7952, "step": 1963 }, { "epoch": 0.3767956066092712, "grad_norm": 0.8672313063139759, "learning_rate": 1.4329636180419094e-05, "loss": 0.8114, "step": 1964 }, { "epoch": 0.37698745773279935, "grad_norm": 1.0177019765606432, "learning_rate": 1.4324033233026606e-05, "loss": 0.7671, "step": 1965 }, { "epoch": 0.3771793088563275, "grad_norm": 0.9209617534899774, "learning_rate": 1.4318428615518895e-05, "loss": 0.8847, "step": 1966 }, { "epoch": 0.37737115997985565, "grad_norm": 0.9229116087335173, "learning_rate": 1.4312822330060689e-05, "loss": 0.8636, "step": 1967 }, { "epoch": 0.37756301110338375, "grad_norm": 0.8491769664059569, "learning_rate": 1.4307214378817362e-05, "loss": 0.861, "step": 1968 }, { "epoch": 0.3777548622269119, "grad_norm": 0.695764623905624, "learning_rate": 1.4301604763954929e-05, "loss": 0.3354, "step": 1969 }, { "epoch": 0.37794671335044006, "grad_norm": 0.9917555206650834, "learning_rate": 1.4295993487640043e-05, "loss": 0.8774, "step": 1970 }, { "epoch": 0.3781385644739682, "grad_norm": 0.8594648149275307, "learning_rate": 1.4290380552040013e-05, "loss": 0.7523, "step": 1971 }, { "epoch": 0.37833041559749636, "grad_norm": 0.8008910373555325, "learning_rate": 1.4284765959322772e-05, "loss": 0.8029, "step": 1972 }, { "epoch": 0.37852226672102446, "grad_norm": 1.1602705267048476, "learning_rate": 1.427914971165691e-05, "loss": 0.8542, "step": 1973 }, { "epoch": 0.3787141178445526, "grad_norm": 0.9617026221302496, "learning_rate": 1.4273531811211642e-05, "loss": 0.8271, "step": 1974 }, { "epoch": 0.37890596896808076, "grad_norm": 0.9857162976768031, "learning_rate": 1.4267912260156824e-05, "loss": 0.8249, "step": 1975 }, { "epoch": 0.3790978200916089, "grad_norm": 0.8902955641016688, "learning_rate": 1.4262291060662959e-05, "loss": 0.754, "step": 1976 }, { "epoch": 0.37928967121513707, "grad_norm": 0.8967955279955072, "learning_rate": 1.4256668214901172e-05, "loss": 0.7799, "step": 1977 }, { "epoch": 0.3794815223386652, "grad_norm": 0.8160873166599799, "learning_rate": 1.425104372504324e-05, "loss": 0.7903, "step": 1978 }, { "epoch": 0.3796733734621933, "grad_norm": 0.9466475805081621, "learning_rate": 1.4245417593261558e-05, "loss": 0.7692, "step": 1979 }, { "epoch": 0.3798652245857215, "grad_norm": 0.977095887202593, "learning_rate": 1.4239789821729177e-05, "loss": 0.8868, "step": 1980 }, { "epoch": 0.3800570757092496, "grad_norm": 0.88904416799773, "learning_rate": 1.4234160412619758e-05, "loss": 0.8084, "step": 1981 }, { "epoch": 0.3802489268327778, "grad_norm": 0.9009987597814955, "learning_rate": 1.42285293681076e-05, "loss": 0.821, "step": 1982 }, { "epoch": 0.38044077795630593, "grad_norm": 0.9288719871596332, "learning_rate": 1.422289669036765e-05, "loss": 0.8157, "step": 1983 }, { "epoch": 0.38063262907983403, "grad_norm": 0.9452552845238454, "learning_rate": 1.4217262381575469e-05, "loss": 0.7652, "step": 1984 }, { "epoch": 0.3808244802033622, "grad_norm": 0.6821913460693357, "learning_rate": 1.4211626443907256e-05, "loss": 0.3024, "step": 1985 }, { "epoch": 0.38101633132689033, "grad_norm": 1.0134644601457172, "learning_rate": 1.4205988879539832e-05, "loss": 0.8707, "step": 1986 }, { "epoch": 0.3812081824504185, "grad_norm": 0.6796689540443571, "learning_rate": 1.4200349690650654e-05, "loss": 0.3271, "step": 1987 }, { "epoch": 0.38140003357394664, "grad_norm": 1.1176762302621355, "learning_rate": 1.4194708879417802e-05, "loss": 0.789, "step": 1988 }, { "epoch": 0.38159188469747474, "grad_norm": 0.9178992101519539, "learning_rate": 1.4189066448019981e-05, "loss": 0.8689, "step": 1989 }, { "epoch": 0.3817837358210029, "grad_norm": 0.6567776275736072, "learning_rate": 1.418342239863653e-05, "loss": 0.31, "step": 1990 }, { "epoch": 0.38197558694453104, "grad_norm": 0.9028544865012966, "learning_rate": 1.4177776733447406e-05, "loss": 0.8415, "step": 1991 }, { "epoch": 0.3821674380680592, "grad_norm": 1.039341257225118, "learning_rate": 1.4172129454633188e-05, "loss": 0.8227, "step": 1992 }, { "epoch": 0.38235928919158735, "grad_norm": 0.9085490936797066, "learning_rate": 1.4166480564375084e-05, "loss": 0.8177, "step": 1993 }, { "epoch": 0.38255114031511545, "grad_norm": 1.1310605223799883, "learning_rate": 1.4160830064854925e-05, "loss": 0.8005, "step": 1994 }, { "epoch": 0.3827429914386436, "grad_norm": 0.7088500552056155, "learning_rate": 1.4155177958255158e-05, "loss": 0.3422, "step": 1995 }, { "epoch": 0.38293484256217175, "grad_norm": 1.033457555299066, "learning_rate": 1.4149524246758847e-05, "loss": 0.7987, "step": 1996 }, { "epoch": 0.3831266936856999, "grad_norm": 0.9977075161655127, "learning_rate": 1.4143868932549696e-05, "loss": 0.8105, "step": 1997 }, { "epoch": 0.38331854480922806, "grad_norm": 1.004591755781269, "learning_rate": 1.4138212017812006e-05, "loss": 0.8653, "step": 1998 }, { "epoch": 0.38351039593275615, "grad_norm": 1.164155385043789, "learning_rate": 1.4132553504730703e-05, "loss": 0.8435, "step": 1999 }, { "epoch": 0.3837022470562843, "grad_norm": 0.925587847198671, "learning_rate": 1.4126893395491336e-05, "loss": 0.8065, "step": 2000 }, { "epoch": 0.38389409817981246, "grad_norm": 0.9213650148472876, "learning_rate": 1.4121231692280066e-05, "loss": 0.7709, "step": 2001 }, { "epoch": 0.3840859493033406, "grad_norm": 0.8302519682830851, "learning_rate": 1.411556839728367e-05, "loss": 0.8187, "step": 2002 }, { "epoch": 0.38427780042686877, "grad_norm": 0.9975315720371827, "learning_rate": 1.410990351268954e-05, "loss": 0.8131, "step": 2003 }, { "epoch": 0.3844696515503969, "grad_norm": 0.8928120640641889, "learning_rate": 1.4104237040685678e-05, "loss": 0.8139, "step": 2004 }, { "epoch": 0.384661502673925, "grad_norm": 1.027545271274896, "learning_rate": 1.4098568983460707e-05, "loss": 0.7417, "step": 2005 }, { "epoch": 0.38485335379745317, "grad_norm": 0.8793231886356261, "learning_rate": 1.4092899343203856e-05, "loss": 0.8549, "step": 2006 }, { "epoch": 0.3850452049209813, "grad_norm": 1.014785868705646, "learning_rate": 1.408722812210497e-05, "loss": 0.891, "step": 2007 }, { "epoch": 0.3852370560445095, "grad_norm": 0.888040809222287, "learning_rate": 1.4081555322354501e-05, "loss": 0.8657, "step": 2008 }, { "epoch": 0.3854289071680376, "grad_norm": 0.9642704135325435, "learning_rate": 1.4075880946143514e-05, "loss": 0.8303, "step": 2009 }, { "epoch": 0.3856207582915657, "grad_norm": 0.9763534262916713, "learning_rate": 1.4070204995663677e-05, "loss": 0.8242, "step": 2010 }, { "epoch": 0.3858126094150939, "grad_norm": 0.7386730112925637, "learning_rate": 1.406452747310727e-05, "loss": 0.3111, "step": 2011 }, { "epoch": 0.38600446053862203, "grad_norm": 0.9892648022150394, "learning_rate": 1.4058848380667187e-05, "loss": 0.8212, "step": 2012 }, { "epoch": 0.3861963116621502, "grad_norm": 0.9759563478558263, "learning_rate": 1.4053167720536914e-05, "loss": 0.9188, "step": 2013 }, { "epoch": 0.38638816278567834, "grad_norm": 0.6766494050070996, "learning_rate": 1.4047485494910551e-05, "loss": 0.3562, "step": 2014 }, { "epoch": 0.38658001390920643, "grad_norm": 0.9491058630884045, "learning_rate": 1.4041801705982804e-05, "loss": 0.7639, "step": 2015 }, { "epoch": 0.3867718650327346, "grad_norm": 1.152058463594599, "learning_rate": 1.403611635594898e-05, "loss": 0.7801, "step": 2016 }, { "epoch": 0.38696371615626274, "grad_norm": 0.860233374135897, "learning_rate": 1.4030429447004992e-05, "loss": 0.8244, "step": 2017 }, { "epoch": 0.3871555672797909, "grad_norm": 0.9377186235497627, "learning_rate": 1.4024740981347344e-05, "loss": 0.9029, "step": 2018 }, { "epoch": 0.38734741840331904, "grad_norm": 0.9064018506228484, "learning_rate": 1.401905096117316e-05, "loss": 0.8258, "step": 2019 }, { "epoch": 0.38753926952684714, "grad_norm": 0.8506456384046333, "learning_rate": 1.4013359388680148e-05, "loss": 0.8415, "step": 2020 }, { "epoch": 0.3877311206503753, "grad_norm": 0.9232324131684526, "learning_rate": 1.4007666266066622e-05, "loss": 0.7897, "step": 2021 }, { "epoch": 0.38792297177390345, "grad_norm": 0.9784518719136869, "learning_rate": 1.4001971595531499e-05, "loss": 0.8597, "step": 2022 }, { "epoch": 0.3881148228974316, "grad_norm": 1.00903994449818, "learning_rate": 1.3996275379274284e-05, "loss": 0.8022, "step": 2023 }, { "epoch": 0.38830667402095975, "grad_norm": 0.9743297788705602, "learning_rate": 1.399057761949509e-05, "loss": 0.7418, "step": 2024 }, { "epoch": 0.3884985251444879, "grad_norm": 0.8675540400383055, "learning_rate": 1.3984878318394613e-05, "loss": 0.8323, "step": 2025 }, { "epoch": 0.388690376268016, "grad_norm": 0.9591098815554269, "learning_rate": 1.3979177478174158e-05, "loss": 0.8932, "step": 2026 }, { "epoch": 0.38888222739154416, "grad_norm": 1.0147844983989895, "learning_rate": 1.3973475101035616e-05, "loss": 0.8223, "step": 2027 }, { "epoch": 0.3890740785150723, "grad_norm": 0.8982158894145001, "learning_rate": 1.396777118918147e-05, "loss": 0.7561, "step": 2028 }, { "epoch": 0.38926592963860046, "grad_norm": 0.9124166895948957, "learning_rate": 1.3962065744814813e-05, "loss": 0.84, "step": 2029 }, { "epoch": 0.3894577807621286, "grad_norm": 0.8921046518332757, "learning_rate": 1.3956358770139304e-05, "loss": 0.8676, "step": 2030 }, { "epoch": 0.3896496318856567, "grad_norm": 0.9818724556365114, "learning_rate": 1.3950650267359209e-05, "loss": 0.8335, "step": 2031 }, { "epoch": 0.38984148300918486, "grad_norm": 0.926339351312681, "learning_rate": 1.3944940238679384e-05, "loss": 0.7128, "step": 2032 }, { "epoch": 0.390033334132713, "grad_norm": 1.057047503255708, "learning_rate": 1.3939228686305266e-05, "loss": 0.81, "step": 2033 }, { "epoch": 0.39022518525624117, "grad_norm": 0.9324617388803842, "learning_rate": 1.3933515612442889e-05, "loss": 0.7833, "step": 2034 }, { "epoch": 0.3904170363797693, "grad_norm": 0.9681871912805243, "learning_rate": 1.392780101929887e-05, "loss": 0.8069, "step": 2035 }, { "epoch": 0.3906088875032974, "grad_norm": 0.996899641222563, "learning_rate": 1.3922084909080418e-05, "loss": 0.8366, "step": 2036 }, { "epoch": 0.3908007386268256, "grad_norm": 0.8568105702760358, "learning_rate": 1.391636728399532e-05, "loss": 0.8107, "step": 2037 }, { "epoch": 0.3909925897503537, "grad_norm": 0.9114578467789733, "learning_rate": 1.3910648146251958e-05, "loss": 0.8736, "step": 2038 }, { "epoch": 0.3911844408738819, "grad_norm": 1.0628525005867007, "learning_rate": 1.3904927498059285e-05, "loss": 0.8041, "step": 2039 }, { "epoch": 0.39137629199741003, "grad_norm": 0.9592362739085118, "learning_rate": 1.3899205341626847e-05, "loss": 0.8015, "step": 2040 }, { "epoch": 0.39156814312093813, "grad_norm": 1.0375569246982237, "learning_rate": 1.3893481679164776e-05, "loss": 0.8912, "step": 2041 }, { "epoch": 0.3917599942444663, "grad_norm": 0.6968535743010699, "learning_rate": 1.3887756512883775e-05, "loss": 0.331, "step": 2042 }, { "epoch": 0.39195184536799443, "grad_norm": 1.0946539152565913, "learning_rate": 1.3882029844995131e-05, "loss": 0.9156, "step": 2043 }, { "epoch": 0.3921436964915226, "grad_norm": 1.030849870796569, "learning_rate": 1.387630167771072e-05, "loss": 0.8197, "step": 2044 }, { "epoch": 0.39233554761505074, "grad_norm": 0.6565896418364807, "learning_rate": 1.3870572013242986e-05, "loss": 0.2857, "step": 2045 }, { "epoch": 0.39252739873857884, "grad_norm": 0.8677681912772836, "learning_rate": 1.3864840853804958e-05, "loss": 0.822, "step": 2046 }, { "epoch": 0.392719249862107, "grad_norm": 0.9768092451160398, "learning_rate": 1.3859108201610236e-05, "loss": 0.8389, "step": 2047 }, { "epoch": 0.39291110098563514, "grad_norm": 0.9695201581703348, "learning_rate": 1.3853374058873002e-05, "loss": 0.9143, "step": 2048 }, { "epoch": 0.3931029521091633, "grad_norm": 0.8623587945014507, "learning_rate": 1.3847638427808012e-05, "loss": 0.8196, "step": 2049 }, { "epoch": 0.39329480323269145, "grad_norm": 0.9175847054786249, "learning_rate": 1.3841901310630597e-05, "loss": 0.8155, "step": 2050 }, { "epoch": 0.3934866543562196, "grad_norm": 1.0079277395965245, "learning_rate": 1.3836162709556665e-05, "loss": 0.7839, "step": 2051 }, { "epoch": 0.3936785054797477, "grad_norm": 0.9517158043232806, "learning_rate": 1.3830422626802691e-05, "loss": 0.7625, "step": 2052 }, { "epoch": 0.39387035660327585, "grad_norm": 0.9194006723015316, "learning_rate": 1.3824681064585721e-05, "loss": 0.7809, "step": 2053 }, { "epoch": 0.394062207726804, "grad_norm": 0.9538944022353582, "learning_rate": 1.3818938025123387e-05, "loss": 0.8492, "step": 2054 }, { "epoch": 0.39425405885033216, "grad_norm": 0.9150274120537748, "learning_rate": 1.3813193510633873e-05, "loss": 0.8113, "step": 2055 }, { "epoch": 0.3944459099738603, "grad_norm": 1.5651902551201076, "learning_rate": 1.3807447523335947e-05, "loss": 0.841, "step": 2056 }, { "epoch": 0.3946377610973884, "grad_norm": 0.9137356109815045, "learning_rate": 1.3801700065448933e-05, "loss": 0.8006, "step": 2057 }, { "epoch": 0.39482961222091656, "grad_norm": 0.9328476933600507, "learning_rate": 1.379595113919274e-05, "loss": 0.8976, "step": 2058 }, { "epoch": 0.3950214633444447, "grad_norm": 0.8592257030020283, "learning_rate": 1.3790200746787825e-05, "loss": 0.8383, "step": 2059 }, { "epoch": 0.39521331446797286, "grad_norm": 0.8428008474596679, "learning_rate": 1.3784448890455225e-05, "loss": 0.7747, "step": 2060 }, { "epoch": 0.395405165591501, "grad_norm": 1.0656126799536842, "learning_rate": 1.3778695572416535e-05, "loss": 0.8228, "step": 2061 }, { "epoch": 0.3955970167150291, "grad_norm": 0.8652437995246388, "learning_rate": 1.3772940794893916e-05, "loss": 0.3357, "step": 2062 }, { "epoch": 0.39578886783855727, "grad_norm": 0.9340938880169635, "learning_rate": 1.3767184560110103e-05, "loss": 0.8712, "step": 2063 }, { "epoch": 0.3959807189620854, "grad_norm": 1.0360235721275737, "learning_rate": 1.3761426870288377e-05, "loss": 0.8819, "step": 2064 }, { "epoch": 0.3961725700856136, "grad_norm": 0.8419019990218147, "learning_rate": 1.375566772765259e-05, "loss": 0.7529, "step": 2065 }, { "epoch": 0.3963644212091417, "grad_norm": 0.9092704700032122, "learning_rate": 1.3749907134427155e-05, "loss": 0.8376, "step": 2066 }, { "epoch": 0.3965562723326698, "grad_norm": 0.9784662165388257, "learning_rate": 1.3744145092837045e-05, "loss": 0.7822, "step": 2067 }, { "epoch": 0.396748123456198, "grad_norm": 0.886243788895936, "learning_rate": 1.373838160510779e-05, "loss": 0.803, "step": 2068 }, { "epoch": 0.39693997457972613, "grad_norm": 0.9321827105966156, "learning_rate": 1.3732616673465481e-05, "loss": 0.7432, "step": 2069 }, { "epoch": 0.3971318257032543, "grad_norm": 0.9673637051900944, "learning_rate": 1.3726850300136767e-05, "loss": 0.8542, "step": 2070 }, { "epoch": 0.39732367682678243, "grad_norm": 0.9196039512948364, "learning_rate": 1.3721082487348853e-05, "loss": 0.7885, "step": 2071 }, { "epoch": 0.39751552795031053, "grad_norm": 0.9201153828929142, "learning_rate": 1.37153132373295e-05, "loss": 0.8521, "step": 2072 }, { "epoch": 0.3977073790738387, "grad_norm": 1.1340252344287765, "learning_rate": 1.3709542552307023e-05, "loss": 0.8364, "step": 2073 }, { "epoch": 0.39789923019736684, "grad_norm": 0.8738791794385207, "learning_rate": 1.3703770434510291e-05, "loss": 0.8, "step": 2074 }, { "epoch": 0.398091081320895, "grad_norm": 1.0624885064862406, "learning_rate": 1.369799688616873e-05, "loss": 0.8532, "step": 2075 }, { "epoch": 0.39828293244442314, "grad_norm": 0.8251131781194683, "learning_rate": 1.3692221909512313e-05, "loss": 0.8262, "step": 2076 }, { "epoch": 0.3984747835679513, "grad_norm": 0.8985633727566132, "learning_rate": 1.368644550677157e-05, "loss": 0.7611, "step": 2077 }, { "epoch": 0.3986666346914794, "grad_norm": 0.936590326408946, "learning_rate": 1.3680667680177581e-05, "loss": 0.8258, "step": 2078 }, { "epoch": 0.39885848581500755, "grad_norm": 0.7345472781976546, "learning_rate": 1.3674888431961977e-05, "loss": 0.3404, "step": 2079 }, { "epoch": 0.3990503369385357, "grad_norm": 0.9378662245097825, "learning_rate": 1.3669107764356928e-05, "loss": 0.7642, "step": 2080 }, { "epoch": 0.39924218806206385, "grad_norm": 1.3037257714060615, "learning_rate": 1.3663325679595166e-05, "loss": 0.7726, "step": 2081 }, { "epoch": 0.399434039185592, "grad_norm": 1.0924804170306284, "learning_rate": 1.3657542179909956e-05, "loss": 0.7736, "step": 2082 }, { "epoch": 0.3996258903091201, "grad_norm": 0.8455307452335059, "learning_rate": 1.3651757267535129e-05, "loss": 0.7776, "step": 2083 }, { "epoch": 0.39981774143264825, "grad_norm": 0.9285732455207506, "learning_rate": 1.3645970944705042e-05, "loss": 0.9075, "step": 2084 }, { "epoch": 0.4000095925561764, "grad_norm": 0.8747289809244562, "learning_rate": 1.3640183213654612e-05, "loss": 0.849, "step": 2085 }, { "epoch": 0.40020144367970456, "grad_norm": 0.965331922254692, "learning_rate": 1.3634394076619288e-05, "loss": 0.8519, "step": 2086 }, { "epoch": 0.4003932948032327, "grad_norm": 1.0349333576699047, "learning_rate": 1.362860353583507e-05, "loss": 0.7448, "step": 2087 }, { "epoch": 0.4005851459267608, "grad_norm": 0.924367146483197, "learning_rate": 1.3622811593538495e-05, "loss": 0.8282, "step": 2088 }, { "epoch": 0.40077699705028896, "grad_norm": 1.0078410390356283, "learning_rate": 1.3617018251966641e-05, "loss": 0.844, "step": 2089 }, { "epoch": 0.4009688481738171, "grad_norm": 0.9206620738915416, "learning_rate": 1.3611223513357134e-05, "loss": 0.8506, "step": 2090 }, { "epoch": 0.40116069929734527, "grad_norm": 0.8921402335233277, "learning_rate": 1.3605427379948137e-05, "loss": 0.7695, "step": 2091 }, { "epoch": 0.4013525504208734, "grad_norm": 0.891534304785687, "learning_rate": 1.3599629853978342e-05, "loss": 0.7961, "step": 2092 }, { "epoch": 0.4015444015444015, "grad_norm": 0.6657187959072431, "learning_rate": 1.3593830937686991e-05, "loss": 0.3205, "step": 2093 }, { "epoch": 0.40173625266792967, "grad_norm": 0.9291079679641506, "learning_rate": 1.3588030633313858e-05, "loss": 0.7679, "step": 2094 }, { "epoch": 0.4019281037914578, "grad_norm": 0.946748574386241, "learning_rate": 1.3582228943099253e-05, "loss": 0.8653, "step": 2095 }, { "epoch": 0.402119954914986, "grad_norm": 1.1330631878818098, "learning_rate": 1.3576425869284018e-05, "loss": 0.7986, "step": 2096 }, { "epoch": 0.40231180603851413, "grad_norm": 0.9101038570895078, "learning_rate": 1.357062141410954e-05, "loss": 0.8019, "step": 2097 }, { "epoch": 0.4025036571620422, "grad_norm": 0.8257756875380075, "learning_rate": 1.3564815579817727e-05, "loss": 0.7574, "step": 2098 }, { "epoch": 0.4026955082855704, "grad_norm": 0.8627989879906928, "learning_rate": 1.3559008368651028e-05, "loss": 0.8466, "step": 2099 }, { "epoch": 0.40288735940909853, "grad_norm": 0.8296800276661562, "learning_rate": 1.355319978285242e-05, "loss": 0.7768, "step": 2100 }, { "epoch": 0.4030792105326267, "grad_norm": 0.9051519087308195, "learning_rate": 1.3547389824665413e-05, "loss": 0.7931, "step": 2101 }, { "epoch": 0.40327106165615484, "grad_norm": 0.9277272197902335, "learning_rate": 1.3541578496334045e-05, "loss": 0.8296, "step": 2102 }, { "epoch": 0.403462912779683, "grad_norm": 0.6797003589607822, "learning_rate": 1.3535765800102885e-05, "loss": 0.3187, "step": 2103 }, { "epoch": 0.4036547639032111, "grad_norm": 0.9217596812732584, "learning_rate": 1.3529951738217029e-05, "loss": 0.8056, "step": 2104 }, { "epoch": 0.40384661502673924, "grad_norm": 0.9323877232540726, "learning_rate": 1.35241363129221e-05, "loss": 0.8809, "step": 2105 }, { "epoch": 0.4040384661502674, "grad_norm": 0.975941318409103, "learning_rate": 1.3518319526464253e-05, "loss": 0.8023, "step": 2106 }, { "epoch": 0.40423031727379555, "grad_norm": 1.194343125103267, "learning_rate": 1.3512501381090158e-05, "loss": 0.8084, "step": 2107 }, { "epoch": 0.4044221683973237, "grad_norm": 0.957032730188565, "learning_rate": 1.3506681879047024e-05, "loss": 0.8245, "step": 2108 }, { "epoch": 0.4046140195208518, "grad_norm": 0.8980155252885219, "learning_rate": 1.3500861022582569e-05, "loss": 0.7985, "step": 2109 }, { "epoch": 0.40480587064437995, "grad_norm": 0.8569999081269969, "learning_rate": 1.349503881394505e-05, "loss": 0.7423, "step": 2110 }, { "epoch": 0.4049977217679081, "grad_norm": 0.976536460100488, "learning_rate": 1.3489215255383226e-05, "loss": 0.7291, "step": 2111 }, { "epoch": 0.40518957289143626, "grad_norm": 0.9783667286316442, "learning_rate": 1.34833903491464e-05, "loss": 0.8474, "step": 2112 }, { "epoch": 0.4053814240149644, "grad_norm": 0.978860399015814, "learning_rate": 1.3477564097484374e-05, "loss": 0.8191, "step": 2113 }, { "epoch": 0.4055732751384925, "grad_norm": 1.054677535052269, "learning_rate": 1.3471736502647493e-05, "loss": 0.8771, "step": 2114 }, { "epoch": 0.40576512626202066, "grad_norm": 1.0313927503489615, "learning_rate": 1.3465907566886599e-05, "loss": 0.7597, "step": 2115 }, { "epoch": 0.4059569773855488, "grad_norm": 0.9470490766814508, "learning_rate": 1.3460077292453064e-05, "loss": 0.848, "step": 2116 }, { "epoch": 0.40614882850907696, "grad_norm": 1.0000733835498843, "learning_rate": 1.3454245681598775e-05, "loss": 0.7335, "step": 2117 }, { "epoch": 0.4063406796326051, "grad_norm": 1.027821355354502, "learning_rate": 1.3448412736576128e-05, "loss": 0.7782, "step": 2118 }, { "epoch": 0.4065325307561332, "grad_norm": 1.0524763896846177, "learning_rate": 1.3442578459638048e-05, "loss": 0.837, "step": 2119 }, { "epoch": 0.40672438187966137, "grad_norm": 0.8508320333315036, "learning_rate": 1.3436742853037967e-05, "loss": 0.8751, "step": 2120 }, { "epoch": 0.4069162330031895, "grad_norm": 0.9297686879331545, "learning_rate": 1.3430905919029824e-05, "loss": 0.7768, "step": 2121 }, { "epoch": 0.4071080841267177, "grad_norm": 0.9633091369772241, "learning_rate": 1.3425067659868084e-05, "loss": 0.8059, "step": 2122 }, { "epoch": 0.4072999352502458, "grad_norm": 1.0454644716016985, "learning_rate": 1.3419228077807711e-05, "loss": 0.7946, "step": 2123 }, { "epoch": 0.407491786373774, "grad_norm": 1.1452545486794499, "learning_rate": 1.3413387175104193e-05, "loss": 0.8199, "step": 2124 }, { "epoch": 0.4076836374973021, "grad_norm": 0.8934851422923391, "learning_rate": 1.3407544954013515e-05, "loss": 0.8606, "step": 2125 }, { "epoch": 0.40787548862083023, "grad_norm": 0.9100704476733659, "learning_rate": 1.3401701416792179e-05, "loss": 0.8395, "step": 2126 }, { "epoch": 0.4080673397443584, "grad_norm": 0.9187063487891876, "learning_rate": 1.3395856565697194e-05, "loss": 0.8037, "step": 2127 }, { "epoch": 0.40825919086788653, "grad_norm": 0.889498528852771, "learning_rate": 1.3390010402986075e-05, "loss": 0.79, "step": 2128 }, { "epoch": 0.4084510419914147, "grad_norm": 1.073197590710849, "learning_rate": 1.3384162930916854e-05, "loss": 0.8759, "step": 2129 }, { "epoch": 0.4086428931149428, "grad_norm": 0.708277622554326, "learning_rate": 1.3378314151748045e-05, "loss": 0.3259, "step": 2130 }, { "epoch": 0.40883474423847094, "grad_norm": 0.7022848530888953, "learning_rate": 1.3372464067738686e-05, "loss": 0.361, "step": 2131 }, { "epoch": 0.4090265953619991, "grad_norm": 0.9017205442162852, "learning_rate": 1.336661268114832e-05, "loss": 0.7534, "step": 2132 }, { "epoch": 0.40921844648552724, "grad_norm": 1.008892408670118, "learning_rate": 1.3360759994236982e-05, "loss": 0.8557, "step": 2133 }, { "epoch": 0.4094102976090554, "grad_norm": 0.9031617439105357, "learning_rate": 1.3354906009265215e-05, "loss": 0.8206, "step": 2134 }, { "epoch": 0.4096021487325835, "grad_norm": 1.0186615414804039, "learning_rate": 1.3349050728494066e-05, "loss": 0.9016, "step": 2135 }, { "epoch": 0.40979399985611165, "grad_norm": 0.9435414968113729, "learning_rate": 1.334319415418508e-05, "loss": 0.8633, "step": 2136 }, { "epoch": 0.4099858509796398, "grad_norm": 0.9936768349030409, "learning_rate": 1.3337336288600297e-05, "loss": 0.8209, "step": 2137 }, { "epoch": 0.41017770210316795, "grad_norm": 0.9634883772969762, "learning_rate": 1.333147713400226e-05, "loss": 0.7773, "step": 2138 }, { "epoch": 0.4103695532266961, "grad_norm": 0.9675410669285797, "learning_rate": 1.3325616692654014e-05, "loss": 0.7662, "step": 2139 }, { "epoch": 0.4105614043502242, "grad_norm": 0.7874460526373681, "learning_rate": 1.331975496681909e-05, "loss": 0.8629, "step": 2140 }, { "epoch": 0.41075325547375235, "grad_norm": 0.9515603883948107, "learning_rate": 1.331389195876153e-05, "loss": 0.8156, "step": 2141 }, { "epoch": 0.4109451065972805, "grad_norm": 0.842090101651478, "learning_rate": 1.3308027670745858e-05, "loss": 0.8182, "step": 2142 }, { "epoch": 0.41113695772080866, "grad_norm": 0.965471909566465, "learning_rate": 1.33021621050371e-05, "loss": 0.8641, "step": 2143 }, { "epoch": 0.4113288088443368, "grad_norm": 0.909583813634534, "learning_rate": 1.329629526390077e-05, "loss": 0.8561, "step": 2144 }, { "epoch": 0.4115206599678649, "grad_norm": 0.9680692141103397, "learning_rate": 1.3290427149602876e-05, "loss": 0.8034, "step": 2145 }, { "epoch": 0.41171251109139306, "grad_norm": 0.9289734712423102, "learning_rate": 1.3284557764409927e-05, "loss": 0.7887, "step": 2146 }, { "epoch": 0.4119043622149212, "grad_norm": 0.85750127824812, "learning_rate": 1.3278687110588908e-05, "loss": 0.7745, "step": 2147 }, { "epoch": 0.41209621333844937, "grad_norm": 0.803986986842003, "learning_rate": 1.3272815190407301e-05, "loss": 0.7995, "step": 2148 }, { "epoch": 0.4122880644619775, "grad_norm": 0.8244744218851328, "learning_rate": 1.3266942006133086e-05, "loss": 0.7677, "step": 2149 }, { "epoch": 0.4124799155855057, "grad_norm": 0.810530590058931, "learning_rate": 1.3261067560034712e-05, "loss": 0.8848, "step": 2150 }, { "epoch": 0.41267176670903377, "grad_norm": 0.8619618193199229, "learning_rate": 1.3255191854381136e-05, "loss": 0.3199, "step": 2151 }, { "epoch": 0.4128636178325619, "grad_norm": 0.924602405511294, "learning_rate": 1.324931489144178e-05, "loss": 0.8203, "step": 2152 }, { "epoch": 0.4130554689560901, "grad_norm": 0.882481259366556, "learning_rate": 1.3243436673486572e-05, "loss": 0.7726, "step": 2153 }, { "epoch": 0.41324732007961823, "grad_norm": 0.8726298060099887, "learning_rate": 1.323755720278591e-05, "loss": 0.8453, "step": 2154 }, { "epoch": 0.4134391712031464, "grad_norm": 0.9355922601991462, "learning_rate": 1.3231676481610687e-05, "loss": 0.8318, "step": 2155 }, { "epoch": 0.4136310223266745, "grad_norm": 0.9070858377515724, "learning_rate": 1.322579451223227e-05, "loss": 0.797, "step": 2156 }, { "epoch": 0.41382287345020263, "grad_norm": 0.938958373410704, "learning_rate": 1.3219911296922513e-05, "loss": 0.8415, "step": 2157 }, { "epoch": 0.4140147245737308, "grad_norm": 0.9373679380727702, "learning_rate": 1.3214026837953749e-05, "loss": 0.85, "step": 2158 }, { "epoch": 0.41420657569725894, "grad_norm": 0.9094805208957326, "learning_rate": 1.3208141137598793e-05, "loss": 0.7834, "step": 2159 }, { "epoch": 0.4143984268207871, "grad_norm": 1.0671798236109475, "learning_rate": 1.3202254198130937e-05, "loss": 0.8286, "step": 2160 }, { "epoch": 0.4145902779443152, "grad_norm": 1.0366422766380066, "learning_rate": 1.3196366021823956e-05, "loss": 0.834, "step": 2161 }, { "epoch": 0.41478212906784334, "grad_norm": 0.8803230790627258, "learning_rate": 1.31904766109521e-05, "loss": 0.7124, "step": 2162 }, { "epoch": 0.4149739801913715, "grad_norm": 1.0146659452481315, "learning_rate": 1.3184585967790094e-05, "loss": 0.7869, "step": 2163 }, { "epoch": 0.41516583131489965, "grad_norm": 0.9655862371560225, "learning_rate": 1.3178694094613143e-05, "loss": 0.8111, "step": 2164 }, { "epoch": 0.4153576824384278, "grad_norm": 1.1252145866112733, "learning_rate": 1.3172800993696921e-05, "loss": 0.7694, "step": 2165 }, { "epoch": 0.4155495335619559, "grad_norm": 0.9735722064368331, "learning_rate": 1.3166906667317586e-05, "loss": 0.8543, "step": 2166 }, { "epoch": 0.41574138468548405, "grad_norm": 0.8318653370065194, "learning_rate": 1.3161011117751756e-05, "loss": 0.8063, "step": 2167 }, { "epoch": 0.4159332358090122, "grad_norm": 0.9914549501544312, "learning_rate": 1.3155114347276538e-05, "loss": 0.863, "step": 2168 }, { "epoch": 0.41612508693254036, "grad_norm": 0.9679778380353458, "learning_rate": 1.3149216358169494e-05, "loss": 0.8271, "step": 2169 }, { "epoch": 0.4163169380560685, "grad_norm": 0.9260821104950789, "learning_rate": 1.3143317152708668e-05, "loss": 0.7935, "step": 2170 }, { "epoch": 0.4165087891795966, "grad_norm": 0.9240377856532539, "learning_rate": 1.3137416733172567e-05, "loss": 0.8293, "step": 2171 }, { "epoch": 0.41670064030312476, "grad_norm": 0.8982912797171304, "learning_rate": 1.3131515101840176e-05, "loss": 0.8025, "step": 2172 }, { "epoch": 0.4168924914266529, "grad_norm": 0.9923208047774789, "learning_rate": 1.3125612260990934e-05, "loss": 0.8009, "step": 2173 }, { "epoch": 0.41708434255018106, "grad_norm": 0.8898022604829398, "learning_rate": 1.3119708212904758e-05, "loss": 0.7924, "step": 2174 }, { "epoch": 0.4172761936737092, "grad_norm": 1.486988982877841, "learning_rate": 1.3113802959862032e-05, "loss": 0.8458, "step": 2175 }, { "epoch": 0.41746804479723737, "grad_norm": 0.9796256249166595, "learning_rate": 1.31078965041436e-05, "loss": 0.7754, "step": 2176 }, { "epoch": 0.41765989592076547, "grad_norm": 1.1001205958589526, "learning_rate": 1.3101988848030767e-05, "loss": 0.8171, "step": 2177 }, { "epoch": 0.4178517470442936, "grad_norm": 0.8576301599031234, "learning_rate": 1.3096079993805314e-05, "loss": 0.7798, "step": 2178 }, { "epoch": 0.4180435981678218, "grad_norm": 0.9272912732984769, "learning_rate": 1.3090169943749475e-05, "loss": 0.8358, "step": 2179 }, { "epoch": 0.4182354492913499, "grad_norm": 0.9301948375269405, "learning_rate": 1.3084258700145948e-05, "loss": 0.841, "step": 2180 }, { "epoch": 0.4184273004148781, "grad_norm": 0.9717677717777237, "learning_rate": 1.3078346265277894e-05, "loss": 0.8012, "step": 2181 }, { "epoch": 0.4186191515384062, "grad_norm": 0.8769828677847679, "learning_rate": 1.3072432641428931e-05, "loss": 0.8142, "step": 2182 }, { "epoch": 0.41881100266193433, "grad_norm": 0.9801233231167366, "learning_rate": 1.306651783088314e-05, "loss": 0.7434, "step": 2183 }, { "epoch": 0.4190028537854625, "grad_norm": 0.9099365100314081, "learning_rate": 1.3060601835925055e-05, "loss": 0.7828, "step": 2184 }, { "epoch": 0.41919470490899063, "grad_norm": 1.124186483231673, "learning_rate": 1.3054684658839674e-05, "loss": 0.8121, "step": 2185 }, { "epoch": 0.4193865560325188, "grad_norm": 0.9435027789826615, "learning_rate": 1.3048766301912448e-05, "loss": 0.7923, "step": 2186 }, { "epoch": 0.4195784071560469, "grad_norm": 0.8566541857221343, "learning_rate": 1.3042846767429283e-05, "loss": 0.7923, "step": 2187 }, { "epoch": 0.41977025827957504, "grad_norm": 0.8771536246878238, "learning_rate": 1.3036926057676542e-05, "loss": 0.8331, "step": 2188 }, { "epoch": 0.4199621094031032, "grad_norm": 0.8689924076324639, "learning_rate": 1.3031004174941035e-05, "loss": 0.8035, "step": 2189 }, { "epoch": 0.42015396052663134, "grad_norm": 0.86914543782515, "learning_rate": 1.3025081121510042e-05, "loss": 0.8266, "step": 2190 }, { "epoch": 0.4203458116501595, "grad_norm": 1.0354640186320416, "learning_rate": 1.3019156899671273e-05, "loss": 0.8387, "step": 2191 }, { "epoch": 0.4205376627736876, "grad_norm": 0.8603390147182604, "learning_rate": 1.3013231511712908e-05, "loss": 0.8359, "step": 2192 }, { "epoch": 0.42072951389721575, "grad_norm": 0.8590459527355134, "learning_rate": 1.3007304959923568e-05, "loss": 0.8388, "step": 2193 }, { "epoch": 0.4209213650207439, "grad_norm": 0.9239915271201741, "learning_rate": 1.3001377246592317e-05, "loss": 0.7561, "step": 2194 }, { "epoch": 0.42111321614427205, "grad_norm": 1.025671095665028, "learning_rate": 1.299544837400869e-05, "loss": 0.806, "step": 2195 }, { "epoch": 0.4213050672678002, "grad_norm": 0.9823423910262711, "learning_rate": 1.2989518344462644e-05, "loss": 0.8504, "step": 2196 }, { "epoch": 0.42149691839132836, "grad_norm": 0.8572035065522599, "learning_rate": 1.2983587160244602e-05, "loss": 0.7776, "step": 2197 }, { "epoch": 0.42168876951485645, "grad_norm": 0.8836424552337783, "learning_rate": 1.2977654823645421e-05, "loss": 0.8286, "step": 2198 }, { "epoch": 0.4218806206383846, "grad_norm": 0.921950743721866, "learning_rate": 1.2971721336956407e-05, "loss": 0.3469, "step": 2199 }, { "epoch": 0.42207247176191276, "grad_norm": 0.8386802394883911, "learning_rate": 1.2965786702469316e-05, "loss": 0.7548, "step": 2200 }, { "epoch": 0.4222643228854409, "grad_norm": 0.874547723441189, "learning_rate": 1.2959850922476336e-05, "loss": 0.8099, "step": 2201 }, { "epoch": 0.42245617400896907, "grad_norm": 0.6539787569784241, "learning_rate": 1.295391399927011e-05, "loss": 0.3123, "step": 2202 }, { "epoch": 0.42264802513249716, "grad_norm": 0.9345563870603386, "learning_rate": 1.294797593514371e-05, "loss": 0.8547, "step": 2203 }, { "epoch": 0.4228398762560253, "grad_norm": 0.9563466054596507, "learning_rate": 1.2942036732390656e-05, "loss": 0.8108, "step": 2204 }, { "epoch": 0.42303172737955347, "grad_norm": 0.9096727000309174, "learning_rate": 1.293609639330491e-05, "loss": 0.7558, "step": 2205 }, { "epoch": 0.4232235785030816, "grad_norm": 1.2133439072320573, "learning_rate": 1.2930154920180862e-05, "loss": 0.7681, "step": 2206 }, { "epoch": 0.4234154296266098, "grad_norm": 0.8448862283422117, "learning_rate": 1.292421231531336e-05, "loss": 0.7952, "step": 2207 }, { "epoch": 0.42360728075013787, "grad_norm": 0.8394371943172222, "learning_rate": 1.2918268580997665e-05, "loss": 0.8109, "step": 2208 }, { "epoch": 0.423799131873666, "grad_norm": 0.8740392909718324, "learning_rate": 1.2912323719529489e-05, "loss": 0.8587, "step": 2209 }, { "epoch": 0.4239909829971942, "grad_norm": 0.7958257855200693, "learning_rate": 1.2906377733204978e-05, "loss": 0.7942, "step": 2210 }, { "epoch": 0.42418283412072233, "grad_norm": 0.9414654375762782, "learning_rate": 1.2900430624320707e-05, "loss": 0.7716, "step": 2211 }, { "epoch": 0.4243746852442505, "grad_norm": 1.2316592480849922, "learning_rate": 1.2894482395173695e-05, "loss": 0.8792, "step": 2212 }, { "epoch": 0.4245665363677786, "grad_norm": 1.000270167223024, "learning_rate": 1.288853304806138e-05, "loss": 0.8143, "step": 2213 }, { "epoch": 0.42475838749130673, "grad_norm": 0.8365393408747787, "learning_rate": 1.2882582585281647e-05, "loss": 0.7608, "step": 2214 }, { "epoch": 0.4249502386148349, "grad_norm": 0.9317936769592697, "learning_rate": 1.2876631009132793e-05, "loss": 0.7954, "step": 2215 }, { "epoch": 0.42514208973836304, "grad_norm": 1.0047147222542292, "learning_rate": 1.2870678321913559e-05, "loss": 0.896, "step": 2216 }, { "epoch": 0.4253339408618912, "grad_norm": 0.8376399673441127, "learning_rate": 1.2864724525923117e-05, "loss": 0.7688, "step": 2217 }, { "epoch": 0.4255257919854193, "grad_norm": 0.8607154198962071, "learning_rate": 1.2858769623461055e-05, "loss": 0.8124, "step": 2218 }, { "epoch": 0.42571764310894744, "grad_norm": 0.9396697932833139, "learning_rate": 1.2852813616827404e-05, "loss": 0.8612, "step": 2219 }, { "epoch": 0.4259094942324756, "grad_norm": 0.957708922524034, "learning_rate": 1.2846856508322605e-05, "loss": 0.8074, "step": 2220 }, { "epoch": 0.42610134535600375, "grad_norm": 0.8848769892756113, "learning_rate": 1.2840898300247536e-05, "loss": 0.811, "step": 2221 }, { "epoch": 0.4262931964795319, "grad_norm": 0.8842737254203418, "learning_rate": 1.2834938994903494e-05, "loss": 0.767, "step": 2222 }, { "epoch": 0.42648504760306005, "grad_norm": 1.0117832640660067, "learning_rate": 1.2828978594592203e-05, "loss": 0.8287, "step": 2223 }, { "epoch": 0.42667689872658815, "grad_norm": 0.8193440878331496, "learning_rate": 1.282301710161581e-05, "loss": 0.7603, "step": 2224 }, { "epoch": 0.4268687498501163, "grad_norm": 0.9238257440663971, "learning_rate": 1.2817054518276884e-05, "loss": 0.7707, "step": 2225 }, { "epoch": 0.42706060097364446, "grad_norm": 0.9291128080099011, "learning_rate": 1.2811090846878409e-05, "loss": 0.7496, "step": 2226 }, { "epoch": 0.4272524520971726, "grad_norm": 0.9925269632862554, "learning_rate": 1.2805126089723798e-05, "loss": 0.8455, "step": 2227 }, { "epoch": 0.42744430322070076, "grad_norm": 0.9847694025787693, "learning_rate": 1.2799160249116879e-05, "loss": 0.8262, "step": 2228 }, { "epoch": 0.42763615434422886, "grad_norm": 0.8725415405336475, "learning_rate": 1.2793193327361904e-05, "loss": 0.7736, "step": 2229 }, { "epoch": 0.427828005467757, "grad_norm": 0.9702878823090492, "learning_rate": 1.2787225326763526e-05, "loss": 0.8449, "step": 2230 }, { "epoch": 0.42801985659128516, "grad_norm": 0.8942017288149988, "learning_rate": 1.2781256249626837e-05, "loss": 0.7493, "step": 2231 }, { "epoch": 0.4282117077148133, "grad_norm": 0.9142409944610073, "learning_rate": 1.277528609825733e-05, "loss": 0.8008, "step": 2232 }, { "epoch": 0.42840355883834147, "grad_norm": 0.9726799608350449, "learning_rate": 1.2769314874960916e-05, "loss": 0.8926, "step": 2233 }, { "epoch": 0.42859540996186957, "grad_norm": 1.0562400548449313, "learning_rate": 1.2763342582043925e-05, "loss": 0.8291, "step": 2234 }, { "epoch": 0.4287872610853977, "grad_norm": 0.8774941183061039, "learning_rate": 1.2757369221813097e-05, "loss": 0.8531, "step": 2235 }, { "epoch": 0.4289791122089259, "grad_norm": 1.0491491072856625, "learning_rate": 1.2751394796575576e-05, "loss": 0.7763, "step": 2236 }, { "epoch": 0.429170963332454, "grad_norm": 0.8960638559311662, "learning_rate": 1.2745419308638933e-05, "loss": 0.7519, "step": 2237 }, { "epoch": 0.4293628144559822, "grad_norm": 0.9153183463750002, "learning_rate": 1.2739442760311134e-05, "loss": 0.8225, "step": 2238 }, { "epoch": 0.4295546655795103, "grad_norm": 0.938303832150391, "learning_rate": 1.2733465153900569e-05, "loss": 0.8278, "step": 2239 }, { "epoch": 0.42974651670303843, "grad_norm": 0.9541884713808375, "learning_rate": 1.2727486491716024e-05, "loss": 0.7663, "step": 2240 }, { "epoch": 0.4299383678265666, "grad_norm": 0.891377109423386, "learning_rate": 1.27215067760667e-05, "loss": 0.8344, "step": 2241 }, { "epoch": 0.43013021895009473, "grad_norm": 0.8823673740250243, "learning_rate": 1.2715526009262209e-05, "loss": 0.7926, "step": 2242 }, { "epoch": 0.4303220700736229, "grad_norm": 0.9608698671091601, "learning_rate": 1.2709544193612554e-05, "loss": 0.7589, "step": 2243 }, { "epoch": 0.430513921197151, "grad_norm": 0.9633677819552643, "learning_rate": 1.2703561331428161e-05, "loss": 0.8851, "step": 2244 }, { "epoch": 0.43070577232067914, "grad_norm": 1.0320835861476456, "learning_rate": 1.269757742501984e-05, "loss": 0.8183, "step": 2245 }, { "epoch": 0.4308976234442073, "grad_norm": 1.0568873099755, "learning_rate": 1.2691592476698828e-05, "loss": 0.8642, "step": 2246 }, { "epoch": 0.43108947456773544, "grad_norm": 1.1428048447256094, "learning_rate": 1.2685606488776748e-05, "loss": 0.895, "step": 2247 }, { "epoch": 0.4312813256912636, "grad_norm": 0.924524491316675, "learning_rate": 1.2679619463565623e-05, "loss": 0.8184, "step": 2248 }, { "epoch": 0.43147317681479175, "grad_norm": 0.862812888961313, "learning_rate": 1.267363140337789e-05, "loss": 0.8018, "step": 2249 }, { "epoch": 0.43166502793831985, "grad_norm": 0.9239591712256754, "learning_rate": 1.2667642310526378e-05, "loss": 0.8101, "step": 2250 }, { "epoch": 0.431856879061848, "grad_norm": 0.808308900332463, "learning_rate": 1.2661652187324314e-05, "loss": 0.7515, "step": 2251 }, { "epoch": 0.43204873018537615, "grad_norm": 0.9075934122423285, "learning_rate": 1.2655661036085318e-05, "loss": 0.817, "step": 2252 }, { "epoch": 0.4322405813089043, "grad_norm": 0.7874142107035279, "learning_rate": 1.264966885912342e-05, "loss": 0.3532, "step": 2253 }, { "epoch": 0.43243243243243246, "grad_norm": 0.7983626776265857, "learning_rate": 1.2643675658753041e-05, "loss": 0.765, "step": 2254 }, { "epoch": 0.43262428355596055, "grad_norm": 1.0735927982769766, "learning_rate": 1.2637681437288986e-05, "loss": 0.8502, "step": 2255 }, { "epoch": 0.4328161346794887, "grad_norm": 0.7741589325035643, "learning_rate": 1.2631686197046474e-05, "loss": 0.7483, "step": 2256 }, { "epoch": 0.43300798580301686, "grad_norm": 1.014134895750441, "learning_rate": 1.2625689940341102e-05, "loss": 0.6989, "step": 2257 }, { "epoch": 0.433199836926545, "grad_norm": 0.9153087942733052, "learning_rate": 1.2619692669488868e-05, "loss": 0.8383, "step": 2258 }, { "epoch": 0.43339168805007316, "grad_norm": 1.0145351321915987, "learning_rate": 1.2613694386806157e-05, "loss": 0.7577, "step": 2259 }, { "epoch": 0.43358353917360126, "grad_norm": 0.9549557416476323, "learning_rate": 1.2607695094609744e-05, "loss": 0.8812, "step": 2260 }, { "epoch": 0.4337753902971294, "grad_norm": 1.1366295279990817, "learning_rate": 1.2601694795216801e-05, "loss": 0.7712, "step": 2261 }, { "epoch": 0.43396724142065757, "grad_norm": 0.9283431971962, "learning_rate": 1.2595693490944883e-05, "loss": 0.823, "step": 2262 }, { "epoch": 0.4341590925441857, "grad_norm": 0.6618247602953659, "learning_rate": 1.2589691184111935e-05, "loss": 0.3349, "step": 2263 }, { "epoch": 0.4343509436677139, "grad_norm": 0.9600490595216699, "learning_rate": 1.258368787703629e-05, "loss": 0.8022, "step": 2264 }, { "epoch": 0.43454279479124197, "grad_norm": 0.873715172157577, "learning_rate": 1.2577683572036664e-05, "loss": 0.7949, "step": 2265 }, { "epoch": 0.4347346459147701, "grad_norm": 0.6752956594495637, "learning_rate": 1.2571678271432157e-05, "loss": 0.2979, "step": 2266 }, { "epoch": 0.4349264970382983, "grad_norm": 0.9031596319888813, "learning_rate": 1.2565671977542264e-05, "loss": 0.8231, "step": 2267 }, { "epoch": 0.43511834816182643, "grad_norm": 0.660957982105979, "learning_rate": 1.2559664692686855e-05, "loss": 0.3211, "step": 2268 }, { "epoch": 0.4353101992853546, "grad_norm": 0.841947718354096, "learning_rate": 1.255365641918618e-05, "loss": 0.8208, "step": 2269 }, { "epoch": 0.4355020504088827, "grad_norm": 1.0516588228515726, "learning_rate": 1.2547647159360882e-05, "loss": 0.7884, "step": 2270 }, { "epoch": 0.43569390153241083, "grad_norm": 0.9607812353254677, "learning_rate": 1.254163691553198e-05, "loss": 0.7792, "step": 2271 }, { "epoch": 0.435885752655939, "grad_norm": 1.264166281003547, "learning_rate": 1.2535625690020861e-05, "loss": 0.8764, "step": 2272 }, { "epoch": 0.43607760377946714, "grad_norm": 0.909918139099051, "learning_rate": 1.2529613485149308e-05, "loss": 0.812, "step": 2273 }, { "epoch": 0.4362694549029953, "grad_norm": 0.9431024864320815, "learning_rate": 1.2523600303239475e-05, "loss": 0.7726, "step": 2274 }, { "epoch": 0.43646130602652344, "grad_norm": 1.0355773502384569, "learning_rate": 1.2517586146613899e-05, "loss": 0.7874, "step": 2275 }, { "epoch": 0.43665315715005154, "grad_norm": 0.8928176738062493, "learning_rate": 1.2511571017595481e-05, "loss": 0.8188, "step": 2276 }, { "epoch": 0.4368450082735797, "grad_norm": 0.9167411615426605, "learning_rate": 1.2505554918507507e-05, "loss": 0.7666, "step": 2277 }, { "epoch": 0.43703685939710785, "grad_norm": 1.2199765664096964, "learning_rate": 1.2499537851673645e-05, "loss": 0.888, "step": 2278 }, { "epoch": 0.437228710520636, "grad_norm": 0.9562356828809965, "learning_rate": 1.2493519819417915e-05, "loss": 0.7837, "step": 2279 }, { "epoch": 0.43742056164416415, "grad_norm": 1.1637520838367432, "learning_rate": 1.2487500824064726e-05, "loss": 0.8532, "step": 2280 }, { "epoch": 0.43761241276769225, "grad_norm": 1.1135250054848282, "learning_rate": 1.2481480867938865e-05, "loss": 0.8354, "step": 2281 }, { "epoch": 0.4378042638912204, "grad_norm": 0.8494968600423214, "learning_rate": 1.2475459953365466e-05, "loss": 0.7804, "step": 2282 }, { "epoch": 0.43799611501474855, "grad_norm": 1.0664877077545185, "learning_rate": 1.246943808267006e-05, "loss": 0.8241, "step": 2283 }, { "epoch": 0.4381879661382767, "grad_norm": 0.8973703428438455, "learning_rate": 1.2463415258178527e-05, "loss": 0.857, "step": 2284 }, { "epoch": 0.43837981726180486, "grad_norm": 0.9758000277719202, "learning_rate": 1.2457391482217132e-05, "loss": 0.7412, "step": 2285 }, { "epoch": 0.43857166838533296, "grad_norm": 0.8970981380238334, "learning_rate": 1.2451366757112491e-05, "loss": 0.7297, "step": 2286 }, { "epoch": 0.4387635195088611, "grad_norm": 0.8559644335985205, "learning_rate": 1.24453410851916e-05, "loss": 0.8405, "step": 2287 }, { "epoch": 0.43895537063238926, "grad_norm": 0.8781903527234631, "learning_rate": 1.2439314468781814e-05, "loss": 0.7804, "step": 2288 }, { "epoch": 0.4391472217559174, "grad_norm": 0.8760790801521832, "learning_rate": 1.243328691021085e-05, "loss": 0.8029, "step": 2289 }, { "epoch": 0.43933907287944557, "grad_norm": 0.8011938756806415, "learning_rate": 1.24272584118068e-05, "loss": 0.3213, "step": 2290 }, { "epoch": 0.43953092400297367, "grad_norm": 1.009506507567454, "learning_rate": 1.2421228975898108e-05, "loss": 0.8295, "step": 2291 }, { "epoch": 0.4397227751265018, "grad_norm": 0.905893866672249, "learning_rate": 1.2415198604813585e-05, "loss": 0.8873, "step": 2292 }, { "epoch": 0.43991462625002997, "grad_norm": 0.913373234895865, "learning_rate": 1.2409167300882403e-05, "loss": 0.8184, "step": 2293 }, { "epoch": 0.4401064773735581, "grad_norm": 1.0096618560418018, "learning_rate": 1.240313506643409e-05, "loss": 0.8405, "step": 2294 }, { "epoch": 0.4402983284970863, "grad_norm": 0.8868097962459087, "learning_rate": 1.2397101903798545e-05, "loss": 0.8213, "step": 2295 }, { "epoch": 0.44049017962061443, "grad_norm": 0.8714338742397137, "learning_rate": 1.2391067815306009e-05, "loss": 0.819, "step": 2296 }, { "epoch": 0.4406820307441425, "grad_norm": 0.8851405146033465, "learning_rate": 1.2385032803287096e-05, "loss": 0.8506, "step": 2297 }, { "epoch": 0.4408738818676707, "grad_norm": 0.7746452878595669, "learning_rate": 1.2378996870072765e-05, "loss": 0.7118, "step": 2298 }, { "epoch": 0.44106573299119883, "grad_norm": 0.9046722144139894, "learning_rate": 1.237296001799434e-05, "loss": 0.8215, "step": 2299 }, { "epoch": 0.441257584114727, "grad_norm": 0.6835379346656977, "learning_rate": 1.2366922249383493e-05, "loss": 0.3266, "step": 2300 }, { "epoch": 0.44144943523825514, "grad_norm": 0.9240077985921251, "learning_rate": 1.236088356657225e-05, "loss": 0.8521, "step": 2301 }, { "epoch": 0.44164128636178324, "grad_norm": 0.8659752561362325, "learning_rate": 1.2354843971892998e-05, "loss": 0.7446, "step": 2302 }, { "epoch": 0.4418331374853114, "grad_norm": 1.0043368031436455, "learning_rate": 1.234880346767847e-05, "loss": 0.8473, "step": 2303 }, { "epoch": 0.44202498860883954, "grad_norm": 0.8633818698650008, "learning_rate": 1.2342762056261745e-05, "loss": 0.7591, "step": 2304 }, { "epoch": 0.4422168397323677, "grad_norm": 0.9716221408786025, "learning_rate": 1.2336719739976269e-05, "loss": 0.8338, "step": 2305 }, { "epoch": 0.44240869085589585, "grad_norm": 0.9970593750443775, "learning_rate": 1.2330676521155818e-05, "loss": 0.7839, "step": 2306 }, { "epoch": 0.44260054197942394, "grad_norm": 0.9052576481594076, "learning_rate": 1.2324632402134533e-05, "loss": 0.8156, "step": 2307 }, { "epoch": 0.4427923931029521, "grad_norm": 0.9032297216545285, "learning_rate": 1.2318587385246887e-05, "loss": 0.8123, "step": 2308 }, { "epoch": 0.44298424422648025, "grad_norm": 0.8299838505570782, "learning_rate": 1.2312541472827715e-05, "loss": 0.8098, "step": 2309 }, { "epoch": 0.4431760953500084, "grad_norm": 0.9625696538851638, "learning_rate": 1.230649466721219e-05, "loss": 0.7771, "step": 2310 }, { "epoch": 0.44336794647353656, "grad_norm": 1.0316090076816595, "learning_rate": 1.2300446970735828e-05, "loss": 0.7868, "step": 2311 }, { "epoch": 0.44355979759706465, "grad_norm": 0.9462832185781268, "learning_rate": 1.2294398385734496e-05, "loss": 0.8235, "step": 2312 }, { "epoch": 0.4437516487205928, "grad_norm": 0.8760158613688535, "learning_rate": 1.2288348914544399e-05, "loss": 0.8052, "step": 2313 }, { "epoch": 0.44394349984412096, "grad_norm": 0.983687154263216, "learning_rate": 1.2282298559502086e-05, "loss": 0.6898, "step": 2314 }, { "epoch": 0.4441353509676491, "grad_norm": 0.7917661669023416, "learning_rate": 1.2276247322944443e-05, "loss": 0.7647, "step": 2315 }, { "epoch": 0.44432720209117726, "grad_norm": 0.9371407546435413, "learning_rate": 1.2270195207208705e-05, "loss": 0.7634, "step": 2316 }, { "epoch": 0.44451905321470536, "grad_norm": 1.1605095293614467, "learning_rate": 1.2264142214632441e-05, "loss": 0.8087, "step": 2317 }, { "epoch": 0.4447109043382335, "grad_norm": 1.0029717026452278, "learning_rate": 1.2258088347553559e-05, "loss": 0.8264, "step": 2318 }, { "epoch": 0.44490275546176167, "grad_norm": 0.9948911193309754, "learning_rate": 1.2252033608310307e-05, "loss": 0.8365, "step": 2319 }, { "epoch": 0.4450946065852898, "grad_norm": 1.2383913570685692, "learning_rate": 1.2245977999241268e-05, "loss": 0.6954, "step": 2320 }, { "epoch": 0.445286457708818, "grad_norm": 0.8986158783637139, "learning_rate": 1.223992152268536e-05, "loss": 0.7976, "step": 2321 }, { "epoch": 0.4454783088323461, "grad_norm": 0.9554100327820184, "learning_rate": 1.2233864180981838e-05, "loss": 0.8466, "step": 2322 }, { "epoch": 0.4456701599558742, "grad_norm": 1.0103429271697892, "learning_rate": 1.222780597647029e-05, "loss": 0.7908, "step": 2323 }, { "epoch": 0.4458620110794024, "grad_norm": 0.932762535399097, "learning_rate": 1.222174691149064e-05, "loss": 0.7771, "step": 2324 }, { "epoch": 0.44605386220293053, "grad_norm": 0.9896200965655833, "learning_rate": 1.2215686988383138e-05, "loss": 0.8347, "step": 2325 }, { "epoch": 0.4462457133264587, "grad_norm": 0.8822992510857804, "learning_rate": 1.2209626209488375e-05, "loss": 0.8913, "step": 2326 }, { "epoch": 0.44643756444998683, "grad_norm": 0.950681740340918, "learning_rate": 1.2203564577147262e-05, "loss": 0.7936, "step": 2327 }, { "epoch": 0.44662941557351493, "grad_norm": 0.9095177738324717, "learning_rate": 1.2197502093701048e-05, "loss": 0.8232, "step": 2328 }, { "epoch": 0.4468212666970431, "grad_norm": 0.9010878911075224, "learning_rate": 1.2191438761491307e-05, "loss": 0.8278, "step": 2329 }, { "epoch": 0.44701311782057124, "grad_norm": 0.9022927358230571, "learning_rate": 1.218537458285994e-05, "loss": 0.7342, "step": 2330 }, { "epoch": 0.4472049689440994, "grad_norm": 0.9560521114204819, "learning_rate": 1.2179309560149178e-05, "loss": 0.8255, "step": 2331 }, { "epoch": 0.44739682006762754, "grad_norm": 0.929082160556327, "learning_rate": 1.2173243695701575e-05, "loss": 0.765, "step": 2332 }, { "epoch": 0.44758867119115564, "grad_norm": 1.1331064521704284, "learning_rate": 1.216717699186001e-05, "loss": 0.7893, "step": 2333 }, { "epoch": 0.4477805223146838, "grad_norm": 0.7222053408500461, "learning_rate": 1.2161109450967689e-05, "loss": 0.3484, "step": 2334 }, { "epoch": 0.44797237343821195, "grad_norm": 0.9011614392810712, "learning_rate": 1.2155041075368143e-05, "loss": 0.7522, "step": 2335 }, { "epoch": 0.4481642245617401, "grad_norm": 0.9432174202242837, "learning_rate": 1.2148971867405219e-05, "loss": 0.822, "step": 2336 }, { "epoch": 0.44835607568526825, "grad_norm": 0.9848809171953006, "learning_rate": 1.2142901829423091e-05, "loss": 0.7348, "step": 2337 }, { "epoch": 0.44854792680879635, "grad_norm": 1.1013054318150317, "learning_rate": 1.2136830963766245e-05, "loss": 0.8724, "step": 2338 }, { "epoch": 0.4487397779323245, "grad_norm": 0.8692777028640727, "learning_rate": 1.2130759272779497e-05, "loss": 0.7603, "step": 2339 }, { "epoch": 0.44893162905585265, "grad_norm": 0.9405101377935869, "learning_rate": 1.212468675880798e-05, "loss": 0.8249, "step": 2340 }, { "epoch": 0.4491234801793808, "grad_norm": 0.8927902928237607, "learning_rate": 1.2118613424197143e-05, "loss": 0.8311, "step": 2341 }, { "epoch": 0.44931533130290896, "grad_norm": 0.7157501461063174, "learning_rate": 1.2112539271292748e-05, "loss": 0.2938, "step": 2342 }, { "epoch": 0.44950718242643706, "grad_norm": 1.0965551911880178, "learning_rate": 1.210646430244088e-05, "loss": 0.7536, "step": 2343 }, { "epoch": 0.4496990335499652, "grad_norm": 0.634143629645637, "learning_rate": 1.2100388519987933e-05, "loss": 0.3381, "step": 2344 }, { "epoch": 0.44989088467349336, "grad_norm": 0.9055448592679914, "learning_rate": 1.2094311926280617e-05, "loss": 0.7779, "step": 2345 }, { "epoch": 0.4500827357970215, "grad_norm": 0.8380604135590466, "learning_rate": 1.208823452366596e-05, "loss": 0.8035, "step": 2346 }, { "epoch": 0.45027458692054967, "grad_norm": 0.8309061765517156, "learning_rate": 1.2082156314491298e-05, "loss": 0.778, "step": 2347 }, { "epoch": 0.4504664380440778, "grad_norm": 0.8019498109450299, "learning_rate": 1.2076077301104282e-05, "loss": 0.7467, "step": 2348 }, { "epoch": 0.4506582891676059, "grad_norm": 0.9579717371985724, "learning_rate": 1.2069997485852867e-05, "loss": 0.8437, "step": 2349 }, { "epoch": 0.45085014029113407, "grad_norm": 0.9646548181962836, "learning_rate": 1.2063916871085319e-05, "loss": 0.8256, "step": 2350 }, { "epoch": 0.4510419914146622, "grad_norm": 0.9752736975112682, "learning_rate": 1.2057835459150222e-05, "loss": 0.8545, "step": 2351 }, { "epoch": 0.4512338425381904, "grad_norm": 1.0151870681672828, "learning_rate": 1.2051753252396457e-05, "loss": 0.7913, "step": 2352 }, { "epoch": 0.45142569366171853, "grad_norm": 0.887608683429581, "learning_rate": 1.2045670253173218e-05, "loss": 0.7398, "step": 2353 }, { "epoch": 0.4516175447852466, "grad_norm": 1.1060865475958688, "learning_rate": 1.2039586463830006e-05, "loss": 0.81, "step": 2354 }, { "epoch": 0.4518093959087748, "grad_norm": 0.8376576573551082, "learning_rate": 1.2033501886716622e-05, "loss": 0.8031, "step": 2355 }, { "epoch": 0.45200124703230293, "grad_norm": 0.9099129012435887, "learning_rate": 1.2027416524183173e-05, "loss": 0.8128, "step": 2356 }, { "epoch": 0.4521930981558311, "grad_norm": 0.8915027135369287, "learning_rate": 1.2021330378580071e-05, "loss": 0.8194, "step": 2357 }, { "epoch": 0.45238494927935924, "grad_norm": 0.8096931528284942, "learning_rate": 1.2015243452258032e-05, "loss": 0.7962, "step": 2358 }, { "epoch": 0.45257680040288734, "grad_norm": 0.9671617579426546, "learning_rate": 1.200915574756807e-05, "loss": 0.7681, "step": 2359 }, { "epoch": 0.4527686515264155, "grad_norm": 0.826679041132246, "learning_rate": 1.2003067266861496e-05, "loss": 0.3115, "step": 2360 }, { "epoch": 0.45296050264994364, "grad_norm": 0.7433581527092575, "learning_rate": 1.1996978012489935e-05, "loss": 0.3137, "step": 2361 }, { "epoch": 0.4531523537734718, "grad_norm": 0.8954813286666616, "learning_rate": 1.1990887986805295e-05, "loss": 0.7619, "step": 2362 }, { "epoch": 0.45334420489699995, "grad_norm": 0.8866939219070311, "learning_rate": 1.1984797192159791e-05, "loss": 0.7357, "step": 2363 }, { "epoch": 0.45353605602052804, "grad_norm": 0.8924452657906503, "learning_rate": 1.1978705630905931e-05, "loss": 0.8124, "step": 2364 }, { "epoch": 0.4537279071440562, "grad_norm": 0.9467328080328824, "learning_rate": 1.1972613305396521e-05, "loss": 0.7785, "step": 2365 }, { "epoch": 0.45391975826758435, "grad_norm": 0.9976205889802126, "learning_rate": 1.1966520217984664e-05, "loss": 0.8074, "step": 2366 }, { "epoch": 0.4541116093911125, "grad_norm": 0.8335857365329128, "learning_rate": 1.1960426371023747e-05, "loss": 0.7755, "step": 2367 }, { "epoch": 0.45430346051464066, "grad_norm": 0.8013008540903124, "learning_rate": 1.1954331766867469e-05, "loss": 0.8041, "step": 2368 }, { "epoch": 0.4544953116381688, "grad_norm": 0.8568933650152821, "learning_rate": 1.1948236407869804e-05, "loss": 0.7325, "step": 2369 }, { "epoch": 0.4546871627616969, "grad_norm": 0.8384168351537491, "learning_rate": 1.1942140296385027e-05, "loss": 0.7882, "step": 2370 }, { "epoch": 0.45487901388522506, "grad_norm": 0.8966636976296805, "learning_rate": 1.1936043434767699e-05, "loss": 0.8618, "step": 2371 }, { "epoch": 0.4550708650087532, "grad_norm": 0.9631934695258015, "learning_rate": 1.1929945825372674e-05, "loss": 0.8173, "step": 2372 }, { "epoch": 0.45526271613228136, "grad_norm": 0.8833438579173042, "learning_rate": 1.1923847470555094e-05, "loss": 0.7846, "step": 2373 }, { "epoch": 0.4554545672558095, "grad_norm": 0.9712481780825776, "learning_rate": 1.1917748372670386e-05, "loss": 0.7384, "step": 2374 }, { "epoch": 0.4556464183793376, "grad_norm": 0.9213986985741116, "learning_rate": 1.1911648534074271e-05, "loss": 0.7336, "step": 2375 }, { "epoch": 0.45583826950286577, "grad_norm": 0.8600594915297012, "learning_rate": 1.1905547957122748e-05, "loss": 0.8142, "step": 2376 }, { "epoch": 0.4560301206263939, "grad_norm": 1.0115370094879745, "learning_rate": 1.1899446644172106e-05, "loss": 0.8275, "step": 2377 }, { "epoch": 0.4562219717499221, "grad_norm": 0.8922892221826711, "learning_rate": 1.1893344597578917e-05, "loss": 0.8295, "step": 2378 }, { "epoch": 0.4564138228734502, "grad_norm": 0.8913137864739271, "learning_rate": 1.1887241819700033e-05, "loss": 0.8027, "step": 2379 }, { "epoch": 0.4566056739969783, "grad_norm": 0.9727907601196449, "learning_rate": 1.1881138312892598e-05, "loss": 0.8625, "step": 2380 }, { "epoch": 0.4567975251205065, "grad_norm": 0.8767807584584366, "learning_rate": 1.1875034079514026e-05, "loss": 0.8468, "step": 2381 }, { "epoch": 0.45698937624403463, "grad_norm": 0.8993622575414413, "learning_rate": 1.1868929121922019e-05, "loss": 0.8248, "step": 2382 }, { "epoch": 0.4571812273675628, "grad_norm": 0.9590408600543661, "learning_rate": 1.1862823442474557e-05, "loss": 0.7949, "step": 2383 }, { "epoch": 0.45737307849109093, "grad_norm": 0.9020328325030388, "learning_rate": 1.18567170435299e-05, "loss": 0.7962, "step": 2384 }, { "epoch": 0.45756492961461903, "grad_norm": 0.9408951195100849, "learning_rate": 1.1850609927446582e-05, "loss": 0.8346, "step": 2385 }, { "epoch": 0.4577567807381472, "grad_norm": 0.9702934796355775, "learning_rate": 1.1844502096583415e-05, "loss": 0.317, "step": 2386 }, { "epoch": 0.45794863186167534, "grad_norm": 0.8002787134010156, "learning_rate": 1.1838393553299495e-05, "loss": 0.3122, "step": 2387 }, { "epoch": 0.4581404829852035, "grad_norm": 0.9470734203752754, "learning_rate": 1.1832284299954177e-05, "loss": 0.7939, "step": 2388 }, { "epoch": 0.45833233410873164, "grad_norm": 0.917057808159734, "learning_rate": 1.1826174338907109e-05, "loss": 0.8821, "step": 2389 }, { "epoch": 0.45852418523225974, "grad_norm": 0.9394378786341822, "learning_rate": 1.1820063672518198e-05, "loss": 0.7727, "step": 2390 }, { "epoch": 0.4587160363557879, "grad_norm": 0.8883755223698788, "learning_rate": 1.1813952303147631e-05, "loss": 0.7789, "step": 2391 }, { "epoch": 0.45890788747931605, "grad_norm": 0.9437713755837395, "learning_rate": 1.1807840233155863e-05, "loss": 0.8711, "step": 2392 }, { "epoch": 0.4590997386028442, "grad_norm": 0.8808606054949667, "learning_rate": 1.1801727464903621e-05, "loss": 0.7959, "step": 2393 }, { "epoch": 0.45929158972637235, "grad_norm": 0.9739341020681637, "learning_rate": 1.1795614000751899e-05, "loss": 0.8335, "step": 2394 }, { "epoch": 0.4594834408499005, "grad_norm": 0.8797856263596266, "learning_rate": 1.1789499843061967e-05, "loss": 0.7642, "step": 2395 }, { "epoch": 0.4596752919734286, "grad_norm": 0.9456854261251508, "learning_rate": 1.1783384994195355e-05, "loss": 0.7881, "step": 2396 }, { "epoch": 0.45986714309695675, "grad_norm": 0.9721846345103347, "learning_rate": 1.1777269456513865e-05, "loss": 0.745, "step": 2397 }, { "epoch": 0.4600589942204849, "grad_norm": 0.8988720724937982, "learning_rate": 1.1771153232379562e-05, "loss": 0.7494, "step": 2398 }, { "epoch": 0.46025084534401306, "grad_norm": 0.9092085567570105, "learning_rate": 1.176503632415478e-05, "loss": 0.8073, "step": 2399 }, { "epoch": 0.4604426964675412, "grad_norm": 1.4103622402499194, "learning_rate": 1.1758918734202109e-05, "loss": 0.3451, "step": 2400 }, { "epoch": 0.4606345475910693, "grad_norm": 0.8496609148740908, "learning_rate": 1.1752800464884411e-05, "loss": 0.7627, "step": 2401 }, { "epoch": 0.46082639871459746, "grad_norm": 0.910038675548133, "learning_rate": 1.1746681518564809e-05, "loss": 0.7697, "step": 2402 }, { "epoch": 0.4610182498381256, "grad_norm": 0.9491936562660473, "learning_rate": 1.174056189760668e-05, "loss": 0.8243, "step": 2403 }, { "epoch": 0.46121010096165377, "grad_norm": 0.9314374794379756, "learning_rate": 1.1734441604373674e-05, "loss": 0.7811, "step": 2404 }, { "epoch": 0.4614019520851819, "grad_norm": 0.8577487857946554, "learning_rate": 1.1728320641229691e-05, "loss": 0.8387, "step": 2405 }, { "epoch": 0.46159380320871, "grad_norm": 0.8988446797473142, "learning_rate": 1.1722199010538896e-05, "loss": 0.7746, "step": 2406 }, { "epoch": 0.46178565433223817, "grad_norm": 0.9087619967258896, "learning_rate": 1.1716076714665701e-05, "loss": 0.8184, "step": 2407 }, { "epoch": 0.4619775054557663, "grad_norm": 0.8558537793538141, "learning_rate": 1.1709953755974786e-05, "loss": 0.7483, "step": 2408 }, { "epoch": 0.4621693565792945, "grad_norm": 1.0551096103956115, "learning_rate": 1.1703830136831086e-05, "loss": 0.8334, "step": 2409 }, { "epoch": 0.46236120770282263, "grad_norm": 0.8924225315461471, "learning_rate": 1.1697705859599789e-05, "loss": 0.8048, "step": 2410 }, { "epoch": 0.4625530588263507, "grad_norm": 0.8813091114533161, "learning_rate": 1.1691580926646328e-05, "loss": 0.7565, "step": 2411 }, { "epoch": 0.4627449099498789, "grad_norm": 0.8961812294383318, "learning_rate": 1.168545534033641e-05, "loss": 0.8043, "step": 2412 }, { "epoch": 0.46293676107340703, "grad_norm": 1.0316755787353054, "learning_rate": 1.1679329103035977e-05, "loss": 0.7871, "step": 2413 }, { "epoch": 0.4631286121969352, "grad_norm": 0.9414547079895668, "learning_rate": 1.1673202217111227e-05, "loss": 0.7865, "step": 2414 }, { "epoch": 0.46332046332046334, "grad_norm": 0.8997102326775418, "learning_rate": 1.1667074684928608e-05, "loss": 0.8249, "step": 2415 }, { "epoch": 0.46351231444399144, "grad_norm": 0.8671251057514542, "learning_rate": 1.166094650885482e-05, "loss": 0.8414, "step": 2416 }, { "epoch": 0.4637041655675196, "grad_norm": 0.9242385694314151, "learning_rate": 1.1654817691256814e-05, "loss": 0.773, "step": 2417 }, { "epoch": 0.46389601669104774, "grad_norm": 0.9576537252897716, "learning_rate": 1.1648688234501781e-05, "loss": 0.8536, "step": 2418 }, { "epoch": 0.4640878678145759, "grad_norm": 0.8720506256110795, "learning_rate": 1.1642558140957165e-05, "loss": 0.8432, "step": 2419 }, { "epoch": 0.46427971893810405, "grad_norm": 1.1308763301909055, "learning_rate": 1.1636427412990661e-05, "loss": 0.7786, "step": 2420 }, { "epoch": 0.4644715700616322, "grad_norm": 0.9113060285373433, "learning_rate": 1.1630296052970186e-05, "loss": 0.8673, "step": 2421 }, { "epoch": 0.4646634211851603, "grad_norm": 0.900867152208308, "learning_rate": 1.1624164063263931e-05, "loss": 0.8518, "step": 2422 }, { "epoch": 0.46485527230868845, "grad_norm": 0.9551621118137141, "learning_rate": 1.1618031446240312e-05, "loss": 0.7569, "step": 2423 }, { "epoch": 0.4650471234322166, "grad_norm": 0.8782814143854846, "learning_rate": 1.1611898204267995e-05, "loss": 0.3387, "step": 2424 }, { "epoch": 0.46523897455574476, "grad_norm": 0.9376630917663288, "learning_rate": 1.1605764339715877e-05, "loss": 0.7697, "step": 2425 }, { "epoch": 0.4654308256792729, "grad_norm": 0.7322683341622855, "learning_rate": 1.1599629854953114e-05, "loss": 0.3258, "step": 2426 }, { "epoch": 0.465622676802801, "grad_norm": 0.817616263596838, "learning_rate": 1.1593494752349079e-05, "loss": 0.8131, "step": 2427 }, { "epoch": 0.46581452792632916, "grad_norm": 1.019015871254734, "learning_rate": 1.15873590342734e-05, "loss": 0.7936, "step": 2428 }, { "epoch": 0.4660063790498573, "grad_norm": 0.8871012764430241, "learning_rate": 1.1581222703095938e-05, "loss": 0.8354, "step": 2429 }, { "epoch": 0.46619823017338546, "grad_norm": 1.0381125168324372, "learning_rate": 1.1575085761186788e-05, "loss": 0.8583, "step": 2430 }, { "epoch": 0.4663900812969136, "grad_norm": 1.057951269984747, "learning_rate": 1.1568948210916286e-05, "loss": 0.8126, "step": 2431 }, { "epoch": 0.4665819324204417, "grad_norm": 1.0012129707260136, "learning_rate": 1.1562810054655002e-05, "loss": 0.8501, "step": 2432 }, { "epoch": 0.46677378354396987, "grad_norm": 0.8856698323723605, "learning_rate": 1.1556671294773733e-05, "loss": 0.8235, "step": 2433 }, { "epoch": 0.466965634667498, "grad_norm": 0.8910799462756303, "learning_rate": 1.155053193364352e-05, "loss": 0.8049, "step": 2434 }, { "epoch": 0.4671574857910262, "grad_norm": 0.962177684856666, "learning_rate": 1.1544391973635624e-05, "loss": 0.7902, "step": 2435 }, { "epoch": 0.4673493369145543, "grad_norm": 0.9671861804358173, "learning_rate": 1.1538251417121551e-05, "loss": 0.7906, "step": 2436 }, { "epoch": 0.4675411880380824, "grad_norm": 0.8920297798559294, "learning_rate": 1.1532110266473026e-05, "loss": 0.7866, "step": 2437 }, { "epoch": 0.4677330391616106, "grad_norm": 0.9335999869482897, "learning_rate": 1.152596852406201e-05, "loss": 0.7187, "step": 2438 }, { "epoch": 0.46792489028513873, "grad_norm": 0.8323950700256371, "learning_rate": 1.1519826192260691e-05, "loss": 0.7921, "step": 2439 }, { "epoch": 0.4681167414086669, "grad_norm": 0.8685802772903718, "learning_rate": 1.151368327344148e-05, "loss": 0.8628, "step": 2440 }, { "epoch": 0.46830859253219503, "grad_norm": 0.9403559998209532, "learning_rate": 1.1507539769977025e-05, "loss": 0.7694, "step": 2441 }, { "epoch": 0.46850044365572313, "grad_norm": 0.843160449968856, "learning_rate": 1.1501395684240189e-05, "loss": 0.7687, "step": 2442 }, { "epoch": 0.4686922947792513, "grad_norm": 0.9742052001371304, "learning_rate": 1.1495251018604065e-05, "loss": 0.7869, "step": 2443 }, { "epoch": 0.46888414590277944, "grad_norm": 0.8258283513874266, "learning_rate": 1.1489105775441973e-05, "loss": 0.7377, "step": 2444 }, { "epoch": 0.4690759970263076, "grad_norm": 0.9420869738003942, "learning_rate": 1.1482959957127448e-05, "loss": 0.7762, "step": 2445 }, { "epoch": 0.46926784814983574, "grad_norm": 0.8856833761465757, "learning_rate": 1.1476813566034255e-05, "loss": 0.8222, "step": 2446 }, { "epoch": 0.4694596992733639, "grad_norm": 0.9586620186753023, "learning_rate": 1.1470666604536378e-05, "loss": 0.859, "step": 2447 }, { "epoch": 0.469651550396892, "grad_norm": 1.3360687891470993, "learning_rate": 1.1464519075008015e-05, "loss": 0.785, "step": 2448 }, { "epoch": 0.46984340152042015, "grad_norm": 0.8910328936788348, "learning_rate": 1.1458370979823595e-05, "loss": 0.7564, "step": 2449 }, { "epoch": 0.4700352526439483, "grad_norm": 0.8313761141787643, "learning_rate": 1.1452222321357753e-05, "loss": 0.7591, "step": 2450 }, { "epoch": 0.47022710376747645, "grad_norm": 0.8193876929377724, "learning_rate": 1.1446073101985354e-05, "loss": 0.7467, "step": 2451 }, { "epoch": 0.4704189548910046, "grad_norm": 1.0660275311859013, "learning_rate": 1.1439923324081465e-05, "loss": 0.7503, "step": 2452 }, { "epoch": 0.4706108060145327, "grad_norm": 0.89168140342102, "learning_rate": 1.1433772990021388e-05, "loss": 0.7832, "step": 2453 }, { "epoch": 0.47080265713806085, "grad_norm": 0.9836818494553443, "learning_rate": 1.1427622102180623e-05, "loss": 0.8122, "step": 2454 }, { "epoch": 0.470994508261589, "grad_norm": 1.0547738878340729, "learning_rate": 1.1421470662934888e-05, "loss": 0.7716, "step": 2455 }, { "epoch": 0.47118635938511716, "grad_norm": 0.9229119073185434, "learning_rate": 1.1415318674660118e-05, "loss": 0.8009, "step": 2456 }, { "epoch": 0.4713782105086453, "grad_norm": 0.8568805679004629, "learning_rate": 1.1409166139732457e-05, "loss": 0.7408, "step": 2457 }, { "epoch": 0.4715700616321734, "grad_norm": 1.1460811433267373, "learning_rate": 1.1403013060528266e-05, "loss": 0.7887, "step": 2458 }, { "epoch": 0.47176191275570156, "grad_norm": 1.0836553416925787, "learning_rate": 1.1396859439424103e-05, "loss": 0.8137, "step": 2459 }, { "epoch": 0.4719537638792297, "grad_norm": 0.9706916790216795, "learning_rate": 1.1390705278796747e-05, "loss": 0.8282, "step": 2460 }, { "epoch": 0.47214561500275787, "grad_norm": 0.8756728148096744, "learning_rate": 1.1384550581023185e-05, "loss": 0.7958, "step": 2461 }, { "epoch": 0.472337466126286, "grad_norm": 0.8495648075320797, "learning_rate": 1.1378395348480604e-05, "loss": 0.3098, "step": 2462 }, { "epoch": 0.4725293172498141, "grad_norm": 0.8776200560458263, "learning_rate": 1.1372239583546404e-05, "loss": 0.7748, "step": 2463 }, { "epoch": 0.47272116837334227, "grad_norm": 1.0230652599876504, "learning_rate": 1.1366083288598187e-05, "loss": 0.8632, "step": 2464 }, { "epoch": 0.4729130194968704, "grad_norm": 0.8846126729782199, "learning_rate": 1.1359926466013764e-05, "loss": 0.7458, "step": 2465 }, { "epoch": 0.4731048706203986, "grad_norm": 0.9171061320422209, "learning_rate": 1.1353769118171145e-05, "loss": 0.7248, "step": 2466 }, { "epoch": 0.47329672174392673, "grad_norm": 1.1384731401281674, "learning_rate": 1.1347611247448544e-05, "loss": 0.851, "step": 2467 }, { "epoch": 0.4734885728674549, "grad_norm": 1.00656048402436, "learning_rate": 1.1341452856224381e-05, "loss": 0.7827, "step": 2468 }, { "epoch": 0.473680423990983, "grad_norm": 0.8938922490228169, "learning_rate": 1.1335293946877273e-05, "loss": 0.8541, "step": 2469 }, { "epoch": 0.47387227511451113, "grad_norm": 0.9911390923720356, "learning_rate": 1.1329134521786039e-05, "loss": 0.72, "step": 2470 }, { "epoch": 0.4740641262380393, "grad_norm": 0.84350854695323, "learning_rate": 1.132297458332969e-05, "loss": 0.732, "step": 2471 }, { "epoch": 0.47425597736156744, "grad_norm": 0.8793984813248857, "learning_rate": 1.1316814133887447e-05, "loss": 0.7446, "step": 2472 }, { "epoch": 0.4744478284850956, "grad_norm": 1.0457926096145433, "learning_rate": 1.1310653175838722e-05, "loss": 0.796, "step": 2473 }, { "epoch": 0.4746396796086237, "grad_norm": 1.0220620187888767, "learning_rate": 1.1304491711563124e-05, "loss": 0.7283, "step": 2474 }, { "epoch": 0.47483153073215184, "grad_norm": 0.9272382490725167, "learning_rate": 1.129832974344046e-05, "loss": 0.7884, "step": 2475 }, { "epoch": 0.47502338185568, "grad_norm": 0.9712414573507475, "learning_rate": 1.1292167273850725e-05, "loss": 0.7753, "step": 2476 }, { "epoch": 0.47521523297920815, "grad_norm": 0.8340868207937647, "learning_rate": 1.1286004305174117e-05, "loss": 0.7147, "step": 2477 }, { "epoch": 0.4754070841027363, "grad_norm": 0.8896410794641609, "learning_rate": 1.1279840839791018e-05, "loss": 0.7102, "step": 2478 }, { "epoch": 0.4755989352262644, "grad_norm": 1.0064743510994414, "learning_rate": 1.1273676880082007e-05, "loss": 0.8519, "step": 2479 }, { "epoch": 0.47579078634979255, "grad_norm": 0.9830649451782972, "learning_rate": 1.1267512428427853e-05, "loss": 0.8027, "step": 2480 }, { "epoch": 0.4759826374733207, "grad_norm": 1.0000030415418657, "learning_rate": 1.1261347487209515e-05, "loss": 0.7256, "step": 2481 }, { "epoch": 0.47617448859684885, "grad_norm": 0.9537628299884002, "learning_rate": 1.1255182058808143e-05, "loss": 0.8041, "step": 2482 }, { "epoch": 0.476366339720377, "grad_norm": 0.9531038692031432, "learning_rate": 1.124901614560507e-05, "loss": 0.7478, "step": 2483 }, { "epoch": 0.4765581908439051, "grad_norm": 0.9712268845711618, "learning_rate": 1.1242849749981817e-05, "loss": 0.7951, "step": 2484 }, { "epoch": 0.47675004196743326, "grad_norm": 0.8194102490385671, "learning_rate": 1.12366828743201e-05, "loss": 0.7506, "step": 2485 }, { "epoch": 0.4769418930909614, "grad_norm": 0.9227101190181454, "learning_rate": 1.1230515521001805e-05, "loss": 0.761, "step": 2486 }, { "epoch": 0.47713374421448956, "grad_norm": 0.9813742590072556, "learning_rate": 1.122434769240902e-05, "loss": 0.7745, "step": 2487 }, { "epoch": 0.4773255953380177, "grad_norm": 0.8213246169485263, "learning_rate": 1.1218179390924004e-05, "loss": 0.7347, "step": 2488 }, { "epoch": 0.4775174464615458, "grad_norm": 0.9187793274251347, "learning_rate": 1.1212010618929203e-05, "loss": 0.8164, "step": 2489 }, { "epoch": 0.47770929758507397, "grad_norm": 0.9550493553472497, "learning_rate": 1.1205841378807245e-05, "loss": 0.8285, "step": 2490 }, { "epoch": 0.4779011487086021, "grad_norm": 0.8066433144624932, "learning_rate": 1.1199671672940939e-05, "loss": 0.7961, "step": 2491 }, { "epoch": 0.47809299983213027, "grad_norm": 0.9279179461088995, "learning_rate": 1.1193501503713272e-05, "loss": 0.781, "step": 2492 }, { "epoch": 0.4782848509556584, "grad_norm": 0.8670093766847556, "learning_rate": 1.118733087350741e-05, "loss": 0.8012, "step": 2493 }, { "epoch": 0.4784767020791866, "grad_norm": 0.9596396294009919, "learning_rate": 1.1181159784706696e-05, "loss": 0.8039, "step": 2494 }, { "epoch": 0.4786685532027147, "grad_norm": 1.207457567171434, "learning_rate": 1.117498823969466e-05, "loss": 0.701, "step": 2495 }, { "epoch": 0.47886040432624283, "grad_norm": 1.0039668887953255, "learning_rate": 1.1168816240854994e-05, "loss": 0.8331, "step": 2496 }, { "epoch": 0.479052255449771, "grad_norm": 0.9602814572374063, "learning_rate": 1.1162643790571574e-05, "loss": 0.7467, "step": 2497 }, { "epoch": 0.47924410657329913, "grad_norm": 0.9719257796632549, "learning_rate": 1.115647089122845e-05, "loss": 0.8054, "step": 2498 }, { "epoch": 0.4794359576968273, "grad_norm": 1.0266284818293265, "learning_rate": 1.1150297545209837e-05, "loss": 0.8358, "step": 2499 }, { "epoch": 0.4796278088203554, "grad_norm": 0.6805979143444982, "learning_rate": 1.1144123754900135e-05, "loss": 0.3522, "step": 2500 }, { "epoch": 0.47981965994388354, "grad_norm": 0.9549882863832425, "learning_rate": 1.1137949522683906e-05, "loss": 0.8666, "step": 2501 }, { "epoch": 0.4800115110674117, "grad_norm": 0.9469456258018214, "learning_rate": 1.1131774850945892e-05, "loss": 0.8646, "step": 2502 }, { "epoch": 0.48020336219093984, "grad_norm": 0.9280851838699754, "learning_rate": 1.1125599742070992e-05, "loss": 0.7885, "step": 2503 }, { "epoch": 0.480395213314468, "grad_norm": 0.9953376075591824, "learning_rate": 1.1119424198444288e-05, "loss": 0.7288, "step": 2504 }, { "epoch": 0.4805870644379961, "grad_norm": 0.8284349367375113, "learning_rate": 1.1113248222451018e-05, "loss": 0.7653, "step": 2505 }, { "epoch": 0.48077891556152424, "grad_norm": 0.9245940357627634, "learning_rate": 1.110707181647659e-05, "loss": 0.8008, "step": 2506 }, { "epoch": 0.4809707666850524, "grad_norm": 1.1272706189069055, "learning_rate": 1.1100894982906584e-05, "loss": 0.7398, "step": 2507 }, { "epoch": 0.48116261780858055, "grad_norm": 0.8727507539710413, "learning_rate": 1.1094717724126741e-05, "loss": 0.8214, "step": 2508 }, { "epoch": 0.4813544689321087, "grad_norm": 0.8971921818588388, "learning_rate": 1.1088540042522967e-05, "loss": 0.7289, "step": 2509 }, { "epoch": 0.4815463200556368, "grad_norm": 0.9719864197391826, "learning_rate": 1.1082361940481328e-05, "loss": 0.8492, "step": 2510 }, { "epoch": 0.48173817117916495, "grad_norm": 0.9118745218750235, "learning_rate": 1.1076183420388057e-05, "loss": 0.7749, "step": 2511 }, { "epoch": 0.4819300223026931, "grad_norm": 0.9351899574239604, "learning_rate": 1.1070004484629543e-05, "loss": 0.7449, "step": 2512 }, { "epoch": 0.48212187342622126, "grad_norm": 1.0973844361123442, "learning_rate": 1.106382513559234e-05, "loss": 0.7469, "step": 2513 }, { "epoch": 0.4823137245497494, "grad_norm": 0.6530121999269889, "learning_rate": 1.1057645375663166e-05, "loss": 0.3158, "step": 2514 }, { "epoch": 0.4825055756732775, "grad_norm": 0.9992376020480307, "learning_rate": 1.1051465207228889e-05, "loss": 0.8253, "step": 2515 }, { "epoch": 0.48269742679680566, "grad_norm": 0.959403509600788, "learning_rate": 1.1045284632676535e-05, "loss": 0.8317, "step": 2516 }, { "epoch": 0.4828892779203338, "grad_norm": 0.9492400522202926, "learning_rate": 1.1039103654393295e-05, "loss": 0.769, "step": 2517 }, { "epoch": 0.48308112904386197, "grad_norm": 0.8316752954525082, "learning_rate": 1.1032922274766508e-05, "loss": 0.8128, "step": 2518 }, { "epoch": 0.4832729801673901, "grad_norm": 0.9719931538098091, "learning_rate": 1.1026740496183674e-05, "loss": 0.7354, "step": 2519 }, { "epoch": 0.4834648312909183, "grad_norm": 0.9620267895402428, "learning_rate": 1.1020558321032437e-05, "loss": 0.7885, "step": 2520 }, { "epoch": 0.48365668241444637, "grad_norm": 0.8950332307765453, "learning_rate": 1.101437575170061e-05, "loss": 0.8351, "step": 2521 }, { "epoch": 0.4838485335379745, "grad_norm": 0.9477751891092574, "learning_rate": 1.1008192790576144e-05, "loss": 0.836, "step": 2522 }, { "epoch": 0.4840403846615027, "grad_norm": 0.8808872341758376, "learning_rate": 1.1002009440047147e-05, "loss": 0.7605, "step": 2523 }, { "epoch": 0.48423223578503083, "grad_norm": 0.9766621210837102, "learning_rate": 1.0995825702501882e-05, "loss": 0.8508, "step": 2524 }, { "epoch": 0.484424086908559, "grad_norm": 0.8061973597362343, "learning_rate": 1.0989641580328752e-05, "loss": 0.7977, "step": 2525 }, { "epoch": 0.4846159380320871, "grad_norm": 0.914376120044133, "learning_rate": 1.0983457075916314e-05, "loss": 0.772, "step": 2526 }, { "epoch": 0.48480778915561523, "grad_norm": 0.9587529836006842, "learning_rate": 1.0977272191653272e-05, "loss": 0.8163, "step": 2527 }, { "epoch": 0.4849996402791434, "grad_norm": 0.8499395570306941, "learning_rate": 1.0971086929928475e-05, "loss": 0.7556, "step": 2528 }, { "epoch": 0.48519149140267154, "grad_norm": 1.1626561331410832, "learning_rate": 1.0964901293130924e-05, "loss": 0.848, "step": 2529 }, { "epoch": 0.4853833425261997, "grad_norm": 0.8584405728928957, "learning_rate": 1.0958715283649755e-05, "loss": 0.7701, "step": 2530 }, { "epoch": 0.4855751936497278, "grad_norm": 0.9565446884019505, "learning_rate": 1.0952528903874258e-05, "loss": 0.7993, "step": 2531 }, { "epoch": 0.48576704477325594, "grad_norm": 0.8562981243492537, "learning_rate": 1.0946342156193858e-05, "loss": 0.7134, "step": 2532 }, { "epoch": 0.4859588958967841, "grad_norm": 0.6705306177791919, "learning_rate": 1.0940155042998126e-05, "loss": 0.294, "step": 2533 }, { "epoch": 0.48615074702031225, "grad_norm": 0.9974114971900344, "learning_rate": 1.0933967566676775e-05, "loss": 0.8014, "step": 2534 }, { "epoch": 0.4863425981438404, "grad_norm": 0.7900730191260789, "learning_rate": 1.0927779729619654e-05, "loss": 0.7975, "step": 2535 }, { "epoch": 0.4865344492673685, "grad_norm": 0.8509549773485569, "learning_rate": 1.0921591534216757e-05, "loss": 0.736, "step": 2536 }, { "epoch": 0.48672630039089665, "grad_norm": 1.0541292776686115, "learning_rate": 1.0915402982858212e-05, "loss": 0.8206, "step": 2537 }, { "epoch": 0.4869181515144248, "grad_norm": 1.0596320997939064, "learning_rate": 1.0909214077934286e-05, "loss": 0.7977, "step": 2538 }, { "epoch": 0.48711000263795295, "grad_norm": 0.8468290379398835, "learning_rate": 1.0903024821835382e-05, "loss": 0.8135, "step": 2539 }, { "epoch": 0.4873018537614811, "grad_norm": 1.0216347751426589, "learning_rate": 1.0896835216952039e-05, "loss": 0.6472, "step": 2540 }, { "epoch": 0.48749370488500926, "grad_norm": 1.0343750458311016, "learning_rate": 1.0890645265674935e-05, "loss": 0.7816, "step": 2541 }, { "epoch": 0.48768555600853736, "grad_norm": 0.9536435494487873, "learning_rate": 1.0884454970394871e-05, "loss": 0.8385, "step": 2542 }, { "epoch": 0.4878774071320655, "grad_norm": 1.026709949089275, "learning_rate": 1.0878264333502793e-05, "loss": 0.8288, "step": 2543 }, { "epoch": 0.48806925825559366, "grad_norm": 1.0578970723356376, "learning_rate": 1.087207335738977e-05, "loss": 0.7304, "step": 2544 }, { "epoch": 0.4882611093791218, "grad_norm": 0.9781642427562921, "learning_rate": 1.0865882044447006e-05, "loss": 0.7857, "step": 2545 }, { "epoch": 0.48845296050264997, "grad_norm": 0.9592813290517264, "learning_rate": 1.0859690397065838e-05, "loss": 0.7728, "step": 2546 }, { "epoch": 0.48864481162617807, "grad_norm": 0.8414580308372951, "learning_rate": 1.0853498417637726e-05, "loss": 0.7645, "step": 2547 }, { "epoch": 0.4888366627497062, "grad_norm": 1.0306507730739622, "learning_rate": 1.084730610855426e-05, "loss": 0.8285, "step": 2548 }, { "epoch": 0.48902851387323437, "grad_norm": 0.884371616623811, "learning_rate": 1.084111347220716e-05, "loss": 0.7696, "step": 2549 }, { "epoch": 0.4892203649967625, "grad_norm": 0.832080061360907, "learning_rate": 1.0834920510988272e-05, "loss": 0.8576, "step": 2550 }, { "epoch": 0.4894122161202907, "grad_norm": 0.9335645355011468, "learning_rate": 1.0828727227289564e-05, "loss": 0.7741, "step": 2551 }, { "epoch": 0.4896040672438188, "grad_norm": 0.8647157238391134, "learning_rate": 1.0822533623503128e-05, "loss": 0.8761, "step": 2552 }, { "epoch": 0.4897959183673469, "grad_norm": 0.9291215711754963, "learning_rate": 1.081633970202119e-05, "loss": 0.8199, "step": 2553 }, { "epoch": 0.4899877694908751, "grad_norm": 0.9490077266214385, "learning_rate": 1.0810145465236085e-05, "loss": 0.7473, "step": 2554 }, { "epoch": 0.49017962061440323, "grad_norm": 0.7773058275538275, "learning_rate": 1.0803950915540283e-05, "loss": 0.8338, "step": 2555 }, { "epoch": 0.4903714717379314, "grad_norm": 0.9340457088718913, "learning_rate": 1.0797756055326359e-05, "loss": 0.8062, "step": 2556 }, { "epoch": 0.4905633228614595, "grad_norm": 0.9518210697667142, "learning_rate": 1.0791560886987016e-05, "loss": 0.8073, "step": 2557 }, { "epoch": 0.49075517398498764, "grad_norm": 0.8718635128230946, "learning_rate": 1.0785365412915088e-05, "loss": 0.7796, "step": 2558 }, { "epoch": 0.4909470251085158, "grad_norm": 0.9976409640858809, "learning_rate": 1.0779169635503503e-05, "loss": 0.764, "step": 2559 }, { "epoch": 0.49113887623204394, "grad_norm": 1.0130824190298326, "learning_rate": 1.077297355714533e-05, "loss": 0.8432, "step": 2560 }, { "epoch": 0.4913307273555721, "grad_norm": 1.0453258826810863, "learning_rate": 1.0766777180233736e-05, "loss": 0.7992, "step": 2561 }, { "epoch": 0.4915225784791002, "grad_norm": 1.202278386552204, "learning_rate": 1.0760580507162012e-05, "loss": 0.7985, "step": 2562 }, { "epoch": 0.49171442960262834, "grad_norm": 0.8923373371225946, "learning_rate": 1.0754383540323566e-05, "loss": 0.805, "step": 2563 }, { "epoch": 0.4919062807261565, "grad_norm": 0.8644044916995776, "learning_rate": 1.0748186282111907e-05, "loss": 0.7121, "step": 2564 }, { "epoch": 0.49209813184968465, "grad_norm": 0.9342563915921455, "learning_rate": 1.0741988734920675e-05, "loss": 0.801, "step": 2565 }, { "epoch": 0.4922899829732128, "grad_norm": 0.9138368045081745, "learning_rate": 1.0735790901143603e-05, "loss": 0.7556, "step": 2566 }, { "epoch": 0.49248183409674096, "grad_norm": 0.904659053685598, "learning_rate": 1.0729592783174545e-05, "loss": 0.763, "step": 2567 }, { "epoch": 0.49267368522026905, "grad_norm": 0.8935296796092796, "learning_rate": 1.0723394383407471e-05, "loss": 0.8306, "step": 2568 }, { "epoch": 0.4928655363437972, "grad_norm": 0.949565466913455, "learning_rate": 1.0717195704236438e-05, "loss": 0.7679, "step": 2569 }, { "epoch": 0.49305738746732536, "grad_norm": 0.7168690953384983, "learning_rate": 1.0710996748055638e-05, "loss": 0.3311, "step": 2570 }, { "epoch": 0.4932492385908535, "grad_norm": 0.9965639324623352, "learning_rate": 1.0704797517259346e-05, "loss": 0.7616, "step": 2571 }, { "epoch": 0.49344108971438166, "grad_norm": 0.9104317102332593, "learning_rate": 1.069859801424196e-05, "loss": 0.785, "step": 2572 }, { "epoch": 0.49363294083790976, "grad_norm": 0.7116991341732202, "learning_rate": 1.0692398241397979e-05, "loss": 0.3232, "step": 2573 }, { "epoch": 0.4938247919614379, "grad_norm": 0.935214622595, "learning_rate": 1.0686198201121996e-05, "loss": 0.7411, "step": 2574 }, { "epoch": 0.49401664308496607, "grad_norm": 0.8768160171322854, "learning_rate": 1.0679997895808724e-05, "loss": 0.7635, "step": 2575 }, { "epoch": 0.4942084942084942, "grad_norm": 0.9012414776417942, "learning_rate": 1.0673797327852967e-05, "loss": 0.8919, "step": 2576 }, { "epoch": 0.4944003453320224, "grad_norm": 0.9274523092067009, "learning_rate": 1.0667596499649628e-05, "loss": 0.8349, "step": 2577 }, { "epoch": 0.49459219645555047, "grad_norm": 1.0331561927041266, "learning_rate": 1.0661395413593722e-05, "loss": 0.7939, "step": 2578 }, { "epoch": 0.4947840475790786, "grad_norm": 0.861984842647488, "learning_rate": 1.0655194072080355e-05, "loss": 0.7755, "step": 2579 }, { "epoch": 0.4949758987026068, "grad_norm": 0.9417187088084971, "learning_rate": 1.0648992477504733e-05, "loss": 0.7792, "step": 2580 }, { "epoch": 0.49516774982613493, "grad_norm": 1.0562111418934834, "learning_rate": 1.0642790632262165e-05, "loss": 0.8363, "step": 2581 }, { "epoch": 0.4953596009496631, "grad_norm": 1.0208025771619742, "learning_rate": 1.0636588538748052e-05, "loss": 0.8398, "step": 2582 }, { "epoch": 0.4955514520731912, "grad_norm": 0.8873295181195708, "learning_rate": 1.0630386199357887e-05, "loss": 0.8213, "step": 2583 }, { "epoch": 0.49574330319671933, "grad_norm": 0.7457766663115255, "learning_rate": 1.0624183616487263e-05, "loss": 0.358, "step": 2584 }, { "epoch": 0.4959351543202475, "grad_norm": 1.0105337945024901, "learning_rate": 1.061798079253187e-05, "loss": 0.8051, "step": 2585 }, { "epoch": 0.49612700544377564, "grad_norm": 1.062476587280566, "learning_rate": 1.0611777729887485e-05, "loss": 0.8192, "step": 2586 }, { "epoch": 0.4963188565673038, "grad_norm": 1.0351205159178756, "learning_rate": 1.0605574430949983e-05, "loss": 0.798, "step": 2587 }, { "epoch": 0.4965107076908319, "grad_norm": 0.9680836199604111, "learning_rate": 1.0599370898115324e-05, "loss": 0.7639, "step": 2588 }, { "epoch": 0.49670255881436004, "grad_norm": 0.91900443364394, "learning_rate": 1.0593167133779562e-05, "loss": 0.8532, "step": 2589 }, { "epoch": 0.4968944099378882, "grad_norm": 0.952694641354848, "learning_rate": 1.0586963140338842e-05, "loss": 0.8118, "step": 2590 }, { "epoch": 0.49708626106141635, "grad_norm": 0.9618827894540717, "learning_rate": 1.0580758920189391e-05, "loss": 0.8857, "step": 2591 }, { "epoch": 0.4972781121849445, "grad_norm": 0.696323960737539, "learning_rate": 1.0574554475727531e-05, "loss": 0.3179, "step": 2592 }, { "epoch": 0.49746996330847265, "grad_norm": 1.0232589267937366, "learning_rate": 1.056834980934967e-05, "loss": 0.7775, "step": 2593 }, { "epoch": 0.49766181443200075, "grad_norm": 0.7005514325391103, "learning_rate": 1.056214492345229e-05, "loss": 0.2827, "step": 2594 }, { "epoch": 0.4978536655555289, "grad_norm": 0.971222288140905, "learning_rate": 1.0555939820431978e-05, "loss": 0.8728, "step": 2595 }, { "epoch": 0.49804551667905705, "grad_norm": 0.6448931772599199, "learning_rate": 1.0549734502685389e-05, "loss": 0.2936, "step": 2596 }, { "epoch": 0.4982373678025852, "grad_norm": 0.9194817457423646, "learning_rate": 1.0543528972609264e-05, "loss": 0.7934, "step": 2597 }, { "epoch": 0.49842921892611336, "grad_norm": 1.0485875260206179, "learning_rate": 1.0537323232600429e-05, "loss": 0.7514, "step": 2598 }, { "epoch": 0.49862107004964146, "grad_norm": 0.9809710618998266, "learning_rate": 1.0531117285055794e-05, "loss": 0.8241, "step": 2599 }, { "epoch": 0.4988129211731696, "grad_norm": 1.0021402116276625, "learning_rate": 1.052491113237234e-05, "loss": 0.8251, "step": 2600 }, { "epoch": 0.49900477229669776, "grad_norm": 0.9489304580158687, "learning_rate": 1.0518704776947133e-05, "loss": 0.8076, "step": 2601 }, { "epoch": 0.4991966234202259, "grad_norm": 0.9008272870656507, "learning_rate": 1.0512498221177319e-05, "loss": 0.8357, "step": 2602 }, { "epoch": 0.49938847454375407, "grad_norm": 0.835555246331891, "learning_rate": 1.0506291467460116e-05, "loss": 0.7992, "step": 2603 }, { "epoch": 0.49958032566728217, "grad_norm": 0.9174863536808179, "learning_rate": 1.0500084518192825e-05, "loss": 0.7721, "step": 2604 }, { "epoch": 0.4997721767908103, "grad_norm": 0.9465472154806531, "learning_rate": 1.0493877375772818e-05, "loss": 0.7748, "step": 2605 }, { "epoch": 0.49996402791433847, "grad_norm": 0.8987622792725034, "learning_rate": 1.048767004259754e-05, "loss": 0.7711, "step": 2606 }, { "epoch": 0.5001558790378666, "grad_norm": 1.1048374274034007, "learning_rate": 1.0481462521064514e-05, "loss": 0.7989, "step": 2607 }, { "epoch": 0.5003477301613948, "grad_norm": 0.8235982556088711, "learning_rate": 1.0475254813571335e-05, "loss": 0.8236, "step": 2608 }, { "epoch": 0.5005395812849229, "grad_norm": 0.9398191268798904, "learning_rate": 1.0469046922515667e-05, "loss": 0.7691, "step": 2609 }, { "epoch": 0.5007314324084511, "grad_norm": 0.9347703455092843, "learning_rate": 1.0462838850295247e-05, "loss": 0.794, "step": 2610 }, { "epoch": 0.5009232835319792, "grad_norm": 1.0255170140080914, "learning_rate": 1.0456630599307883e-05, "loss": 0.8223, "step": 2611 }, { "epoch": 0.5011151346555073, "grad_norm": 0.862794342801923, "learning_rate": 1.0450422171951453e-05, "loss": 0.8077, "step": 2612 }, { "epoch": 0.5013069857790354, "grad_norm": 1.0356122482365937, "learning_rate": 1.0444213570623894e-05, "loss": 0.746, "step": 2613 }, { "epoch": 0.5014988369025636, "grad_norm": 0.9129840259629981, "learning_rate": 1.0438004797723222e-05, "loss": 0.8147, "step": 2614 }, { "epoch": 0.5016906880260917, "grad_norm": 0.9527380044077284, "learning_rate": 1.0431795855647513e-05, "loss": 0.7536, "step": 2615 }, { "epoch": 0.5018825391496199, "grad_norm": 0.9030695536139356, "learning_rate": 1.042558674679491e-05, "loss": 0.7983, "step": 2616 }, { "epoch": 0.502074390273148, "grad_norm": 0.9522942034089246, "learning_rate": 1.0419377473563621e-05, "loss": 0.7272, "step": 2617 }, { "epoch": 0.5022662413966762, "grad_norm": 0.7951078305079122, "learning_rate": 1.0413168038351917e-05, "loss": 0.7403, "step": 2618 }, { "epoch": 0.5024580925202043, "grad_norm": 1.0104338047474573, "learning_rate": 1.0406958443558128e-05, "loss": 0.7682, "step": 2619 }, { "epoch": 0.5026499436437325, "grad_norm": 0.9579494233544191, "learning_rate": 1.0400748691580651e-05, "loss": 0.8138, "step": 2620 }, { "epoch": 0.5028417947672607, "grad_norm": 0.8832553739373431, "learning_rate": 1.0394538784817941e-05, "loss": 0.7358, "step": 2621 }, { "epoch": 0.5030336458907887, "grad_norm": 0.9352380109798809, "learning_rate": 1.0388328725668515e-05, "loss": 0.7477, "step": 2622 }, { "epoch": 0.5032254970143168, "grad_norm": 0.9616108849195253, "learning_rate": 1.0382118516530943e-05, "loss": 0.7211, "step": 2623 }, { "epoch": 0.503417348137845, "grad_norm": 0.8806443368755995, "learning_rate": 1.0375908159803862e-05, "loss": 0.3329, "step": 2624 }, { "epoch": 0.5036091992613732, "grad_norm": 0.9236455506739407, "learning_rate": 1.0369697657885957e-05, "loss": 0.8447, "step": 2625 }, { "epoch": 0.5038010503849013, "grad_norm": 0.7864857533704991, "learning_rate": 1.036348701317598e-05, "loss": 0.3321, "step": 2626 }, { "epoch": 0.5039929015084295, "grad_norm": 0.8832846985624091, "learning_rate": 1.035727622807272e-05, "loss": 0.7988, "step": 2627 }, { "epoch": 0.5041847526319576, "grad_norm": 0.8461260888111133, "learning_rate": 1.035106530497504e-05, "loss": 0.8197, "step": 2628 }, { "epoch": 0.5043766037554858, "grad_norm": 0.8873714504521436, "learning_rate": 1.0344854246281845e-05, "loss": 0.7734, "step": 2629 }, { "epoch": 0.5045684548790139, "grad_norm": 0.9384117776307704, "learning_rate": 1.0338643054392098e-05, "loss": 0.7942, "step": 2630 }, { "epoch": 0.5047603060025421, "grad_norm": 0.9250736449927939, "learning_rate": 1.0332431731704806e-05, "loss": 0.8332, "step": 2631 }, { "epoch": 0.5049521571260702, "grad_norm": 1.0049138922382215, "learning_rate": 1.0326220280619036e-05, "loss": 0.8135, "step": 2632 }, { "epoch": 0.5051440082495983, "grad_norm": 0.6569779822129485, "learning_rate": 1.0320008703533897e-05, "loss": 0.3295, "step": 2633 }, { "epoch": 0.5053358593731264, "grad_norm": 0.9290633754657347, "learning_rate": 1.0313797002848551e-05, "loss": 0.7842, "step": 2634 }, { "epoch": 0.5055277104966546, "grad_norm": 0.8704616822578104, "learning_rate": 1.0307585180962201e-05, "loss": 0.7717, "step": 2635 }, { "epoch": 0.5057195616201827, "grad_norm": 0.8510345485352698, "learning_rate": 1.0301373240274111e-05, "loss": 0.7807, "step": 2636 }, { "epoch": 0.5059114127437109, "grad_norm": 0.9735786672588188, "learning_rate": 1.0295161183183571e-05, "loss": 0.777, "step": 2637 }, { "epoch": 0.506103263867239, "grad_norm": 0.8529399099798998, "learning_rate": 1.0288949012089936e-05, "loss": 0.7486, "step": 2638 }, { "epoch": 0.5062951149907672, "grad_norm": 0.8727138461646282, "learning_rate": 1.0282736729392598e-05, "loss": 0.7273, "step": 2639 }, { "epoch": 0.5064869661142953, "grad_norm": 1.062102079249283, "learning_rate": 1.0276524337490979e-05, "loss": 0.8042, "step": 2640 }, { "epoch": 0.5066788172378235, "grad_norm": 0.9677562370054686, "learning_rate": 1.027031183878456e-05, "loss": 0.7706, "step": 2641 }, { "epoch": 0.5068706683613516, "grad_norm": 0.8347457333348517, "learning_rate": 1.0264099235672855e-05, "loss": 0.7557, "step": 2642 }, { "epoch": 0.5070625194848797, "grad_norm": 0.9650575253868149, "learning_rate": 1.0257886530555428e-05, "loss": 0.7748, "step": 2643 }, { "epoch": 0.5072543706084078, "grad_norm": 0.8967004558290896, "learning_rate": 1.0251673725831868e-05, "loss": 0.831, "step": 2644 }, { "epoch": 0.507446221731936, "grad_norm": 1.0627999030703268, "learning_rate": 1.024546082390181e-05, "loss": 0.8287, "step": 2645 }, { "epoch": 0.5076380728554641, "grad_norm": 0.8421909236885694, "learning_rate": 1.0239247827164931e-05, "loss": 0.8029, "step": 2646 }, { "epoch": 0.5078299239789923, "grad_norm": 0.9738977864633446, "learning_rate": 1.0233034738020933e-05, "loss": 0.7306, "step": 2647 }, { "epoch": 0.5080217751025204, "grad_norm": 0.9500291973870937, "learning_rate": 1.0226821558869565e-05, "loss": 0.7332, "step": 2648 }, { "epoch": 0.5082136262260486, "grad_norm": 0.9415725019315142, "learning_rate": 1.0220608292110605e-05, "loss": 0.7141, "step": 2649 }, { "epoch": 0.5084054773495768, "grad_norm": 0.8308213554349301, "learning_rate": 1.0214394940143866e-05, "loss": 0.7921, "step": 2650 }, { "epoch": 0.5085973284731049, "grad_norm": 0.9549690509090061, "learning_rate": 1.0208181505369197e-05, "loss": 0.7611, "step": 2651 }, { "epoch": 0.5087891795966331, "grad_norm": 0.9805959313954081, "learning_rate": 1.0201967990186468e-05, "loss": 0.8247, "step": 2652 }, { "epoch": 0.5089810307201612, "grad_norm": 1.0702096292333074, "learning_rate": 1.01957543969956e-05, "loss": 0.7771, "step": 2653 }, { "epoch": 0.5091728818436893, "grad_norm": 1.0102962875651553, "learning_rate": 1.0189540728196521e-05, "loss": 0.7631, "step": 2654 }, { "epoch": 0.5093647329672174, "grad_norm": 0.707412328970801, "learning_rate": 1.0183326986189204e-05, "loss": 0.3081, "step": 2655 }, { "epoch": 0.5095565840907456, "grad_norm": 0.9116431162598988, "learning_rate": 1.0177113173373645e-05, "loss": 0.8249, "step": 2656 }, { "epoch": 0.5097484352142737, "grad_norm": 0.8924289375163117, "learning_rate": 1.0170899292149868e-05, "loss": 0.7665, "step": 2657 }, { "epoch": 0.5099402863378019, "grad_norm": 0.8697872155772257, "learning_rate": 1.0164685344917923e-05, "loss": 0.7716, "step": 2658 }, { "epoch": 0.51013213746133, "grad_norm": 0.9887154136645314, "learning_rate": 1.0158471334077889e-05, "loss": 0.7038, "step": 2659 }, { "epoch": 0.5103239885848582, "grad_norm": 0.8841974102311807, "learning_rate": 1.0152257262029866e-05, "loss": 0.7794, "step": 2660 }, { "epoch": 0.5105158397083863, "grad_norm": 0.8752671065639877, "learning_rate": 1.0146043131173972e-05, "loss": 0.7761, "step": 2661 }, { "epoch": 0.5107076908319145, "grad_norm": 0.9589285789873054, "learning_rate": 1.0139828943910358e-05, "loss": 0.8289, "step": 2662 }, { "epoch": 0.5108995419554426, "grad_norm": 0.8892044304251289, "learning_rate": 1.0133614702639194e-05, "loss": 0.7442, "step": 2663 }, { "epoch": 0.5110913930789707, "grad_norm": 0.9424549707630481, "learning_rate": 1.0127400409760664e-05, "loss": 0.803, "step": 2664 }, { "epoch": 0.5112832442024988, "grad_norm": 1.0974692264778791, "learning_rate": 1.0121186067674985e-05, "loss": 0.8209, "step": 2665 }, { "epoch": 0.511475095326027, "grad_norm": 0.836958474508117, "learning_rate": 1.0114971678782381e-05, "loss": 0.8708, "step": 2666 }, { "epoch": 0.5116669464495551, "grad_norm": 0.875674186242219, "learning_rate": 1.01087572454831e-05, "loss": 0.7914, "step": 2667 }, { "epoch": 0.5118587975730833, "grad_norm": 0.8748250820883998, "learning_rate": 1.0102542770177404e-05, "loss": 0.7581, "step": 2668 }, { "epoch": 0.5120506486966114, "grad_norm": 0.9831089269594985, "learning_rate": 1.0096328255265573e-05, "loss": 0.808, "step": 2669 }, { "epoch": 0.5122424998201396, "grad_norm": 0.9224629839551967, "learning_rate": 1.0090113703147903e-05, "loss": 0.8074, "step": 2670 }, { "epoch": 0.5124343509436677, "grad_norm": 0.9183516049071706, "learning_rate": 1.0083899116224702e-05, "loss": 0.7777, "step": 2671 }, { "epoch": 0.5126262020671959, "grad_norm": 0.8521860072900299, "learning_rate": 1.0077684496896296e-05, "loss": 0.83, "step": 2672 }, { "epoch": 0.512818053190724, "grad_norm": 0.9306412435804811, "learning_rate": 1.0071469847563017e-05, "loss": 0.7539, "step": 2673 }, { "epoch": 0.5130099043142521, "grad_norm": 1.0187830004787088, "learning_rate": 1.0065255170625216e-05, "loss": 0.8226, "step": 2674 }, { "epoch": 0.5132017554377802, "grad_norm": 0.8904343517837598, "learning_rate": 1.0059040468483248e-05, "loss": 0.8437, "step": 2675 }, { "epoch": 0.5133936065613084, "grad_norm": 1.0361641427038868, "learning_rate": 1.0052825743537478e-05, "loss": 0.7579, "step": 2676 }, { "epoch": 0.5135854576848365, "grad_norm": 0.9030577174309609, "learning_rate": 1.004661099818829e-05, "loss": 0.7987, "step": 2677 }, { "epoch": 0.5137773088083647, "grad_norm": 0.9243813356529595, "learning_rate": 1.0040396234836058e-05, "loss": 0.8276, "step": 2678 }, { "epoch": 0.5139691599318928, "grad_norm": 0.9954397262824876, "learning_rate": 1.003418145588118e-05, "loss": 0.8544, "step": 2679 }, { "epoch": 0.514161011055421, "grad_norm": 1.0111955158489128, "learning_rate": 1.0027966663724053e-05, "loss": 0.7585, "step": 2680 }, { "epoch": 0.5143528621789492, "grad_norm": 0.862325425823517, "learning_rate": 1.0021751860765078e-05, "loss": 0.8549, "step": 2681 }, { "epoch": 0.5145447133024773, "grad_norm": 0.9538393312748105, "learning_rate": 1.001553704940466e-05, "loss": 0.8181, "step": 2682 }, { "epoch": 0.5147365644260055, "grad_norm": 0.9260926990241053, "learning_rate": 1.000932223204321e-05, "loss": 0.726, "step": 2683 }, { "epoch": 0.5149284155495336, "grad_norm": 1.006595499310225, "learning_rate": 1.0003107411081139e-05, "loss": 0.8009, "step": 2684 }, { "epoch": 0.5151202666730617, "grad_norm": 0.9528718281586366, "learning_rate": 9.996892588918861e-06, "loss": 0.7715, "step": 2685 }, { "epoch": 0.5153121177965898, "grad_norm": 0.9626913512329349, "learning_rate": 9.990677767956792e-06, "loss": 0.8193, "step": 2686 }, { "epoch": 0.515503968920118, "grad_norm": 0.88664453193322, "learning_rate": 9.984462950595341e-06, "loss": 0.8794, "step": 2687 }, { "epoch": 0.5156958200436461, "grad_norm": 0.988809721526057, "learning_rate": 9.978248139234925e-06, "loss": 0.8708, "step": 2688 }, { "epoch": 0.5158876711671743, "grad_norm": 0.94260829483042, "learning_rate": 9.97203333627595e-06, "loss": 0.7943, "step": 2689 }, { "epoch": 0.5160795222907024, "grad_norm": 0.9168137916974638, "learning_rate": 9.96581854411882e-06, "loss": 0.8468, "step": 2690 }, { "epoch": 0.5162713734142306, "grad_norm": 0.9396932113418981, "learning_rate": 9.959603765163944e-06, "loss": 0.8056, "step": 2691 }, { "epoch": 0.5164632245377587, "grad_norm": 0.9743315164274721, "learning_rate": 9.953389001811716e-06, "loss": 0.7505, "step": 2692 }, { "epoch": 0.5166550756612869, "grad_norm": 1.063856287550626, "learning_rate": 9.947174256462526e-06, "loss": 0.8279, "step": 2693 }, { "epoch": 0.516846926784815, "grad_norm": 1.0441344889548894, "learning_rate": 9.940959531516757e-06, "loss": 0.7756, "step": 2694 }, { "epoch": 0.5170387779083431, "grad_norm": 1.070584200512275, "learning_rate": 9.934744829374787e-06, "loss": 0.7931, "step": 2695 }, { "epoch": 0.5172306290318712, "grad_norm": 1.001234809677614, "learning_rate": 9.928530152436984e-06, "loss": 0.7989, "step": 2696 }, { "epoch": 0.5174224801553994, "grad_norm": 0.8819457893890098, "learning_rate": 9.922315503103707e-06, "loss": 0.8029, "step": 2697 }, { "epoch": 0.5176143312789275, "grad_norm": 0.8863514959945281, "learning_rate": 9.916100883775302e-06, "loss": 0.7862, "step": 2698 }, { "epoch": 0.5178061824024557, "grad_norm": 0.8994648141656919, "learning_rate": 9.909886296852103e-06, "loss": 0.7796, "step": 2699 }, { "epoch": 0.5179980335259838, "grad_norm": 1.0338264783504592, "learning_rate": 9.903671744734429e-06, "loss": 0.831, "step": 2700 }, { "epoch": 0.518189884649512, "grad_norm": 0.9784793726573869, "learning_rate": 9.897457229822597e-06, "loss": 0.7599, "step": 2701 }, { "epoch": 0.5183817357730401, "grad_norm": 0.9144842329999032, "learning_rate": 9.891242754516903e-06, "loss": 0.8041, "step": 2702 }, { "epoch": 0.5185735868965683, "grad_norm": 1.077669000270288, "learning_rate": 9.88502832121762e-06, "loss": 0.8013, "step": 2703 }, { "epoch": 0.5187654380200964, "grad_norm": 0.8515075322173373, "learning_rate": 9.878813932325016e-06, "loss": 0.7844, "step": 2704 }, { "epoch": 0.5189572891436246, "grad_norm": 1.0443424191806034, "learning_rate": 9.872599590239337e-06, "loss": 0.7635, "step": 2705 }, { "epoch": 0.5191491402671526, "grad_norm": 1.013587594204505, "learning_rate": 9.86638529736081e-06, "loss": 0.8406, "step": 2706 }, { "epoch": 0.5193409913906808, "grad_norm": 1.0059254431587246, "learning_rate": 9.860171056089646e-06, "loss": 0.7047, "step": 2707 }, { "epoch": 0.519532842514209, "grad_norm": 0.9353208578198793, "learning_rate": 9.853956868826033e-06, "loss": 0.7888, "step": 2708 }, { "epoch": 0.5197246936377371, "grad_norm": 0.9480505301639789, "learning_rate": 9.847742737970139e-06, "loss": 0.7721, "step": 2709 }, { "epoch": 0.5199165447612653, "grad_norm": 0.860382302515147, "learning_rate": 9.841528665922113e-06, "loss": 0.8601, "step": 2710 }, { "epoch": 0.5201083958847934, "grad_norm": 0.8808743229070503, "learning_rate": 9.835314655082078e-06, "loss": 0.7861, "step": 2711 }, { "epoch": 0.5203002470083216, "grad_norm": 1.0629333728644388, "learning_rate": 9.829100707850134e-06, "loss": 0.7313, "step": 2712 }, { "epoch": 0.5204920981318497, "grad_norm": 1.047002335921625, "learning_rate": 9.822886826626359e-06, "loss": 0.8425, "step": 2713 }, { "epoch": 0.5206839492553779, "grad_norm": 0.8542381903492118, "learning_rate": 9.816673013810798e-06, "loss": 0.8204, "step": 2714 }, { "epoch": 0.520875800378906, "grad_norm": 0.9257551643803855, "learning_rate": 9.81045927180348e-06, "loss": 0.7565, "step": 2715 }, { "epoch": 0.5210676515024341, "grad_norm": 0.8816466255318163, "learning_rate": 9.804245603004403e-06, "loss": 0.7882, "step": 2716 }, { "epoch": 0.5212595026259622, "grad_norm": 0.9959226057115165, "learning_rate": 9.798032009813533e-06, "loss": 0.7968, "step": 2717 }, { "epoch": 0.5214513537494904, "grad_norm": 0.7113214202347383, "learning_rate": 9.79181849463081e-06, "loss": 0.3354, "step": 2718 }, { "epoch": 0.5216432048730185, "grad_norm": 1.0173188761224436, "learning_rate": 9.785605059856134e-06, "loss": 0.7261, "step": 2719 }, { "epoch": 0.5218350559965467, "grad_norm": 1.1526442461175677, "learning_rate": 9.779391707889397e-06, "loss": 0.8321, "step": 2720 }, { "epoch": 0.5220269071200748, "grad_norm": 1.0271327279788003, "learning_rate": 9.773178441130437e-06, "loss": 0.7697, "step": 2721 }, { "epoch": 0.522218758243603, "grad_norm": 0.9633517405453311, "learning_rate": 9.766965261979072e-06, "loss": 0.7017, "step": 2722 }, { "epoch": 0.5224106093671311, "grad_norm": 1.0173588337733215, "learning_rate": 9.760752172835076e-06, "loss": 0.7874, "step": 2723 }, { "epoch": 0.5226024604906593, "grad_norm": 0.8247554999720509, "learning_rate": 9.754539176098192e-06, "loss": 0.7361, "step": 2724 }, { "epoch": 0.5227943116141874, "grad_norm": 0.9367073663772533, "learning_rate": 9.748326274168137e-06, "loss": 0.7868, "step": 2725 }, { "epoch": 0.5229861627377156, "grad_norm": 0.970887011229685, "learning_rate": 9.742113469444577e-06, "loss": 0.8615, "step": 2726 }, { "epoch": 0.5231780138612436, "grad_norm": 0.9881937543614701, "learning_rate": 9.735900764327147e-06, "loss": 0.8064, "step": 2727 }, { "epoch": 0.5233698649847718, "grad_norm": 0.8961473572246715, "learning_rate": 9.729688161215445e-06, "loss": 0.8028, "step": 2728 }, { "epoch": 0.5235617161082999, "grad_norm": 0.8292081384720851, "learning_rate": 9.723475662509023e-06, "loss": 0.8243, "step": 2729 }, { "epoch": 0.5237535672318281, "grad_norm": 0.8673307518533928, "learning_rate": 9.717263270607406e-06, "loss": 0.7409, "step": 2730 }, { "epoch": 0.5239454183553562, "grad_norm": 1.1211470232101417, "learning_rate": 9.711050987910065e-06, "loss": 0.6748, "step": 2731 }, { "epoch": 0.5241372694788844, "grad_norm": 0.8422926024055768, "learning_rate": 9.70483881681643e-06, "loss": 0.811, "step": 2732 }, { "epoch": 0.5243291206024125, "grad_norm": 0.9265894059795668, "learning_rate": 9.698626759725895e-06, "loss": 0.7563, "step": 2733 }, { "epoch": 0.5245209717259407, "grad_norm": 0.8565869535606533, "learning_rate": 9.692414819037799e-06, "loss": 0.7718, "step": 2734 }, { "epoch": 0.5247128228494689, "grad_norm": 0.8816696310002354, "learning_rate": 9.686202997151452e-06, "loss": 0.8304, "step": 2735 }, { "epoch": 0.524904673972997, "grad_norm": 0.9391570957688468, "learning_rate": 9.679991296466106e-06, "loss": 0.7988, "step": 2736 }, { "epoch": 0.525096525096525, "grad_norm": 0.8127104502351838, "learning_rate": 9.673779719380967e-06, "loss": 0.7369, "step": 2737 }, { "epoch": 0.5252883762200532, "grad_norm": 0.8668574201355018, "learning_rate": 9.667568268295197e-06, "loss": 0.8324, "step": 2738 }, { "epoch": 0.5254802273435814, "grad_norm": 0.9101717789075285, "learning_rate": 9.661356945607904e-06, "loss": 0.7696, "step": 2739 }, { "epoch": 0.5256720784671095, "grad_norm": 0.9245577525267695, "learning_rate": 9.655145753718156e-06, "loss": 0.7408, "step": 2740 }, { "epoch": 0.5258639295906377, "grad_norm": 0.8631948482270653, "learning_rate": 9.648934695024963e-06, "loss": 0.7408, "step": 2741 }, { "epoch": 0.5260557807141658, "grad_norm": 0.9862153518420792, "learning_rate": 9.642723771927284e-06, "loss": 0.7877, "step": 2742 }, { "epoch": 0.526247631837694, "grad_norm": 0.8720056790894981, "learning_rate": 9.636512986824022e-06, "loss": 0.7298, "step": 2743 }, { "epoch": 0.5264394829612221, "grad_norm": 0.8686300945103707, "learning_rate": 9.630302342114045e-06, "loss": 0.7672, "step": 2744 }, { "epoch": 0.5266313340847503, "grad_norm": 1.040816327451329, "learning_rate": 9.624091840196141e-06, "loss": 0.7883, "step": 2745 }, { "epoch": 0.5268231852082784, "grad_norm": 0.7988871186755419, "learning_rate": 9.61788148346906e-06, "loss": 0.6915, "step": 2746 }, { "epoch": 0.5270150363318065, "grad_norm": 0.938616217021399, "learning_rate": 9.61167127433149e-06, "loss": 0.827, "step": 2747 }, { "epoch": 0.5272068874553346, "grad_norm": 0.9568029156533372, "learning_rate": 9.605461215182059e-06, "loss": 0.8073, "step": 2748 }, { "epoch": 0.5273987385788628, "grad_norm": 0.9308841159106792, "learning_rate": 9.59925130841935e-06, "loss": 0.771, "step": 2749 }, { "epoch": 0.5275905897023909, "grad_norm": 1.0029359578691648, "learning_rate": 9.593041556441874e-06, "loss": 0.7983, "step": 2750 }, { "epoch": 0.5277824408259191, "grad_norm": 0.9133255412490716, "learning_rate": 9.586831961648087e-06, "loss": 0.7765, "step": 2751 }, { "epoch": 0.5279742919494472, "grad_norm": 0.8259650789801666, "learning_rate": 9.580622526436382e-06, "loss": 0.7707, "step": 2752 }, { "epoch": 0.5281661430729754, "grad_norm": 0.9516867674031698, "learning_rate": 9.57441325320509e-06, "loss": 0.7587, "step": 2753 }, { "epoch": 0.5283579941965035, "grad_norm": 0.8984031024300566, "learning_rate": 9.568204144352488e-06, "loss": 0.8042, "step": 2754 }, { "epoch": 0.5285498453200317, "grad_norm": 0.9660040844907455, "learning_rate": 9.561995202276781e-06, "loss": 0.8224, "step": 2755 }, { "epoch": 0.5287416964435598, "grad_norm": 1.0169372314608343, "learning_rate": 9.555786429376111e-06, "loss": 0.7945, "step": 2756 }, { "epoch": 0.528933547567088, "grad_norm": 0.6647491070282907, "learning_rate": 9.549577828048553e-06, "loss": 0.3238, "step": 2757 }, { "epoch": 0.529125398690616, "grad_norm": 0.9007463725334591, "learning_rate": 9.543369400692118e-06, "loss": 0.8347, "step": 2758 }, { "epoch": 0.5293172498141442, "grad_norm": 0.9558774175715178, "learning_rate": 9.537161149704756e-06, "loss": 0.8199, "step": 2759 }, { "epoch": 0.5295091009376723, "grad_norm": 0.9368083036994069, "learning_rate": 9.530953077484337e-06, "loss": 0.7609, "step": 2760 }, { "epoch": 0.5297009520612005, "grad_norm": 1.047800952217349, "learning_rate": 9.52474518642867e-06, "loss": 0.8543, "step": 2761 }, { "epoch": 0.5298928031847286, "grad_norm": 0.9340742313149175, "learning_rate": 9.51853747893549e-06, "loss": 0.7658, "step": 2762 }, { "epoch": 0.5300846543082568, "grad_norm": 0.9515955928867161, "learning_rate": 9.512329957402461e-06, "loss": 0.82, "step": 2763 }, { "epoch": 0.530276505431785, "grad_norm": 0.8953720680897773, "learning_rate": 9.506122624227184e-06, "loss": 0.7536, "step": 2764 }, { "epoch": 0.5304683565553131, "grad_norm": 0.8615813181706489, "learning_rate": 9.499915481807177e-06, "loss": 0.8337, "step": 2765 }, { "epoch": 0.5306602076788413, "grad_norm": 0.9834149153985678, "learning_rate": 9.493708532539886e-06, "loss": 0.8487, "step": 2766 }, { "epoch": 0.5308520588023694, "grad_norm": 0.9315566812741642, "learning_rate": 9.487501778822685e-06, "loss": 0.7696, "step": 2767 }, { "epoch": 0.5310439099258975, "grad_norm": 0.8781087471474932, "learning_rate": 9.481295223052869e-06, "loss": 0.799, "step": 2768 }, { "epoch": 0.5312357610494256, "grad_norm": 0.8566398634442628, "learning_rate": 9.475088867627663e-06, "loss": 0.7762, "step": 2769 }, { "epoch": 0.5314276121729538, "grad_norm": 0.8678330464542824, "learning_rate": 9.46888271494421e-06, "loss": 0.7541, "step": 2770 }, { "epoch": 0.5316194632964819, "grad_norm": 0.6701739767165201, "learning_rate": 9.462676767399573e-06, "loss": 0.3252, "step": 2771 }, { "epoch": 0.5318113144200101, "grad_norm": 0.9156974363530349, "learning_rate": 9.45647102739074e-06, "loss": 0.7991, "step": 2772 }, { "epoch": 0.5320031655435382, "grad_norm": 0.9478041680660129, "learning_rate": 9.450265497314615e-06, "loss": 0.8251, "step": 2773 }, { "epoch": 0.5321950166670664, "grad_norm": 0.9842648218258434, "learning_rate": 9.444060179568026e-06, "loss": 0.7506, "step": 2774 }, { "epoch": 0.5323868677905945, "grad_norm": 1.0375119363898304, "learning_rate": 9.437855076547713e-06, "loss": 0.7548, "step": 2775 }, { "epoch": 0.5325787189141227, "grad_norm": 1.0137391159966533, "learning_rate": 9.431650190650336e-06, "loss": 0.7766, "step": 2776 }, { "epoch": 0.5327705700376508, "grad_norm": 0.9203739248616742, "learning_rate": 9.425445524272474e-06, "loss": 0.8497, "step": 2777 }, { "epoch": 0.532962421161179, "grad_norm": 0.8199126534533842, "learning_rate": 9.419241079810609e-06, "loss": 0.7539, "step": 2778 }, { "epoch": 0.533154272284707, "grad_norm": 0.866816137492599, "learning_rate": 9.41303685966116e-06, "loss": 0.7386, "step": 2779 }, { "epoch": 0.5333461234082352, "grad_norm": 1.063255707360014, "learning_rate": 9.40683286622044e-06, "loss": 0.7936, "step": 2780 }, { "epoch": 0.5335379745317633, "grad_norm": 0.904851496822895, "learning_rate": 9.400629101884681e-06, "loss": 0.7565, "step": 2781 }, { "epoch": 0.5337298256552915, "grad_norm": 0.868375928235535, "learning_rate": 9.394425569050018e-06, "loss": 0.8145, "step": 2782 }, { "epoch": 0.5339216767788196, "grad_norm": 0.9050071465438458, "learning_rate": 9.388222270112515e-06, "loss": 0.7971, "step": 2783 }, { "epoch": 0.5341135279023478, "grad_norm": 0.9620735208091423, "learning_rate": 9.382019207468131e-06, "loss": 0.8006, "step": 2784 }, { "epoch": 0.5343053790258759, "grad_norm": 1.0587077800840832, "learning_rate": 9.375816383512739e-06, "loss": 0.8235, "step": 2785 }, { "epoch": 0.5344972301494041, "grad_norm": 0.9609083544614371, "learning_rate": 9.369613800642118e-06, "loss": 0.795, "step": 2786 }, { "epoch": 0.5346890812729322, "grad_norm": 0.8718132932431697, "learning_rate": 9.363411461251952e-06, "loss": 0.7693, "step": 2787 }, { "epoch": 0.5348809323964604, "grad_norm": 0.7104882729030234, "learning_rate": 9.357209367737837e-06, "loss": 0.3158, "step": 2788 }, { "epoch": 0.5350727835199884, "grad_norm": 0.8670143580834353, "learning_rate": 9.351007522495269e-06, "loss": 0.7993, "step": 2789 }, { "epoch": 0.5352646346435166, "grad_norm": 0.9427140201677168, "learning_rate": 9.34480592791965e-06, "loss": 0.8538, "step": 2790 }, { "epoch": 0.5354564857670447, "grad_norm": 0.9790275750085707, "learning_rate": 9.338604586406283e-06, "loss": 0.812, "step": 2791 }, { "epoch": 0.5356483368905729, "grad_norm": 1.0008885571003203, "learning_rate": 9.332403500350374e-06, "loss": 0.7972, "step": 2792 }, { "epoch": 0.535840188014101, "grad_norm": 0.8395405557012073, "learning_rate": 9.326202672147037e-06, "loss": 0.7586, "step": 2793 }, { "epoch": 0.5360320391376292, "grad_norm": 0.9647482627833007, "learning_rate": 9.320002104191278e-06, "loss": 0.7397, "step": 2794 }, { "epoch": 0.5362238902611574, "grad_norm": 0.8933081126511769, "learning_rate": 9.313801798878008e-06, "loss": 0.8141, "step": 2795 }, { "epoch": 0.5364157413846855, "grad_norm": 0.9491566410438889, "learning_rate": 9.307601758602028e-06, "loss": 0.7579, "step": 2796 }, { "epoch": 0.5366075925082137, "grad_norm": 1.0158984715674255, "learning_rate": 9.30140198575804e-06, "loss": 0.8033, "step": 2797 }, { "epoch": 0.5367994436317418, "grad_norm": 1.0924039078327243, "learning_rate": 9.295202482740656e-06, "loss": 0.8098, "step": 2798 }, { "epoch": 0.53699129475527, "grad_norm": 0.9835428855470322, "learning_rate": 9.289003251944367e-06, "loss": 0.755, "step": 2799 }, { "epoch": 0.537183145878798, "grad_norm": 0.9351849131123314, "learning_rate": 9.282804295763564e-06, "loss": 0.7529, "step": 2800 }, { "epoch": 0.5373749970023262, "grad_norm": 0.9331464229262019, "learning_rate": 9.276605616592535e-06, "loss": 0.7645, "step": 2801 }, { "epoch": 0.5375668481258543, "grad_norm": 0.9108265159672521, "learning_rate": 9.270407216825455e-06, "loss": 0.7487, "step": 2802 }, { "epoch": 0.5377586992493825, "grad_norm": 0.9371476457652066, "learning_rate": 9.2642090988564e-06, "loss": 0.814, "step": 2803 }, { "epoch": 0.5379505503729106, "grad_norm": 0.6520007911573343, "learning_rate": 9.25801126507933e-06, "loss": 0.3474, "step": 2804 }, { "epoch": 0.5381424014964388, "grad_norm": 0.8816910899143285, "learning_rate": 9.251813717888094e-06, "loss": 0.7328, "step": 2805 }, { "epoch": 0.5383342526199669, "grad_norm": 0.8686786855225311, "learning_rate": 9.24561645967644e-06, "loss": 0.8238, "step": 2806 }, { "epoch": 0.5385261037434951, "grad_norm": 0.9037502539772884, "learning_rate": 9.239419492837986e-06, "loss": 0.7448, "step": 2807 }, { "epoch": 0.5387179548670232, "grad_norm": 0.9703512386815079, "learning_rate": 9.233222819766267e-06, "loss": 0.7708, "step": 2808 }, { "epoch": 0.5389098059905514, "grad_norm": 0.9938997216428477, "learning_rate": 9.227026442854674e-06, "loss": 0.8285, "step": 2809 }, { "epoch": 0.5391016571140794, "grad_norm": 0.6548789158481714, "learning_rate": 9.220830364496499e-06, "loss": 0.3109, "step": 2810 }, { "epoch": 0.5392935082376076, "grad_norm": 0.8831963095480878, "learning_rate": 9.214634587084918e-06, "loss": 0.7083, "step": 2811 }, { "epoch": 0.5394853593611357, "grad_norm": 0.9785108707336979, "learning_rate": 9.208439113012984e-06, "loss": 0.7712, "step": 2812 }, { "epoch": 0.5396772104846639, "grad_norm": 0.9616146806625306, "learning_rate": 9.202243944673646e-06, "loss": 0.7838, "step": 2813 }, { "epoch": 0.539869061608192, "grad_norm": 0.8998197682120522, "learning_rate": 9.196049084459722e-06, "loss": 0.7467, "step": 2814 }, { "epoch": 0.5400609127317202, "grad_norm": 0.9293805881896106, "learning_rate": 9.189854534763916e-06, "loss": 0.8417, "step": 2815 }, { "epoch": 0.5402527638552483, "grad_norm": 0.793858133948736, "learning_rate": 9.183660297978812e-06, "loss": 0.8072, "step": 2816 }, { "epoch": 0.5404446149787765, "grad_norm": 0.9842341960563407, "learning_rate": 9.177466376496872e-06, "loss": 0.8373, "step": 2817 }, { "epoch": 0.5406364661023046, "grad_norm": 0.8720496139996997, "learning_rate": 9.17127277271044e-06, "loss": 0.7158, "step": 2818 }, { "epoch": 0.5408283172258328, "grad_norm": 0.9654466699788401, "learning_rate": 9.165079489011732e-06, "loss": 0.7875, "step": 2819 }, { "epoch": 0.5410201683493608, "grad_norm": 0.9630888828943269, "learning_rate": 9.158886527792843e-06, "loss": 0.7251, "step": 2820 }, { "epoch": 0.541212019472889, "grad_norm": 0.6244205335849262, "learning_rate": 9.15269389144574e-06, "loss": 0.3219, "step": 2821 }, { "epoch": 0.5414038705964171, "grad_norm": 0.8250570611707712, "learning_rate": 9.146501582362277e-06, "loss": 0.8221, "step": 2822 }, { "epoch": 0.5415957217199453, "grad_norm": 1.0009037579016271, "learning_rate": 9.140309602934165e-06, "loss": 0.8117, "step": 2823 }, { "epoch": 0.5417875728434735, "grad_norm": 0.8059321594911115, "learning_rate": 9.134117955552997e-06, "loss": 0.6663, "step": 2824 }, { "epoch": 0.5419794239670016, "grad_norm": 0.8464969692802665, "learning_rate": 9.127926642610235e-06, "loss": 0.8003, "step": 2825 }, { "epoch": 0.5421712750905298, "grad_norm": 0.9094840263122685, "learning_rate": 9.121735666497207e-06, "loss": 0.7414, "step": 2826 }, { "epoch": 0.5423631262140579, "grad_norm": 0.8546178291534903, "learning_rate": 9.115545029605129e-06, "loss": 0.773, "step": 2827 }, { "epoch": 0.5425549773375861, "grad_norm": 0.907816986668264, "learning_rate": 9.109354734325068e-06, "loss": 0.755, "step": 2828 }, { "epoch": 0.5427468284611142, "grad_norm": 0.8432593208328849, "learning_rate": 9.103164783047963e-06, "loss": 0.7473, "step": 2829 }, { "epoch": 0.5429386795846424, "grad_norm": 0.8880548685950895, "learning_rate": 9.096975178164622e-06, "loss": 0.7797, "step": 2830 }, { "epoch": 0.5431305307081704, "grad_norm": 0.8554573560465046, "learning_rate": 9.090785922065716e-06, "loss": 0.7481, "step": 2831 }, { "epoch": 0.5433223818316986, "grad_norm": 0.903044176152488, "learning_rate": 9.08459701714179e-06, "loss": 0.7898, "step": 2832 }, { "epoch": 0.5435142329552267, "grad_norm": 0.8593836742791526, "learning_rate": 9.078408465783244e-06, "loss": 0.7983, "step": 2833 }, { "epoch": 0.5437060840787549, "grad_norm": 1.0148517060984035, "learning_rate": 9.07222027038035e-06, "loss": 0.6666, "step": 2834 }, { "epoch": 0.543897935202283, "grad_norm": 1.0700710401740272, "learning_rate": 9.066032433323228e-06, "loss": 0.7456, "step": 2835 }, { "epoch": 0.5440897863258112, "grad_norm": 0.9140397648309331, "learning_rate": 9.059844957001876e-06, "loss": 0.8074, "step": 2836 }, { "epoch": 0.5442816374493393, "grad_norm": 0.8680518177478176, "learning_rate": 9.053657843806145e-06, "loss": 0.8282, "step": 2837 }, { "epoch": 0.5444734885728675, "grad_norm": 0.9044112103894274, "learning_rate": 9.047471096125746e-06, "loss": 0.7949, "step": 2838 }, { "epoch": 0.5446653396963956, "grad_norm": 0.8705336495803605, "learning_rate": 9.04128471635025e-06, "loss": 0.7912, "step": 2839 }, { "epoch": 0.5448571908199238, "grad_norm": 0.9003573110212638, "learning_rate": 9.035098706869082e-06, "loss": 0.8502, "step": 2840 }, { "epoch": 0.5450490419434518, "grad_norm": 0.8859763680700047, "learning_rate": 9.028913070071527e-06, "loss": 0.7921, "step": 2841 }, { "epoch": 0.54524089306698, "grad_norm": 1.0209784693031934, "learning_rate": 9.022727808346731e-06, "loss": 0.7491, "step": 2842 }, { "epoch": 0.5454327441905081, "grad_norm": 1.092467756521133, "learning_rate": 9.01654292408369e-06, "loss": 0.7276, "step": 2843 }, { "epoch": 0.5456245953140363, "grad_norm": 0.6663550051699159, "learning_rate": 9.010358419671252e-06, "loss": 0.2927, "step": 2844 }, { "epoch": 0.5458164464375644, "grad_norm": 1.1441788905946424, "learning_rate": 9.004174297498122e-06, "loss": 0.7634, "step": 2845 }, { "epoch": 0.5460082975610926, "grad_norm": 0.9999615271836322, "learning_rate": 8.997990559952855e-06, "loss": 0.7819, "step": 2846 }, { "epoch": 0.5462001486846207, "grad_norm": 0.8518850622171086, "learning_rate": 8.991807209423858e-06, "loss": 0.8098, "step": 2847 }, { "epoch": 0.5463919998081489, "grad_norm": 0.9400221877392888, "learning_rate": 8.985624248299392e-06, "loss": 0.7177, "step": 2848 }, { "epoch": 0.546583850931677, "grad_norm": 0.8653914518645651, "learning_rate": 8.979441678967565e-06, "loss": 0.7615, "step": 2849 }, { "epoch": 0.5467757020552052, "grad_norm": 0.9304796170244753, "learning_rate": 8.973259503816333e-06, "loss": 0.8267, "step": 2850 }, { "epoch": 0.5469675531787334, "grad_norm": 0.9486908728311266, "learning_rate": 8.967077725233494e-06, "loss": 0.8275, "step": 2851 }, { "epoch": 0.5471594043022614, "grad_norm": 1.0569307053898416, "learning_rate": 8.960896345606708e-06, "loss": 0.7937, "step": 2852 }, { "epoch": 0.5473512554257896, "grad_norm": 0.8937333202089487, "learning_rate": 8.954715367323468e-06, "loss": 0.8093, "step": 2853 }, { "epoch": 0.5475431065493177, "grad_norm": 1.042513148088481, "learning_rate": 8.948534792771116e-06, "loss": 0.8071, "step": 2854 }, { "epoch": 0.5477349576728459, "grad_norm": 0.9559475693027952, "learning_rate": 8.942354624336837e-06, "loss": 0.8227, "step": 2855 }, { "epoch": 0.547926808796374, "grad_norm": 0.9352803752778485, "learning_rate": 8.93617486440766e-06, "loss": 0.7866, "step": 2856 }, { "epoch": 0.5481186599199022, "grad_norm": 0.785343086658244, "learning_rate": 8.92999551537046e-06, "loss": 0.7924, "step": 2857 }, { "epoch": 0.5483105110434303, "grad_norm": 0.872486730181275, "learning_rate": 8.923816579611947e-06, "loss": 0.8164, "step": 2858 }, { "epoch": 0.5485023621669585, "grad_norm": 0.8859038924519255, "learning_rate": 8.917638059518677e-06, "loss": 0.8668, "step": 2859 }, { "epoch": 0.5486942132904866, "grad_norm": 0.894195579846685, "learning_rate": 8.911459957477035e-06, "loss": 0.8203, "step": 2860 }, { "epoch": 0.5488860644140148, "grad_norm": 0.9531207598320045, "learning_rate": 8.905282275873259e-06, "loss": 0.8246, "step": 2861 }, { "epoch": 0.5490779155375428, "grad_norm": 0.9660166121295988, "learning_rate": 8.899105017093418e-06, "loss": 0.8338, "step": 2862 }, { "epoch": 0.549269766661071, "grad_norm": 0.8897208962223918, "learning_rate": 8.892928183523412e-06, "loss": 0.8091, "step": 2863 }, { "epoch": 0.5494616177845991, "grad_norm": 0.9938044833399118, "learning_rate": 8.886751777548988e-06, "loss": 0.8032, "step": 2864 }, { "epoch": 0.5496534689081273, "grad_norm": 0.831109140539632, "learning_rate": 8.880575801555715e-06, "loss": 0.7681, "step": 2865 }, { "epoch": 0.5498453200316554, "grad_norm": 1.0476761974177575, "learning_rate": 8.87440025792901e-06, "loss": 0.8523, "step": 2866 }, { "epoch": 0.5500371711551836, "grad_norm": 0.8517782969984445, "learning_rate": 8.868225149054112e-06, "loss": 0.8462, "step": 2867 }, { "epoch": 0.5502290222787117, "grad_norm": 0.9855585936072686, "learning_rate": 8.862050477316096e-06, "loss": 0.6816, "step": 2868 }, { "epoch": 0.5504208734022399, "grad_norm": 0.69113878523221, "learning_rate": 8.85587624509987e-06, "loss": 0.3064, "step": 2869 }, { "epoch": 0.550612724525768, "grad_norm": 0.843870584604629, "learning_rate": 8.849702454790163e-06, "loss": 0.7939, "step": 2870 }, { "epoch": 0.5508045756492962, "grad_norm": 0.9929150339705245, "learning_rate": 8.843529108771553e-06, "loss": 0.8401, "step": 2871 }, { "epoch": 0.5509964267728243, "grad_norm": 0.9357040651803142, "learning_rate": 8.837356209428428e-06, "loss": 0.7496, "step": 2872 }, { "epoch": 0.5511882778963524, "grad_norm": 1.061503114644708, "learning_rate": 8.83118375914501e-06, "loss": 0.701, "step": 2873 }, { "epoch": 0.5513801290198805, "grad_norm": 1.1688323093038175, "learning_rate": 8.825011760305345e-06, "loss": 0.7499, "step": 2874 }, { "epoch": 0.5515719801434087, "grad_norm": 0.943714237214012, "learning_rate": 8.818840215293304e-06, "loss": 0.8117, "step": 2875 }, { "epoch": 0.5517638312669368, "grad_norm": 0.9551109561370014, "learning_rate": 8.812669126492593e-06, "loss": 0.786, "step": 2876 }, { "epoch": 0.551955682390465, "grad_norm": 0.8744701731868323, "learning_rate": 8.806498496286732e-06, "loss": 0.8204, "step": 2877 }, { "epoch": 0.5521475335139931, "grad_norm": 0.9579155838770494, "learning_rate": 8.800328327059065e-06, "loss": 0.7348, "step": 2878 }, { "epoch": 0.5523393846375213, "grad_norm": 0.9001461256245802, "learning_rate": 8.794158621192757e-06, "loss": 0.788, "step": 2879 }, { "epoch": 0.5525312357610495, "grad_norm": 1.066765600992486, "learning_rate": 8.787989381070798e-06, "loss": 0.7774, "step": 2880 }, { "epoch": 0.5527230868845776, "grad_norm": 0.9959019336435556, "learning_rate": 8.781820609075998e-06, "loss": 0.8117, "step": 2881 }, { "epoch": 0.5529149380081058, "grad_norm": 0.9160871129436619, "learning_rate": 8.775652307590982e-06, "loss": 0.7972, "step": 2882 }, { "epoch": 0.5531067891316338, "grad_norm": 0.8605669901376307, "learning_rate": 8.769484478998198e-06, "loss": 0.7645, "step": 2883 }, { "epoch": 0.553298640255162, "grad_norm": 1.031896169228868, "learning_rate": 8.763317125679907e-06, "loss": 0.7785, "step": 2884 }, { "epoch": 0.5534904913786901, "grad_norm": 1.0844424222762836, "learning_rate": 8.757150250018183e-06, "loss": 0.7394, "step": 2885 }, { "epoch": 0.5536823425022183, "grad_norm": 0.9967701650686702, "learning_rate": 8.750983854394934e-06, "loss": 0.8498, "step": 2886 }, { "epoch": 0.5538741936257464, "grad_norm": 0.9108345155921679, "learning_rate": 8.744817941191862e-06, "loss": 0.7996, "step": 2887 }, { "epoch": 0.5540660447492746, "grad_norm": 0.8750898590258364, "learning_rate": 8.738652512790487e-06, "loss": 0.756, "step": 2888 }, { "epoch": 0.5542578958728027, "grad_norm": 0.941170427371995, "learning_rate": 8.73248757157215e-06, "loss": 0.7839, "step": 2889 }, { "epoch": 0.5544497469963309, "grad_norm": 0.8838805570392082, "learning_rate": 8.726323119917993e-06, "loss": 0.8393, "step": 2890 }, { "epoch": 0.554641598119859, "grad_norm": 0.6498248636429949, "learning_rate": 8.720159160208983e-06, "loss": 0.324, "step": 2891 }, { "epoch": 0.5548334492433872, "grad_norm": 0.8292777636856444, "learning_rate": 8.713995694825887e-06, "loss": 0.7652, "step": 2892 }, { "epoch": 0.5550253003669152, "grad_norm": 1.0126060708226827, "learning_rate": 8.707832726149276e-06, "loss": 0.7479, "step": 2893 }, { "epoch": 0.5552171514904434, "grad_norm": 1.0175363458098456, "learning_rate": 8.701670256559544e-06, "loss": 0.7847, "step": 2894 }, { "epoch": 0.5554090026139715, "grad_norm": 0.9613372544164417, "learning_rate": 8.695508288436877e-06, "loss": 0.7856, "step": 2895 }, { "epoch": 0.5556008537374997, "grad_norm": 1.0504612351339098, "learning_rate": 8.68934682416128e-06, "loss": 0.8372, "step": 2896 }, { "epoch": 0.5557927048610278, "grad_norm": 0.8300648563508627, "learning_rate": 8.683185866112557e-06, "loss": 0.7285, "step": 2897 }, { "epoch": 0.555984555984556, "grad_norm": 1.0213387762108865, "learning_rate": 8.677025416670315e-06, "loss": 0.765, "step": 2898 }, { "epoch": 0.5561764071080841, "grad_norm": 0.9302497247539466, "learning_rate": 8.670865478213966e-06, "loss": 0.7979, "step": 2899 }, { "epoch": 0.5563682582316123, "grad_norm": 0.8237101648161651, "learning_rate": 8.664706053122728e-06, "loss": 0.8367, "step": 2900 }, { "epoch": 0.5565601093551404, "grad_norm": 0.8869001238723005, "learning_rate": 8.65854714377562e-06, "loss": 0.8114, "step": 2901 }, { "epoch": 0.5567519604786686, "grad_norm": 0.9326954171943256, "learning_rate": 8.652388752551458e-06, "loss": 0.7434, "step": 2902 }, { "epoch": 0.5569438116021967, "grad_norm": 0.8711228165315885, "learning_rate": 8.646230881828858e-06, "loss": 0.7839, "step": 2903 }, { "epoch": 0.5571356627257248, "grad_norm": 0.958348035327056, "learning_rate": 8.640073533986236e-06, "loss": 0.7103, "step": 2904 }, { "epoch": 0.5573275138492529, "grad_norm": 0.8310265310548249, "learning_rate": 8.633916711401813e-06, "loss": 0.7299, "step": 2905 }, { "epoch": 0.5575193649727811, "grad_norm": 0.8651111902686416, "learning_rate": 8.627760416453599e-06, "loss": 0.7875, "step": 2906 }, { "epoch": 0.5577112160963092, "grad_norm": 0.9892938769595614, "learning_rate": 8.621604651519398e-06, "loss": 0.8163, "step": 2907 }, { "epoch": 0.5579030672198374, "grad_norm": 0.9099904639625201, "learning_rate": 8.615449418976819e-06, "loss": 0.7537, "step": 2908 }, { "epoch": 0.5580949183433656, "grad_norm": 1.020832427223164, "learning_rate": 8.609294721203255e-06, "loss": 0.7949, "step": 2909 }, { "epoch": 0.5582867694668937, "grad_norm": 0.9462414473432282, "learning_rate": 8.6031405605759e-06, "loss": 0.7915, "step": 2910 }, { "epoch": 0.5584786205904219, "grad_norm": 0.8667719995735174, "learning_rate": 8.596986939471739e-06, "loss": 0.7713, "step": 2911 }, { "epoch": 0.55867047171395, "grad_norm": 0.9002326072063798, "learning_rate": 8.590833860267545e-06, "loss": 0.8093, "step": 2912 }, { "epoch": 0.5588623228374782, "grad_norm": 0.9592349915520704, "learning_rate": 8.584681325339886e-06, "loss": 0.7151, "step": 2913 }, { "epoch": 0.5590541739610062, "grad_norm": 0.8669044580292722, "learning_rate": 8.578529337065115e-06, "loss": 0.7768, "step": 2914 }, { "epoch": 0.5592460250845344, "grad_norm": 1.0823035417636873, "learning_rate": 8.57237789781938e-06, "loss": 0.797, "step": 2915 }, { "epoch": 0.5594378762080625, "grad_norm": 0.9693014166642359, "learning_rate": 8.566227009978616e-06, "loss": 0.8371, "step": 2916 }, { "epoch": 0.5596297273315907, "grad_norm": 0.8462884904975686, "learning_rate": 8.560076675918537e-06, "loss": 0.7206, "step": 2917 }, { "epoch": 0.5598215784551188, "grad_norm": 0.901816252704017, "learning_rate": 8.553926898014651e-06, "loss": 0.8397, "step": 2918 }, { "epoch": 0.560013429578647, "grad_norm": 0.9299895716698582, "learning_rate": 8.547777678642247e-06, "loss": 0.7536, "step": 2919 }, { "epoch": 0.5602052807021751, "grad_norm": 1.2588454931284023, "learning_rate": 8.541629020176407e-06, "loss": 0.7344, "step": 2920 }, { "epoch": 0.5603971318257033, "grad_norm": 0.9089255496500519, "learning_rate": 8.535480924991986e-06, "loss": 0.6999, "step": 2921 }, { "epoch": 0.5605889829492314, "grad_norm": 0.8597857715975257, "learning_rate": 8.529333395463625e-06, "loss": 0.8464, "step": 2922 }, { "epoch": 0.5607808340727596, "grad_norm": 0.9130862340226344, "learning_rate": 8.523186433965748e-06, "loss": 0.742, "step": 2923 }, { "epoch": 0.5609726851962877, "grad_norm": 1.120640618870176, "learning_rate": 8.517040042872553e-06, "loss": 0.7886, "step": 2924 }, { "epoch": 0.5611645363198158, "grad_norm": 1.0589533076994497, "learning_rate": 8.51089422455803e-06, "loss": 0.7591, "step": 2925 }, { "epoch": 0.5613563874433439, "grad_norm": 0.9213065363686382, "learning_rate": 8.504748981395937e-06, "loss": 0.7586, "step": 2926 }, { "epoch": 0.5615482385668721, "grad_norm": 1.02268675024518, "learning_rate": 8.498604315759815e-06, "loss": 0.7192, "step": 2927 }, { "epoch": 0.5617400896904002, "grad_norm": 0.9119021671266796, "learning_rate": 8.49246023002298e-06, "loss": 0.8251, "step": 2928 }, { "epoch": 0.5619319408139284, "grad_norm": 0.8496575601882785, "learning_rate": 8.486316726558522e-06, "loss": 0.78, "step": 2929 }, { "epoch": 0.5621237919374565, "grad_norm": 1.0521623494207606, "learning_rate": 8.480173807739312e-06, "loss": 0.8014, "step": 2930 }, { "epoch": 0.5623156430609847, "grad_norm": 0.9398488045175017, "learning_rate": 8.474031475937995e-06, "loss": 0.7276, "step": 2931 }, { "epoch": 0.5625074941845128, "grad_norm": 1.0238982086767705, "learning_rate": 8.467889733526977e-06, "loss": 0.7517, "step": 2932 }, { "epoch": 0.562699345308041, "grad_norm": 0.8926195062464386, "learning_rate": 8.461748582878454e-06, "loss": 0.8039, "step": 2933 }, { "epoch": 0.5628911964315692, "grad_norm": 1.0782468615765597, "learning_rate": 8.455608026364376e-06, "loss": 0.8459, "step": 2934 }, { "epoch": 0.5630830475550972, "grad_norm": 0.8829912696027562, "learning_rate": 8.449468066356483e-06, "loss": 0.8391, "step": 2935 }, { "epoch": 0.5632748986786253, "grad_norm": 0.9372746898675145, "learning_rate": 8.443328705226268e-06, "loss": 0.7624, "step": 2936 }, { "epoch": 0.5634667498021535, "grad_norm": 1.482715379566216, "learning_rate": 8.437189945345003e-06, "loss": 0.8365, "step": 2937 }, { "epoch": 0.5636586009256817, "grad_norm": 0.9251725193039372, "learning_rate": 8.431051789083712e-06, "loss": 0.7192, "step": 2938 }, { "epoch": 0.5638504520492098, "grad_norm": 0.8987015978762027, "learning_rate": 8.424914238813212e-06, "loss": 0.7397, "step": 2939 }, { "epoch": 0.564042303172738, "grad_norm": 0.93993825603621, "learning_rate": 8.418777296904064e-06, "loss": 0.8163, "step": 2940 }, { "epoch": 0.5642341542962661, "grad_norm": 0.8771242920114376, "learning_rate": 8.412640965726603e-06, "loss": 0.7547, "step": 2941 }, { "epoch": 0.5644260054197943, "grad_norm": 0.9731914809841313, "learning_rate": 8.406505247650926e-06, "loss": 0.8261, "step": 2942 }, { "epoch": 0.5646178565433224, "grad_norm": 0.9414864997239081, "learning_rate": 8.40037014504689e-06, "loss": 0.7894, "step": 2943 }, { "epoch": 0.5648097076668506, "grad_norm": 0.9472353951554812, "learning_rate": 8.394235660284123e-06, "loss": 0.7963, "step": 2944 }, { "epoch": 0.5650015587903786, "grad_norm": 0.846112947411606, "learning_rate": 8.388101795732008e-06, "loss": 0.8184, "step": 2945 }, { "epoch": 0.5651934099139068, "grad_norm": 0.8108174479831086, "learning_rate": 8.38196855375969e-06, "loss": 0.8205, "step": 2946 }, { "epoch": 0.5653852610374349, "grad_norm": 1.1415618587062106, "learning_rate": 8.375835936736072e-06, "loss": 0.7552, "step": 2947 }, { "epoch": 0.5655771121609631, "grad_norm": 0.929027672492886, "learning_rate": 8.369703947029814e-06, "loss": 0.7678, "step": 2948 }, { "epoch": 0.5657689632844912, "grad_norm": 0.9203497344457163, "learning_rate": 8.363572587009342e-06, "loss": 0.7361, "step": 2949 }, { "epoch": 0.5659608144080194, "grad_norm": 1.1019296093827868, "learning_rate": 8.357441859042837e-06, "loss": 0.8081, "step": 2950 }, { "epoch": 0.5661526655315475, "grad_norm": 0.9265109365631473, "learning_rate": 8.351311765498222e-06, "loss": 0.7585, "step": 2951 }, { "epoch": 0.5663445166550757, "grad_norm": 0.8322890827871442, "learning_rate": 8.345182308743191e-06, "loss": 0.8292, "step": 2952 }, { "epoch": 0.5665363677786038, "grad_norm": 0.6025469635372046, "learning_rate": 8.339053491145181e-06, "loss": 0.3223, "step": 2953 }, { "epoch": 0.566728218902132, "grad_norm": 0.9721229590100139, "learning_rate": 8.332925315071394e-06, "loss": 0.7908, "step": 2954 }, { "epoch": 0.5669200700256601, "grad_norm": 0.8560255024544486, "learning_rate": 8.326797782888777e-06, "loss": 0.7905, "step": 2955 }, { "epoch": 0.5671119211491882, "grad_norm": 0.9992535514729405, "learning_rate": 8.320670896964028e-06, "loss": 0.837, "step": 2956 }, { "epoch": 0.5673037722727163, "grad_norm": 1.0619376183277927, "learning_rate": 8.314544659663593e-06, "loss": 0.7737, "step": 2957 }, { "epoch": 0.5674956233962445, "grad_norm": 0.8403125947534451, "learning_rate": 8.308419073353672e-06, "loss": 0.7886, "step": 2958 }, { "epoch": 0.5676874745197726, "grad_norm": 0.8242754030385275, "learning_rate": 8.302294140400216e-06, "loss": 0.7478, "step": 2959 }, { "epoch": 0.5678793256433008, "grad_norm": 0.9004710504274498, "learning_rate": 8.296169863168917e-06, "loss": 0.7896, "step": 2960 }, { "epoch": 0.568071176766829, "grad_norm": 0.8414978296555234, "learning_rate": 8.290046244025218e-06, "loss": 0.751, "step": 2961 }, { "epoch": 0.5682630278903571, "grad_norm": 0.8984412693138433, "learning_rate": 8.283923285334304e-06, "loss": 0.7616, "step": 2962 }, { "epoch": 0.5684548790138853, "grad_norm": 0.9616233968597606, "learning_rate": 8.277800989461106e-06, "loss": 0.816, "step": 2963 }, { "epoch": 0.5686467301374134, "grad_norm": 0.9704420711068914, "learning_rate": 8.27167935877031e-06, "loss": 0.7445, "step": 2964 }, { "epoch": 0.5688385812609416, "grad_norm": 1.0489291299119912, "learning_rate": 8.26555839562633e-06, "loss": 0.7274, "step": 2965 }, { "epoch": 0.5690304323844696, "grad_norm": 0.8673939914927322, "learning_rate": 8.259438102393322e-06, "loss": 0.7867, "step": 2966 }, { "epoch": 0.5692222835079978, "grad_norm": 0.8952916128661716, "learning_rate": 8.253318481435198e-06, "loss": 0.7747, "step": 2967 }, { "epoch": 0.5694141346315259, "grad_norm": 0.9247876772728624, "learning_rate": 8.24719953511559e-06, "loss": 0.7718, "step": 2968 }, { "epoch": 0.5696059857550541, "grad_norm": 1.0888786970167998, "learning_rate": 8.241081265797893e-06, "loss": 0.7539, "step": 2969 }, { "epoch": 0.5697978368785822, "grad_norm": 0.8234452244101961, "learning_rate": 8.234963675845224e-06, "loss": 0.8226, "step": 2970 }, { "epoch": 0.5699896880021104, "grad_norm": 0.8706277950610906, "learning_rate": 8.22884676762044e-06, "loss": 0.7633, "step": 2971 }, { "epoch": 0.5701815391256385, "grad_norm": 1.042046281339909, "learning_rate": 8.222730543486138e-06, "loss": 0.8448, "step": 2972 }, { "epoch": 0.5703733902491667, "grad_norm": 0.9514067682632046, "learning_rate": 8.216615005804647e-06, "loss": 0.7595, "step": 2973 }, { "epoch": 0.5705652413726948, "grad_norm": 1.058939387166013, "learning_rate": 8.210500156938036e-06, "loss": 0.802, "step": 2974 }, { "epoch": 0.570757092496223, "grad_norm": 1.0004034717503063, "learning_rate": 8.204385999248105e-06, "loss": 0.7615, "step": 2975 }, { "epoch": 0.5709489436197511, "grad_norm": 0.8947684100553345, "learning_rate": 8.198272535096386e-06, "loss": 0.7974, "step": 2976 }, { "epoch": 0.5711407947432792, "grad_norm": 1.1881439739632063, "learning_rate": 8.19215976684414e-06, "loss": 0.7133, "step": 2977 }, { "epoch": 0.5713326458668073, "grad_norm": 0.9619555807814664, "learning_rate": 8.186047696852372e-06, "loss": 0.8225, "step": 2978 }, { "epoch": 0.5715244969903355, "grad_norm": 0.9458939792883696, "learning_rate": 8.179936327481805e-06, "loss": 0.7925, "step": 2979 }, { "epoch": 0.5717163481138636, "grad_norm": 0.8322932247731413, "learning_rate": 8.173825661092896e-06, "loss": 0.8296, "step": 2980 }, { "epoch": 0.5719081992373918, "grad_norm": 1.0198371645548356, "learning_rate": 8.167715700045827e-06, "loss": 0.7849, "step": 2981 }, { "epoch": 0.5721000503609199, "grad_norm": 0.8498994173154865, "learning_rate": 8.161606446700507e-06, "loss": 0.7605, "step": 2982 }, { "epoch": 0.5722919014844481, "grad_norm": 0.9086316123358482, "learning_rate": 8.155497903416585e-06, "loss": 0.7577, "step": 2983 }, { "epoch": 0.5724837526079762, "grad_norm": 0.8649686359865526, "learning_rate": 8.14939007255342e-06, "loss": 0.7569, "step": 2984 }, { "epoch": 0.5726756037315044, "grad_norm": 0.9539990862596147, "learning_rate": 8.143282956470102e-06, "loss": 0.7989, "step": 2985 }, { "epoch": 0.5728674548550325, "grad_norm": 0.8830518990029244, "learning_rate": 8.137176557525446e-06, "loss": 0.7828, "step": 2986 }, { "epoch": 0.5730593059785606, "grad_norm": 0.8067311118439666, "learning_rate": 8.131070878077982e-06, "loss": 0.7496, "step": 2987 }, { "epoch": 0.5732511571020887, "grad_norm": 1.0148652528273916, "learning_rate": 8.124965920485976e-06, "loss": 0.8397, "step": 2988 }, { "epoch": 0.5734430082256169, "grad_norm": 0.8751032086398067, "learning_rate": 8.118861687107405e-06, "loss": 0.7556, "step": 2989 }, { "epoch": 0.573634859349145, "grad_norm": 0.6902030812995175, "learning_rate": 8.11275818029997e-06, "loss": 0.3177, "step": 2990 }, { "epoch": 0.5738267104726732, "grad_norm": 0.9633137647272534, "learning_rate": 8.10665540242109e-06, "loss": 0.8032, "step": 2991 }, { "epoch": 0.5740185615962013, "grad_norm": 0.8899108052027019, "learning_rate": 8.100553355827897e-06, "loss": 0.6963, "step": 2992 }, { "epoch": 0.5742104127197295, "grad_norm": 0.9008732586322282, "learning_rate": 8.094452042877254e-06, "loss": 0.753, "step": 2993 }, { "epoch": 0.5744022638432577, "grad_norm": 0.9729060430564221, "learning_rate": 8.088351465925732e-06, "loss": 0.8527, "step": 2994 }, { "epoch": 0.5745941149667858, "grad_norm": 0.8272437576559137, "learning_rate": 8.082251627329617e-06, "loss": 0.7135, "step": 2995 }, { "epoch": 0.574785966090314, "grad_norm": 0.941166934396968, "learning_rate": 8.07615252944491e-06, "loss": 0.8054, "step": 2996 }, { "epoch": 0.5749778172138421, "grad_norm": 0.8217452567444686, "learning_rate": 8.070054174627326e-06, "loss": 0.7534, "step": 2997 }, { "epoch": 0.5751696683373702, "grad_norm": 0.8164405059658671, "learning_rate": 8.063956565232303e-06, "loss": 0.7648, "step": 2998 }, { "epoch": 0.5753615194608983, "grad_norm": 0.8672843133119181, "learning_rate": 8.057859703614976e-06, "loss": 0.818, "step": 2999 }, { "epoch": 0.5755533705844265, "grad_norm": 0.9397035600777677, "learning_rate": 8.0517635921302e-06, "loss": 0.7461, "step": 3000 }, { "epoch": 0.5757452217079546, "grad_norm": 0.8589021894169262, "learning_rate": 8.045668233132538e-06, "loss": 0.7702, "step": 3001 }, { "epoch": 0.5759370728314828, "grad_norm": 0.8345248309936567, "learning_rate": 8.039573628976254e-06, "loss": 0.7705, "step": 3002 }, { "epoch": 0.5761289239550109, "grad_norm": 0.9264399842005846, "learning_rate": 8.033479782015341e-06, "loss": 0.8024, "step": 3003 }, { "epoch": 0.5763207750785391, "grad_norm": 0.8874262489385941, "learning_rate": 8.027386694603482e-06, "loss": 0.7814, "step": 3004 }, { "epoch": 0.5765126262020672, "grad_norm": 0.9299730733228742, "learning_rate": 8.021294369094072e-06, "loss": 0.7697, "step": 3005 }, { "epoch": 0.5767044773255954, "grad_norm": 0.8706510546288548, "learning_rate": 8.015202807840214e-06, "loss": 0.7207, "step": 3006 }, { "epoch": 0.5768963284491235, "grad_norm": 0.8991934694372651, "learning_rate": 8.009112013194707e-06, "loss": 0.7598, "step": 3007 }, { "epoch": 0.5770881795726516, "grad_norm": 0.9391196781301323, "learning_rate": 8.003021987510069e-06, "loss": 0.7919, "step": 3008 }, { "epoch": 0.5772800306961797, "grad_norm": 0.8632875851809921, "learning_rate": 7.996932733138506e-06, "loss": 0.7647, "step": 3009 }, { "epoch": 0.5774718818197079, "grad_norm": 1.007190521536914, "learning_rate": 7.990844252431936e-06, "loss": 0.9119, "step": 3010 }, { "epoch": 0.577663732943236, "grad_norm": 1.0779015062688038, "learning_rate": 7.984756547741973e-06, "loss": 0.8238, "step": 3011 }, { "epoch": 0.5778555840667642, "grad_norm": 0.9483994514436493, "learning_rate": 7.97866962141993e-06, "loss": 0.8649, "step": 3012 }, { "epoch": 0.5780474351902923, "grad_norm": 0.9854949201915273, "learning_rate": 7.972583475816829e-06, "loss": 0.7537, "step": 3013 }, { "epoch": 0.5782392863138205, "grad_norm": 0.8911949205891055, "learning_rate": 7.966498113283381e-06, "loss": 0.7721, "step": 3014 }, { "epoch": 0.5784311374373486, "grad_norm": 0.8202364958559896, "learning_rate": 7.960413536169999e-06, "loss": 0.7724, "step": 3015 }, { "epoch": 0.5786229885608768, "grad_norm": 1.012835982081383, "learning_rate": 7.954329746826782e-06, "loss": 0.8275, "step": 3016 }, { "epoch": 0.578814839684405, "grad_norm": 0.9144401575355111, "learning_rate": 7.948246747603545e-06, "loss": 0.7547, "step": 3017 }, { "epoch": 0.579006690807933, "grad_norm": 1.0626402158969492, "learning_rate": 7.942164540849781e-06, "loss": 0.8047, "step": 3018 }, { "epoch": 0.5791985419314611, "grad_norm": 0.8367599029335975, "learning_rate": 7.936083128914685e-06, "loss": 0.7135, "step": 3019 }, { "epoch": 0.5793903930549893, "grad_norm": 1.1444353348825342, "learning_rate": 7.93000251414714e-06, "loss": 0.7772, "step": 3020 }, { "epoch": 0.5795822441785174, "grad_norm": 0.9620103790583504, "learning_rate": 7.92392269889572e-06, "loss": 0.7924, "step": 3021 }, { "epoch": 0.5797740953020456, "grad_norm": 1.0098975737426432, "learning_rate": 7.917843685508702e-06, "loss": 0.7379, "step": 3022 }, { "epoch": 0.5799659464255738, "grad_norm": 0.9836826709789264, "learning_rate": 7.911765476334042e-06, "loss": 0.7344, "step": 3023 }, { "epoch": 0.5801577975491019, "grad_norm": 0.9642507567796399, "learning_rate": 7.905688073719387e-06, "loss": 0.767, "step": 3024 }, { "epoch": 0.5803496486726301, "grad_norm": 0.8165894411299709, "learning_rate": 7.899611480012073e-06, "loss": 0.7797, "step": 3025 }, { "epoch": 0.5805414997961582, "grad_norm": 0.9972162216123259, "learning_rate": 7.893535697559122e-06, "loss": 0.7821, "step": 3026 }, { "epoch": 0.5807333509196864, "grad_norm": 0.8981511570641907, "learning_rate": 7.887460728707253e-06, "loss": 0.7391, "step": 3027 }, { "epoch": 0.5809252020432145, "grad_norm": 0.9093763498774431, "learning_rate": 7.88138657580286e-06, "loss": 0.7918, "step": 3028 }, { "epoch": 0.5811170531667426, "grad_norm": 0.9329916695365638, "learning_rate": 7.875313241192023e-06, "loss": 0.7433, "step": 3029 }, { "epoch": 0.5813089042902707, "grad_norm": 0.835907807995617, "learning_rate": 7.869240727220506e-06, "loss": 0.8255, "step": 3030 }, { "epoch": 0.5815007554137989, "grad_norm": 0.8951426792501571, "learning_rate": 7.863169036233757e-06, "loss": 0.7751, "step": 3031 }, { "epoch": 0.581692606537327, "grad_norm": 0.9628916041570018, "learning_rate": 7.857098170576914e-06, "loss": 0.7788, "step": 3032 }, { "epoch": 0.5818844576608552, "grad_norm": 0.890386937890739, "learning_rate": 7.851028132594783e-06, "loss": 0.8175, "step": 3033 }, { "epoch": 0.5820763087843833, "grad_norm": 0.8821608613488175, "learning_rate": 7.844958924631859e-06, "loss": 0.8259, "step": 3034 }, { "epoch": 0.5822681599079115, "grad_norm": 1.0168102064908562, "learning_rate": 7.838890549032313e-06, "loss": 0.7158, "step": 3035 }, { "epoch": 0.5824600110314396, "grad_norm": 1.139363092734792, "learning_rate": 7.832823008139992e-06, "loss": 0.8014, "step": 3036 }, { "epoch": 0.5826518621549678, "grad_norm": 0.9751575613070085, "learning_rate": 7.826756304298428e-06, "loss": 0.8325, "step": 3037 }, { "epoch": 0.5828437132784959, "grad_norm": 0.9054521544516887, "learning_rate": 7.820690439850824e-06, "loss": 0.7216, "step": 3038 }, { "epoch": 0.583035564402024, "grad_norm": 0.8361460588586144, "learning_rate": 7.814625417140062e-06, "loss": 0.7992, "step": 3039 }, { "epoch": 0.5832274155255521, "grad_norm": 0.937456239206878, "learning_rate": 7.808561238508696e-06, "loss": 0.8636, "step": 3040 }, { "epoch": 0.5834192666490803, "grad_norm": 0.989220562369272, "learning_rate": 7.802497906298954e-06, "loss": 0.7735, "step": 3041 }, { "epoch": 0.5836111177726084, "grad_norm": 1.0456130684506344, "learning_rate": 7.79643542285274e-06, "loss": 0.7901, "step": 3042 }, { "epoch": 0.5838029688961366, "grad_norm": 0.8767104599643625, "learning_rate": 7.790373790511628e-06, "loss": 0.7942, "step": 3043 }, { "epoch": 0.5839948200196647, "grad_norm": 1.1101703639585667, "learning_rate": 7.784313011616865e-06, "loss": 0.7568, "step": 3044 }, { "epoch": 0.5841866711431929, "grad_norm": 1.0777376123923572, "learning_rate": 7.778253088509366e-06, "loss": 0.7779, "step": 3045 }, { "epoch": 0.584378522266721, "grad_norm": 0.8820818035698555, "learning_rate": 7.772194023529711e-06, "loss": 0.8177, "step": 3046 }, { "epoch": 0.5845703733902492, "grad_norm": 0.9138786718876188, "learning_rate": 7.766135819018163e-06, "loss": 0.7696, "step": 3047 }, { "epoch": 0.5847622245137774, "grad_norm": 0.8665189103709204, "learning_rate": 7.760078477314642e-06, "loss": 0.726, "step": 3048 }, { "epoch": 0.5849540756373055, "grad_norm": 0.8660988396110288, "learning_rate": 7.754022000758734e-06, "loss": 0.7585, "step": 3049 }, { "epoch": 0.5851459267608335, "grad_norm": 0.8836334803844351, "learning_rate": 7.747966391689695e-06, "loss": 0.7741, "step": 3050 }, { "epoch": 0.5853377778843617, "grad_norm": 0.8016071671175735, "learning_rate": 7.741911652446441e-06, "loss": 0.8169, "step": 3051 }, { "epoch": 0.5855296290078899, "grad_norm": 0.6986244543445917, "learning_rate": 7.73585778536756e-06, "loss": 0.3009, "step": 3052 }, { "epoch": 0.585721480131418, "grad_norm": 0.8299821511859315, "learning_rate": 7.729804792791298e-06, "loss": 0.6936, "step": 3053 }, { "epoch": 0.5859133312549462, "grad_norm": 1.0055809553975499, "learning_rate": 7.72375267705556e-06, "loss": 0.6997, "step": 3054 }, { "epoch": 0.5861051823784743, "grad_norm": 1.021867586813852, "learning_rate": 7.717701440497917e-06, "loss": 0.8387, "step": 3055 }, { "epoch": 0.5862970335020025, "grad_norm": 0.9843900008071895, "learning_rate": 7.711651085455604e-06, "loss": 0.7847, "step": 3056 }, { "epoch": 0.5864888846255306, "grad_norm": 0.8568144559761239, "learning_rate": 7.705601614265507e-06, "loss": 0.8129, "step": 3057 }, { "epoch": 0.5866807357490588, "grad_norm": 0.9366739163759615, "learning_rate": 7.699553029264175e-06, "loss": 0.7619, "step": 3058 }, { "epoch": 0.5868725868725869, "grad_norm": 0.6873802216718777, "learning_rate": 7.693505332787814e-06, "loss": 0.3011, "step": 3059 }, { "epoch": 0.587064437996115, "grad_norm": 0.9266923894903778, "learning_rate": 7.687458527172285e-06, "loss": 0.7969, "step": 3060 }, { "epoch": 0.5872562891196431, "grad_norm": 0.8252630917625818, "learning_rate": 7.681412614753115e-06, "loss": 0.7718, "step": 3061 }, { "epoch": 0.5874481402431713, "grad_norm": 1.0090422939605115, "learning_rate": 7.67536759786547e-06, "loss": 0.6915, "step": 3062 }, { "epoch": 0.5876399913666994, "grad_norm": 1.010186290264374, "learning_rate": 7.669323478844184e-06, "loss": 0.7128, "step": 3063 }, { "epoch": 0.5878318424902276, "grad_norm": 1.019605511245048, "learning_rate": 7.663280260023735e-06, "loss": 0.8183, "step": 3064 }, { "epoch": 0.5880236936137557, "grad_norm": 0.8983467386934939, "learning_rate": 7.657237943738255e-06, "loss": 0.7859, "step": 3065 }, { "epoch": 0.5882155447372839, "grad_norm": 1.0361911640502406, "learning_rate": 7.651196532321535e-06, "loss": 0.7958, "step": 3066 }, { "epoch": 0.588407395860812, "grad_norm": 0.8687712340886441, "learning_rate": 7.645156028107005e-06, "loss": 0.7894, "step": 3067 }, { "epoch": 0.5885992469843402, "grad_norm": 0.9004521746647272, "learning_rate": 7.639116433427753e-06, "loss": 0.7543, "step": 3068 }, { "epoch": 0.5887910981078683, "grad_norm": 0.8920247363676627, "learning_rate": 7.633077750616512e-06, "loss": 0.8144, "step": 3069 }, { "epoch": 0.5889829492313965, "grad_norm": 0.9137258835377088, "learning_rate": 7.627039982005663e-06, "loss": 0.7629, "step": 3070 }, { "epoch": 0.5891748003549245, "grad_norm": 1.0294757134463839, "learning_rate": 7.621003129927237e-06, "loss": 0.8301, "step": 3071 }, { "epoch": 0.5893666514784527, "grad_norm": 0.86620506573031, "learning_rate": 7.614967196712908e-06, "loss": 0.7486, "step": 3072 }, { "epoch": 0.5895585026019808, "grad_norm": 1.0749632522749408, "learning_rate": 7.6089321846939935e-06, "loss": 0.7301, "step": 3073 }, { "epoch": 0.589750353725509, "grad_norm": 0.8340474440328237, "learning_rate": 7.6028980962014595e-06, "loss": 0.8213, "step": 3074 }, { "epoch": 0.5899422048490371, "grad_norm": 0.8999794223349568, "learning_rate": 7.596864933565909e-06, "loss": 0.7934, "step": 3075 }, { "epoch": 0.5901340559725653, "grad_norm": 0.8966136740456354, "learning_rate": 7.5908326991176e-06, "loss": 0.7399, "step": 3076 }, { "epoch": 0.5903259070960934, "grad_norm": 0.9772681331001039, "learning_rate": 7.584801395186417e-06, "loss": 0.85, "step": 3077 }, { "epoch": 0.5905177582196216, "grad_norm": 0.8440734702885578, "learning_rate": 7.578771024101896e-06, "loss": 0.7673, "step": 3078 }, { "epoch": 0.5907096093431498, "grad_norm": 0.8400896014242799, "learning_rate": 7.572741588193206e-06, "loss": 0.779, "step": 3079 }, { "epoch": 0.5909014604666779, "grad_norm": 1.0306494688532462, "learning_rate": 7.56671308978915e-06, "loss": 0.6859, "step": 3080 }, { "epoch": 0.591093311590206, "grad_norm": 0.8349850636758761, "learning_rate": 7.56068553121819e-06, "loss": 0.8022, "step": 3081 }, { "epoch": 0.5912851627137341, "grad_norm": 0.8963690817332143, "learning_rate": 7.554658914808404e-06, "loss": 0.7659, "step": 3082 }, { "epoch": 0.5914770138372623, "grad_norm": 1.0037474942967957, "learning_rate": 7.548633242887512e-06, "loss": 0.7947, "step": 3083 }, { "epoch": 0.5916688649607904, "grad_norm": 1.0526294627370818, "learning_rate": 7.542608517782873e-06, "loss": 0.8431, "step": 3084 }, { "epoch": 0.5918607160843186, "grad_norm": 0.9250763223908397, "learning_rate": 7.536584741821473e-06, "loss": 0.7839, "step": 3085 }, { "epoch": 0.5920525672078467, "grad_norm": 0.9941874283258822, "learning_rate": 7.530561917329944e-06, "loss": 0.7831, "step": 3086 }, { "epoch": 0.5922444183313749, "grad_norm": 1.029407134408433, "learning_rate": 7.5245400466345364e-06, "loss": 0.788, "step": 3087 }, { "epoch": 0.592436269454903, "grad_norm": 0.9956819027340472, "learning_rate": 7.518519132061142e-06, "loss": 0.8233, "step": 3088 }, { "epoch": 0.5926281205784312, "grad_norm": 1.08012407179835, "learning_rate": 7.512499175935277e-06, "loss": 0.8157, "step": 3089 }, { "epoch": 0.5928199717019593, "grad_norm": 0.8968542192432832, "learning_rate": 7.5064801805820875e-06, "loss": 0.7338, "step": 3090 }, { "epoch": 0.5930118228254874, "grad_norm": 0.8000345365809143, "learning_rate": 7.500462148326358e-06, "loss": 0.7479, "step": 3091 }, { "epoch": 0.5932036739490155, "grad_norm": 0.9974713384578598, "learning_rate": 7.494445081492494e-06, "loss": 0.7927, "step": 3092 }, { "epoch": 0.5933955250725437, "grad_norm": 0.8210574225840291, "learning_rate": 7.488428982404524e-06, "loss": 0.7615, "step": 3093 }, { "epoch": 0.5935873761960718, "grad_norm": 1.035601542798168, "learning_rate": 7.482413853386104e-06, "loss": 0.829, "step": 3094 }, { "epoch": 0.5937792273196, "grad_norm": 0.971115850169227, "learning_rate": 7.476399696760525e-06, "loss": 0.739, "step": 3095 }, { "epoch": 0.5939710784431281, "grad_norm": 0.8748326821486969, "learning_rate": 7.4703865148506935e-06, "loss": 0.8137, "step": 3096 }, { "epoch": 0.5941629295666563, "grad_norm": 0.9338994385159481, "learning_rate": 7.464374309979143e-06, "loss": 0.7486, "step": 3097 }, { "epoch": 0.5943547806901844, "grad_norm": 0.943728042067691, "learning_rate": 7.4583630844680255e-06, "loss": 0.7849, "step": 3098 }, { "epoch": 0.5945466318137126, "grad_norm": 0.8372915371993153, "learning_rate": 7.452352840639118e-06, "loss": 0.7823, "step": 3099 }, { "epoch": 0.5947384829372407, "grad_norm": 1.2480521057429645, "learning_rate": 7.446343580813819e-06, "loss": 0.7915, "step": 3100 }, { "epoch": 0.5949303340607689, "grad_norm": 0.950327860935551, "learning_rate": 7.440335307313149e-06, "loss": 0.7956, "step": 3101 }, { "epoch": 0.5951221851842969, "grad_norm": 0.8739176437268675, "learning_rate": 7.4343280224577395e-06, "loss": 0.8716, "step": 3102 }, { "epoch": 0.5953140363078251, "grad_norm": 0.8752044987973704, "learning_rate": 7.428321728567846e-06, "loss": 0.878, "step": 3103 }, { "epoch": 0.5955058874313532, "grad_norm": 0.976636633576474, "learning_rate": 7.422316427963339e-06, "loss": 0.7257, "step": 3104 }, { "epoch": 0.5956977385548814, "grad_norm": 0.9239452931628158, "learning_rate": 7.416312122963711e-06, "loss": 0.793, "step": 3105 }, { "epoch": 0.5958895896784095, "grad_norm": 0.8619234746399663, "learning_rate": 7.410308815888068e-06, "loss": 0.7711, "step": 3106 }, { "epoch": 0.5960814408019377, "grad_norm": 0.8703015613888299, "learning_rate": 7.404306509055119e-06, "loss": 0.712, "step": 3107 }, { "epoch": 0.5962732919254659, "grad_norm": 0.897278323422213, "learning_rate": 7.398305204783203e-06, "loss": 0.8302, "step": 3108 }, { "epoch": 0.596465143048994, "grad_norm": 0.6930896257225101, "learning_rate": 7.392304905390256e-06, "loss": 0.3533, "step": 3109 }, { "epoch": 0.5966569941725222, "grad_norm": 0.9877130628599933, "learning_rate": 7.386305613193846e-06, "loss": 0.807, "step": 3110 }, { "epoch": 0.5968488452960503, "grad_norm": 0.9911147005091527, "learning_rate": 7.380307330511134e-06, "loss": 0.8217, "step": 3111 }, { "epoch": 0.5970406964195784, "grad_norm": 0.9620896829880197, "learning_rate": 7.3743100596589e-06, "loss": 0.7715, "step": 3112 }, { "epoch": 0.5972325475431065, "grad_norm": 1.0153652115013025, "learning_rate": 7.368313802953528e-06, "loss": 0.7353, "step": 3113 }, { "epoch": 0.5974243986666347, "grad_norm": 0.8844271751819744, "learning_rate": 7.362318562711015e-06, "loss": 0.7598, "step": 3114 }, { "epoch": 0.5976162497901628, "grad_norm": 1.0366392116220682, "learning_rate": 7.356324341246963e-06, "loss": 0.7694, "step": 3115 }, { "epoch": 0.597808100913691, "grad_norm": 0.8534070847742359, "learning_rate": 7.350331140876582e-06, "loss": 0.7934, "step": 3116 }, { "epoch": 0.5979999520372191, "grad_norm": 1.0579160000787902, "learning_rate": 7.344338963914686e-06, "loss": 0.7957, "step": 3117 }, { "epoch": 0.5981918031607473, "grad_norm": 0.9022483061800479, "learning_rate": 7.3383478126756925e-06, "loss": 0.8132, "step": 3118 }, { "epoch": 0.5983836542842754, "grad_norm": 0.9018317450322239, "learning_rate": 7.332357689473624e-06, "loss": 0.8168, "step": 3119 }, { "epoch": 0.5985755054078036, "grad_norm": 0.888697445497931, "learning_rate": 7.326368596622111e-06, "loss": 0.7838, "step": 3120 }, { "epoch": 0.5987673565313317, "grad_norm": 0.9992119780648965, "learning_rate": 7.32038053643438e-06, "loss": 0.7554, "step": 3121 }, { "epoch": 0.5989592076548599, "grad_norm": 0.9542016808974907, "learning_rate": 7.314393511223259e-06, "loss": 0.7833, "step": 3122 }, { "epoch": 0.5991510587783879, "grad_norm": 0.9031860085516614, "learning_rate": 7.3084075233011774e-06, "loss": 0.7749, "step": 3123 }, { "epoch": 0.5993429099019161, "grad_norm": 0.9081854162427645, "learning_rate": 7.30242257498016e-06, "loss": 0.8003, "step": 3124 }, { "epoch": 0.5995347610254442, "grad_norm": 1.0545866845760226, "learning_rate": 7.296438668571843e-06, "loss": 0.8745, "step": 3125 }, { "epoch": 0.5997266121489724, "grad_norm": 0.7622971147696606, "learning_rate": 7.290455806387447e-06, "loss": 0.7915, "step": 3126 }, { "epoch": 0.5999184632725005, "grad_norm": 0.9301603917839388, "learning_rate": 7.284473990737795e-06, "loss": 0.6883, "step": 3127 }, { "epoch": 0.6001103143960287, "grad_norm": 0.9723820422781234, "learning_rate": 7.2784932239333e-06, "loss": 0.7437, "step": 3128 }, { "epoch": 0.6003021655195568, "grad_norm": 0.8487895963046447, "learning_rate": 7.2725135082839785e-06, "loss": 0.7791, "step": 3129 }, { "epoch": 0.600494016643085, "grad_norm": 1.1534562499947656, "learning_rate": 7.266534846099434e-06, "loss": 0.6827, "step": 3130 }, { "epoch": 0.6006858677666131, "grad_norm": 0.9715163935434901, "learning_rate": 7.260557239688868e-06, "loss": 0.7663, "step": 3131 }, { "epoch": 0.6008777188901413, "grad_norm": 0.9981373707654666, "learning_rate": 7.254580691361073e-06, "loss": 0.7783, "step": 3132 }, { "epoch": 0.6010695700136693, "grad_norm": 0.967995557718147, "learning_rate": 7.248605203424426e-06, "loss": 0.8172, "step": 3133 }, { "epoch": 0.6012614211371975, "grad_norm": 0.9420217760100301, "learning_rate": 7.2426307781869075e-06, "loss": 0.8014, "step": 3134 }, { "epoch": 0.6014532722607256, "grad_norm": 0.8463946878148825, "learning_rate": 7.236657417956076e-06, "loss": 0.8049, "step": 3135 }, { "epoch": 0.6016451233842538, "grad_norm": 1.075729294362773, "learning_rate": 7.230685125039085e-06, "loss": 0.7183, "step": 3136 }, { "epoch": 0.601836974507782, "grad_norm": 0.837506256828689, "learning_rate": 7.224713901742675e-06, "loss": 0.6965, "step": 3137 }, { "epoch": 0.6020288256313101, "grad_norm": 1.0715937115528147, "learning_rate": 7.218743750373164e-06, "loss": 0.8194, "step": 3138 }, { "epoch": 0.6022206767548383, "grad_norm": 0.9979954620208676, "learning_rate": 7.212774673236474e-06, "loss": 0.7783, "step": 3139 }, { "epoch": 0.6024125278783664, "grad_norm": 1.127174613281061, "learning_rate": 7.2068066726381e-06, "loss": 0.7237, "step": 3140 }, { "epoch": 0.6026043790018946, "grad_norm": 0.9282195116077885, "learning_rate": 7.200839750883122e-06, "loss": 0.7567, "step": 3141 }, { "epoch": 0.6027962301254227, "grad_norm": 0.6977713516439529, "learning_rate": 7.194873910276205e-06, "loss": 0.3195, "step": 3142 }, { "epoch": 0.6029880812489509, "grad_norm": 0.8765893155070623, "learning_rate": 7.188909153121593e-06, "loss": 0.7985, "step": 3143 }, { "epoch": 0.6031799323724789, "grad_norm": 0.8753415260148941, "learning_rate": 7.182945481723119e-06, "loss": 0.7636, "step": 3144 }, { "epoch": 0.6033717834960071, "grad_norm": 0.90883744699569, "learning_rate": 7.176982898384193e-06, "loss": 0.7652, "step": 3145 }, { "epoch": 0.6035636346195352, "grad_norm": 0.9519533640209369, "learning_rate": 7.1710214054078e-06, "loss": 0.763, "step": 3146 }, { "epoch": 0.6037554857430634, "grad_norm": 0.975515763329654, "learning_rate": 7.165061005096509e-06, "loss": 0.6973, "step": 3147 }, { "epoch": 0.6039473368665915, "grad_norm": 0.9369593917695858, "learning_rate": 7.159101699752466e-06, "loss": 0.7838, "step": 3148 }, { "epoch": 0.6041391879901197, "grad_norm": 0.6548180428974554, "learning_rate": 7.153143491677398e-06, "loss": 0.3285, "step": 3149 }, { "epoch": 0.6043310391136478, "grad_norm": 0.8808353977201406, "learning_rate": 7.147186383172599e-06, "loss": 0.748, "step": 3150 }, { "epoch": 0.604522890237176, "grad_norm": 0.6793462679966018, "learning_rate": 7.1412303765389455e-06, "loss": 0.3306, "step": 3151 }, { "epoch": 0.6047147413607041, "grad_norm": 0.8758785811410463, "learning_rate": 7.135275474076887e-06, "loss": 0.7589, "step": 3152 }, { "epoch": 0.6049065924842323, "grad_norm": 0.8537838448523389, "learning_rate": 7.12932167808644e-06, "loss": 0.7534, "step": 3153 }, { "epoch": 0.6050984436077603, "grad_norm": 1.1194498268619462, "learning_rate": 7.12336899086721e-06, "loss": 0.746, "step": 3154 }, { "epoch": 0.6052902947312885, "grad_norm": 0.8721564210783417, "learning_rate": 7.117417414718357e-06, "loss": 0.7753, "step": 3155 }, { "epoch": 0.6054821458548166, "grad_norm": 0.6288496377163598, "learning_rate": 7.111466951938622e-06, "loss": 0.3117, "step": 3156 }, { "epoch": 0.6056739969783448, "grad_norm": 0.8899163684080424, "learning_rate": 7.1055176048263085e-06, "loss": 0.7244, "step": 3157 }, { "epoch": 0.6058658481018729, "grad_norm": 0.9747690165130573, "learning_rate": 7.099569375679292e-06, "loss": 0.7728, "step": 3158 }, { "epoch": 0.6060576992254011, "grad_norm": 0.9024847580197395, "learning_rate": 7.093622266795025e-06, "loss": 0.7922, "step": 3159 }, { "epoch": 0.6062495503489292, "grad_norm": 0.8663185033352283, "learning_rate": 7.087676280470514e-06, "loss": 0.835, "step": 3160 }, { "epoch": 0.6064414014724574, "grad_norm": 0.9702664764729133, "learning_rate": 7.08173141900234e-06, "loss": 0.8253, "step": 3161 }, { "epoch": 0.6066332525959856, "grad_norm": 0.8900202076689616, "learning_rate": 7.0757876846866454e-06, "loss": 0.7298, "step": 3162 }, { "epoch": 0.6068251037195137, "grad_norm": 0.6363020838946801, "learning_rate": 7.069845079819139e-06, "loss": 0.3009, "step": 3163 }, { "epoch": 0.6070169548430417, "grad_norm": 0.9403250281280113, "learning_rate": 7.063903606695095e-06, "loss": 0.7502, "step": 3164 }, { "epoch": 0.6072088059665699, "grad_norm": 0.883929300876691, "learning_rate": 7.057963267609348e-06, "loss": 0.7981, "step": 3165 }, { "epoch": 0.607400657090098, "grad_norm": 1.046785539712624, "learning_rate": 7.052024064856296e-06, "loss": 0.7844, "step": 3166 }, { "epoch": 0.6075925082136262, "grad_norm": 0.8465845829871155, "learning_rate": 7.046086000729896e-06, "loss": 0.7159, "step": 3167 }, { "epoch": 0.6077843593371544, "grad_norm": 0.6177684793946082, "learning_rate": 7.0401490775236635e-06, "loss": 0.3357, "step": 3168 }, { "epoch": 0.6079762104606825, "grad_norm": 0.9481551071086912, "learning_rate": 7.034213297530685e-06, "loss": 0.8532, "step": 3169 }, { "epoch": 0.6081680615842107, "grad_norm": 1.0442095640759517, "learning_rate": 7.028278663043596e-06, "loss": 0.7297, "step": 3170 }, { "epoch": 0.6083599127077388, "grad_norm": 0.9580608399053732, "learning_rate": 7.022345176354585e-06, "loss": 0.7817, "step": 3171 }, { "epoch": 0.608551763831267, "grad_norm": 0.7798285286994202, "learning_rate": 7.0164128397554e-06, "loss": 0.7853, "step": 3172 }, { "epoch": 0.6087436149547951, "grad_norm": 0.9048751513691451, "learning_rate": 7.010481655537358e-06, "loss": 0.77, "step": 3173 }, { "epoch": 0.6089354660783233, "grad_norm": 0.9587737929074723, "learning_rate": 7.004551625991314e-06, "loss": 0.8375, "step": 3174 }, { "epoch": 0.6091273172018513, "grad_norm": 0.863182067283264, "learning_rate": 6.998622753407685e-06, "loss": 0.77, "step": 3175 }, { "epoch": 0.6093191683253795, "grad_norm": 0.9379575312596161, "learning_rate": 6.9926950400764386e-06, "loss": 0.7884, "step": 3176 }, { "epoch": 0.6095110194489076, "grad_norm": 0.9113544998125256, "learning_rate": 6.986768488287095e-06, "loss": 0.7887, "step": 3177 }, { "epoch": 0.6097028705724358, "grad_norm": 0.8356564501672418, "learning_rate": 6.9808431003287295e-06, "loss": 0.7992, "step": 3178 }, { "epoch": 0.6098947216959639, "grad_norm": 0.8902922626541936, "learning_rate": 6.974918878489963e-06, "loss": 0.7809, "step": 3179 }, { "epoch": 0.6100865728194921, "grad_norm": 0.9247380317361142, "learning_rate": 6.968995825058966e-06, "loss": 0.7429, "step": 3180 }, { "epoch": 0.6102784239430202, "grad_norm": 0.8631453817254204, "learning_rate": 6.963073942323464e-06, "loss": 0.775, "step": 3181 }, { "epoch": 0.6104702750665484, "grad_norm": 0.9190472400556672, "learning_rate": 6.957153232570717e-06, "loss": 0.7745, "step": 3182 }, { "epoch": 0.6106621261900765, "grad_norm": 0.8558974514555979, "learning_rate": 6.951233698087554e-06, "loss": 0.8364, "step": 3183 }, { "epoch": 0.6108539773136047, "grad_norm": 0.9627133281053208, "learning_rate": 6.945315341160328e-06, "loss": 0.7211, "step": 3184 }, { "epoch": 0.6110458284371327, "grad_norm": 0.9571572702634109, "learning_rate": 6.9393981640749486e-06, "loss": 0.7953, "step": 3185 }, { "epoch": 0.6112376795606609, "grad_norm": 0.8749357279905733, "learning_rate": 6.933482169116865e-06, "loss": 0.8046, "step": 3186 }, { "epoch": 0.611429530684189, "grad_norm": 1.068954524073571, "learning_rate": 6.92756735857107e-06, "loss": 0.7695, "step": 3187 }, { "epoch": 0.6116213818077172, "grad_norm": 0.8863983382758621, "learning_rate": 6.921653734722107e-06, "loss": 0.7106, "step": 3188 }, { "epoch": 0.6118132329312453, "grad_norm": 0.9621595125860718, "learning_rate": 6.915741299854053e-06, "loss": 0.8022, "step": 3189 }, { "epoch": 0.6120050840547735, "grad_norm": 0.938655773315364, "learning_rate": 6.909830056250527e-06, "loss": 0.7748, "step": 3190 }, { "epoch": 0.6121969351783016, "grad_norm": 0.8827842403534227, "learning_rate": 6.903920006194689e-06, "loss": 0.7769, "step": 3191 }, { "epoch": 0.6123887863018298, "grad_norm": 0.9282205243744734, "learning_rate": 6.898011151969233e-06, "loss": 0.7249, "step": 3192 }, { "epoch": 0.612580637425358, "grad_norm": 0.9834496635903734, "learning_rate": 6.892103495856404e-06, "loss": 0.7695, "step": 3193 }, { "epoch": 0.6127724885488861, "grad_norm": 0.8834940215613155, "learning_rate": 6.8861970401379695e-06, "loss": 0.7714, "step": 3194 }, { "epoch": 0.6129643396724143, "grad_norm": 0.9836984979788683, "learning_rate": 6.880291787095244e-06, "loss": 0.7687, "step": 3195 }, { "epoch": 0.6131561907959423, "grad_norm": 0.9030647385475231, "learning_rate": 6.8743877390090695e-06, "loss": 0.8168, "step": 3196 }, { "epoch": 0.6133480419194705, "grad_norm": 1.1283539146991612, "learning_rate": 6.868484898159829e-06, "loss": 0.7041, "step": 3197 }, { "epoch": 0.6135398930429986, "grad_norm": 0.9849098759393817, "learning_rate": 6.862583266827434e-06, "loss": 0.7491, "step": 3198 }, { "epoch": 0.6137317441665268, "grad_norm": 0.9725890601697472, "learning_rate": 6.856682847291336e-06, "loss": 0.7558, "step": 3199 }, { "epoch": 0.6139235952900549, "grad_norm": 0.7798750709910784, "learning_rate": 6.85078364183051e-06, "loss": 0.7827, "step": 3200 }, { "epoch": 0.6141154464135831, "grad_norm": 0.8496212852033217, "learning_rate": 6.844885652723467e-06, "loss": 0.7413, "step": 3201 }, { "epoch": 0.6143072975371112, "grad_norm": 0.8951485482590021, "learning_rate": 6.838988882248243e-06, "loss": 0.7262, "step": 3202 }, { "epoch": 0.6144991486606394, "grad_norm": 0.896405474781808, "learning_rate": 6.833093332682417e-06, "loss": 0.7907, "step": 3203 }, { "epoch": 0.6146909997841675, "grad_norm": 0.9207195597005413, "learning_rate": 6.82719900630308e-06, "loss": 0.7671, "step": 3204 }, { "epoch": 0.6148828509076957, "grad_norm": 0.8651655971411383, "learning_rate": 6.821305905386861e-06, "loss": 0.7606, "step": 3205 }, { "epoch": 0.6150747020312237, "grad_norm": 0.9583302918928183, "learning_rate": 6.815414032209909e-06, "loss": 0.6958, "step": 3206 }, { "epoch": 0.6152665531547519, "grad_norm": 1.003469715459424, "learning_rate": 6.809523389047902e-06, "loss": 0.8076, "step": 3207 }, { "epoch": 0.61545840427828, "grad_norm": 0.9708387243206748, "learning_rate": 6.803633978176045e-06, "loss": 0.8305, "step": 3208 }, { "epoch": 0.6156502554018082, "grad_norm": 0.9362567166195315, "learning_rate": 6.797745801869066e-06, "loss": 0.751, "step": 3209 }, { "epoch": 0.6158421065253363, "grad_norm": 0.9148268265935959, "learning_rate": 6.791858862401212e-06, "loss": 0.8162, "step": 3210 }, { "epoch": 0.6160339576488645, "grad_norm": 0.8918808752975992, "learning_rate": 6.785973162046253e-06, "loss": 0.705, "step": 3211 }, { "epoch": 0.6162258087723926, "grad_norm": 0.9081520705501233, "learning_rate": 6.78008870307749e-06, "loss": 0.8187, "step": 3212 }, { "epoch": 0.6164176598959208, "grad_norm": 0.9939346114637052, "learning_rate": 6.774205487767733e-06, "loss": 0.6826, "step": 3213 }, { "epoch": 0.6166095110194489, "grad_norm": 0.8752151120869861, "learning_rate": 6.768323518389317e-06, "loss": 0.7044, "step": 3214 }, { "epoch": 0.6168013621429771, "grad_norm": 0.834382314423898, "learning_rate": 6.762442797214093e-06, "loss": 0.785, "step": 3215 }, { "epoch": 0.6169932132665052, "grad_norm": 1.0533594020953383, "learning_rate": 6.75656332651343e-06, "loss": 0.7619, "step": 3216 }, { "epoch": 0.6171850643900333, "grad_norm": 0.8803804257055634, "learning_rate": 6.750685108558221e-06, "loss": 0.7405, "step": 3217 }, { "epoch": 0.6173769155135614, "grad_norm": 0.9908232445579598, "learning_rate": 6.744808145618869e-06, "loss": 0.8065, "step": 3218 }, { "epoch": 0.6175687666370896, "grad_norm": 0.893691540293989, "learning_rate": 6.7389324399652885e-06, "loss": 0.7743, "step": 3219 }, { "epoch": 0.6177606177606177, "grad_norm": 0.6778896888582897, "learning_rate": 6.733057993866917e-06, "loss": 0.3272, "step": 3220 }, { "epoch": 0.6179524688841459, "grad_norm": 0.9623750551273063, "learning_rate": 6.727184809592697e-06, "loss": 0.7731, "step": 3221 }, { "epoch": 0.618144320007674, "grad_norm": 0.8706249375189854, "learning_rate": 6.721312889411095e-06, "loss": 0.8384, "step": 3222 }, { "epoch": 0.6183361711312022, "grad_norm": 0.8716024035126194, "learning_rate": 6.715442235590077e-06, "loss": 0.6919, "step": 3223 }, { "epoch": 0.6185280222547304, "grad_norm": 0.9575915099655588, "learning_rate": 6.709572850397126e-06, "loss": 0.7556, "step": 3224 }, { "epoch": 0.6187198733782585, "grad_norm": 0.9806996689306866, "learning_rate": 6.703704736099236e-06, "loss": 0.8453, "step": 3225 }, { "epoch": 0.6189117245017867, "grad_norm": 0.8844793505548446, "learning_rate": 6.697837894962904e-06, "loss": 0.7716, "step": 3226 }, { "epoch": 0.6191035756253147, "grad_norm": 0.8560543404369942, "learning_rate": 6.691972329254144e-06, "loss": 0.785, "step": 3227 }, { "epoch": 0.6192954267488429, "grad_norm": 1.1440269875997369, "learning_rate": 6.686108041238473e-06, "loss": 0.7558, "step": 3228 }, { "epoch": 0.619487277872371, "grad_norm": 0.9054324975252125, "learning_rate": 6.680245033180912e-06, "loss": 0.785, "step": 3229 }, { "epoch": 0.6196791289958992, "grad_norm": 0.8830338148437887, "learning_rate": 6.674383307345991e-06, "loss": 0.7505, "step": 3230 }, { "epoch": 0.6198709801194273, "grad_norm": 0.9009402993560128, "learning_rate": 6.66852286599774e-06, "loss": 0.7938, "step": 3231 }, { "epoch": 0.6200628312429555, "grad_norm": 0.9885364756143281, "learning_rate": 6.662663711399705e-06, "loss": 0.8057, "step": 3232 }, { "epoch": 0.6202546823664836, "grad_norm": 1.0287498218668367, "learning_rate": 6.656805845814922e-06, "loss": 0.8082, "step": 3233 }, { "epoch": 0.6204465334900118, "grad_norm": 1.0050175269711379, "learning_rate": 6.650949271505937e-06, "loss": 0.8114, "step": 3234 }, { "epoch": 0.6206383846135399, "grad_norm": 1.0115833646439294, "learning_rate": 6.645093990734788e-06, "loss": 0.8262, "step": 3235 }, { "epoch": 0.6208302357370681, "grad_norm": 1.0535765132330375, "learning_rate": 6.639240005763019e-06, "loss": 0.8192, "step": 3236 }, { "epoch": 0.6210220868605961, "grad_norm": 1.0655959583358223, "learning_rate": 6.633387318851683e-06, "loss": 0.7114, "step": 3237 }, { "epoch": 0.6212139379841243, "grad_norm": 0.9102299228765509, "learning_rate": 6.6275359322613156e-06, "loss": 0.8695, "step": 3238 }, { "epoch": 0.6214057891076524, "grad_norm": 1.0409413044498539, "learning_rate": 6.62168584825196e-06, "loss": 0.7461, "step": 3239 }, { "epoch": 0.6215976402311806, "grad_norm": 0.8501738479713359, "learning_rate": 6.615837069083153e-06, "loss": 0.7089, "step": 3240 }, { "epoch": 0.6217894913547087, "grad_norm": 0.6543290649799384, "learning_rate": 6.609989597013924e-06, "loss": 0.3148, "step": 3241 }, { "epoch": 0.6219813424782369, "grad_norm": 0.707965298047595, "learning_rate": 6.604143434302807e-06, "loss": 0.3107, "step": 3242 }, { "epoch": 0.622173193601765, "grad_norm": 0.9109388860015597, "learning_rate": 6.598298583207825e-06, "loss": 0.7562, "step": 3243 }, { "epoch": 0.6223650447252932, "grad_norm": 0.9807936984548264, "learning_rate": 6.59245504598649e-06, "loss": 0.781, "step": 3244 }, { "epoch": 0.6225568958488213, "grad_norm": 1.0413512754614298, "learning_rate": 6.586612824895812e-06, "loss": 0.7556, "step": 3245 }, { "epoch": 0.6227487469723495, "grad_norm": 0.9051167290756142, "learning_rate": 6.5807719221922885e-06, "loss": 0.7482, "step": 3246 }, { "epoch": 0.6229405980958777, "grad_norm": 0.601081012138233, "learning_rate": 6.574932340131917e-06, "loss": 0.3257, "step": 3247 }, { "epoch": 0.6231324492194057, "grad_norm": 1.0319873770365042, "learning_rate": 6.569094080970179e-06, "loss": 0.7839, "step": 3248 }, { "epoch": 0.6233243003429338, "grad_norm": 0.940194991812362, "learning_rate": 6.5632571469620385e-06, "loss": 0.8063, "step": 3249 }, { "epoch": 0.623516151466462, "grad_norm": 1.0500863672699987, "learning_rate": 6.5574215403619515e-06, "loss": 0.7481, "step": 3250 }, { "epoch": 0.6237080025899902, "grad_norm": 0.9773015684234635, "learning_rate": 6.551587263423872e-06, "loss": 0.8633, "step": 3251 }, { "epoch": 0.6238998537135183, "grad_norm": 0.8297377008943956, "learning_rate": 6.545754318401228e-06, "loss": 0.8187, "step": 3252 }, { "epoch": 0.6240917048370465, "grad_norm": 0.9675400116036936, "learning_rate": 6.539922707546939e-06, "loss": 0.8178, "step": 3253 }, { "epoch": 0.6242835559605746, "grad_norm": 0.9213349079274392, "learning_rate": 6.534092433113404e-06, "loss": 0.7735, "step": 3254 }, { "epoch": 0.6244754070841028, "grad_norm": 0.9895678423426124, "learning_rate": 6.528263497352509e-06, "loss": 0.7873, "step": 3255 }, { "epoch": 0.6246672582076309, "grad_norm": 0.9442079437152588, "learning_rate": 6.522435902515626e-06, "loss": 0.7991, "step": 3256 }, { "epoch": 0.6248591093311591, "grad_norm": 0.9319698079257015, "learning_rate": 6.516609650853605e-06, "loss": 0.8073, "step": 3257 }, { "epoch": 0.6250509604546871, "grad_norm": 0.6487334858733212, "learning_rate": 6.510784744616777e-06, "loss": 0.3301, "step": 3258 }, { "epoch": 0.6252428115782153, "grad_norm": 1.0130704706623128, "learning_rate": 6.504961186054956e-06, "loss": 0.7834, "step": 3259 }, { "epoch": 0.6254346627017434, "grad_norm": 0.6581419409661257, "learning_rate": 6.499138977417429e-06, "loss": 0.3278, "step": 3260 }, { "epoch": 0.6256265138252716, "grad_norm": 0.8418208623807205, "learning_rate": 6.49331812095298e-06, "loss": 0.768, "step": 3261 }, { "epoch": 0.6258183649487997, "grad_norm": 0.8735667811839357, "learning_rate": 6.487498618909845e-06, "loss": 0.7699, "step": 3262 }, { "epoch": 0.6260102160723279, "grad_norm": 0.9493806757346556, "learning_rate": 6.481680473535752e-06, "loss": 0.744, "step": 3263 }, { "epoch": 0.626202067195856, "grad_norm": 0.8137093554430677, "learning_rate": 6.4758636870779035e-06, "loss": 0.7195, "step": 3264 }, { "epoch": 0.6263939183193842, "grad_norm": 0.6288899089874866, "learning_rate": 6.470048261782973e-06, "loss": 0.3148, "step": 3265 }, { "epoch": 0.6265857694429123, "grad_norm": 1.0842102208264122, "learning_rate": 6.464234199897117e-06, "loss": 0.8142, "step": 3266 }, { "epoch": 0.6267776205664405, "grad_norm": 0.9283680808168555, "learning_rate": 6.458421503665958e-06, "loss": 0.713, "step": 3267 }, { "epoch": 0.6269694716899686, "grad_norm": 0.9659234317883474, "learning_rate": 6.452610175334591e-06, "loss": 0.8222, "step": 3268 }, { "epoch": 0.6271613228134967, "grad_norm": 0.8527090713389308, "learning_rate": 6.446800217147584e-06, "loss": 0.7565, "step": 3269 }, { "epoch": 0.6273531739370248, "grad_norm": 0.9378630653183178, "learning_rate": 6.440991631348974e-06, "loss": 0.7513, "step": 3270 }, { "epoch": 0.627545025060553, "grad_norm": 0.626475745907296, "learning_rate": 6.435184420182274e-06, "loss": 0.3288, "step": 3271 }, { "epoch": 0.6277368761840811, "grad_norm": 0.9069985759771475, "learning_rate": 6.429378585890463e-06, "loss": 0.7767, "step": 3272 }, { "epoch": 0.6279287273076093, "grad_norm": 1.0097525168178305, "learning_rate": 6.423574130715984e-06, "loss": 0.7925, "step": 3273 }, { "epoch": 0.6281205784311374, "grad_norm": 1.0055107696099348, "learning_rate": 6.4177710569007524e-06, "loss": 0.7359, "step": 3274 }, { "epoch": 0.6283124295546656, "grad_norm": 0.9526632005261854, "learning_rate": 6.411969366686144e-06, "loss": 0.7926, "step": 3275 }, { "epoch": 0.6285042806781937, "grad_norm": 0.8984498338213734, "learning_rate": 6.406169062313011e-06, "loss": 0.8218, "step": 3276 }, { "epoch": 0.6286961318017219, "grad_norm": 0.6473914195912503, "learning_rate": 6.400370146021662e-06, "loss": 0.3303, "step": 3277 }, { "epoch": 0.62888798292525, "grad_norm": 0.9995668958839475, "learning_rate": 6.394572620051868e-06, "loss": 0.8103, "step": 3278 }, { "epoch": 0.6290798340487781, "grad_norm": 0.9113321421368262, "learning_rate": 6.388776486642869e-06, "loss": 0.7525, "step": 3279 }, { "epoch": 0.6292716851723062, "grad_norm": 0.8735688342590706, "learning_rate": 6.382981748033359e-06, "loss": 0.7589, "step": 3280 }, { "epoch": 0.6294635362958344, "grad_norm": 0.6291192275009087, "learning_rate": 6.377188406461509e-06, "loss": 0.3039, "step": 3281 }, { "epoch": 0.6296553874193626, "grad_norm": 0.9781718538209928, "learning_rate": 6.371396464164934e-06, "loss": 0.6555, "step": 3282 }, { "epoch": 0.6298472385428907, "grad_norm": 1.0276077891010802, "learning_rate": 6.365605923380714e-06, "loss": 0.7815, "step": 3283 }, { "epoch": 0.6300390896664189, "grad_norm": 0.8914922727832749, "learning_rate": 6.359816786345391e-06, "loss": 0.7986, "step": 3284 }, { "epoch": 0.630230940789947, "grad_norm": 0.9829174210671483, "learning_rate": 6.354029055294957e-06, "loss": 0.8052, "step": 3285 }, { "epoch": 0.6304227919134752, "grad_norm": 0.9027063943868051, "learning_rate": 6.348242732464873e-06, "loss": 0.784, "step": 3286 }, { "epoch": 0.6306146430370033, "grad_norm": 0.8472439668571184, "learning_rate": 6.342457820090045e-06, "loss": 0.727, "step": 3287 }, { "epoch": 0.6308064941605315, "grad_norm": 0.9666989873532759, "learning_rate": 6.336674320404841e-06, "loss": 0.7777, "step": 3288 }, { "epoch": 0.6309983452840596, "grad_norm": 0.92597597849188, "learning_rate": 6.330892235643076e-06, "loss": 0.6984, "step": 3289 }, { "epoch": 0.6311901964075877, "grad_norm": 0.9196491266024992, "learning_rate": 6.325111568038027e-06, "loss": 0.7677, "step": 3290 }, { "epoch": 0.6313820475311158, "grad_norm": 0.8971064287769993, "learning_rate": 6.31933231982242e-06, "loss": 0.7641, "step": 3291 }, { "epoch": 0.631573898654644, "grad_norm": 0.9260461391380932, "learning_rate": 6.313554493228431e-06, "loss": 0.7343, "step": 3292 }, { "epoch": 0.6317657497781721, "grad_norm": 0.9787782907176563, "learning_rate": 6.307778090487691e-06, "loss": 0.7666, "step": 3293 }, { "epoch": 0.6319576009017003, "grad_norm": 0.9047954033697142, "learning_rate": 6.302003113831272e-06, "loss": 0.7292, "step": 3294 }, { "epoch": 0.6321494520252284, "grad_norm": 0.9538266462294526, "learning_rate": 6.296229565489711e-06, "loss": 0.8079, "step": 3295 }, { "epoch": 0.6323413031487566, "grad_norm": 1.0402723608965663, "learning_rate": 6.29045744769298e-06, "loss": 0.7366, "step": 3296 }, { "epoch": 0.6325331542722847, "grad_norm": 1.0299248205225573, "learning_rate": 6.284686762670503e-06, "loss": 0.7763, "step": 3297 }, { "epoch": 0.6327250053958129, "grad_norm": 1.078732705092462, "learning_rate": 6.278917512651152e-06, "loss": 0.8583, "step": 3298 }, { "epoch": 0.632916856519341, "grad_norm": 0.8374487969203426, "learning_rate": 6.273149699863234e-06, "loss": 0.7743, "step": 3299 }, { "epoch": 0.6331087076428691, "grad_norm": 0.8828476572006976, "learning_rate": 6.26738332653452e-06, "loss": 0.7368, "step": 3300 }, { "epoch": 0.6333005587663972, "grad_norm": 0.9331758545993669, "learning_rate": 6.261618394892213e-06, "loss": 0.7211, "step": 3301 }, { "epoch": 0.6334924098899254, "grad_norm": 0.989907677196942, "learning_rate": 6.2558549071629595e-06, "loss": 0.8042, "step": 3302 }, { "epoch": 0.6336842610134535, "grad_norm": 1.0387095515259432, "learning_rate": 6.25009286557285e-06, "loss": 0.7659, "step": 3303 }, { "epoch": 0.6338761121369817, "grad_norm": 1.05922709263834, "learning_rate": 6.244332272347413e-06, "loss": 0.6661, "step": 3304 }, { "epoch": 0.6340679632605098, "grad_norm": 0.9537667474381579, "learning_rate": 6.238573129711627e-06, "loss": 0.8021, "step": 3305 }, { "epoch": 0.634259814384038, "grad_norm": 0.962074956700076, "learning_rate": 6.232815439889901e-06, "loss": 0.8088, "step": 3306 }, { "epoch": 0.6344516655075662, "grad_norm": 0.7967971699649035, "learning_rate": 6.227059205106085e-06, "loss": 0.7505, "step": 3307 }, { "epoch": 0.6346435166310943, "grad_norm": 0.9649550658287743, "learning_rate": 6.22130442758347e-06, "loss": 0.7166, "step": 3308 }, { "epoch": 0.6348353677546225, "grad_norm": 1.0424679466775668, "learning_rate": 6.2155511095447775e-06, "loss": 0.7267, "step": 3309 }, { "epoch": 0.6350272188781505, "grad_norm": 0.6452703152552469, "learning_rate": 6.209799253212177e-06, "loss": 0.2929, "step": 3310 }, { "epoch": 0.6352190700016787, "grad_norm": 0.9825872972568207, "learning_rate": 6.204048860807261e-06, "loss": 0.8377, "step": 3311 }, { "epoch": 0.6354109211252068, "grad_norm": 0.881868811507342, "learning_rate": 6.198299934551068e-06, "loss": 0.8337, "step": 3312 }, { "epoch": 0.635602772248735, "grad_norm": 0.8920667302489882, "learning_rate": 6.1925524766640575e-06, "loss": 0.7798, "step": 3313 }, { "epoch": 0.6357946233722631, "grad_norm": 0.8618111104101117, "learning_rate": 6.186806489366127e-06, "loss": 0.8334, "step": 3314 }, { "epoch": 0.6359864744957913, "grad_norm": 0.7593416633705272, "learning_rate": 6.181061974876615e-06, "loss": 0.7064, "step": 3315 }, { "epoch": 0.6361783256193194, "grad_norm": 0.9856975867043388, "learning_rate": 6.175318935414281e-06, "loss": 0.847, "step": 3316 }, { "epoch": 0.6363701767428476, "grad_norm": 0.9432695183059883, "learning_rate": 6.169577373197315e-06, "loss": 0.8472, "step": 3317 }, { "epoch": 0.6365620278663757, "grad_norm": 0.9445727838664794, "learning_rate": 6.163837290443339e-06, "loss": 0.7358, "step": 3318 }, { "epoch": 0.6367538789899039, "grad_norm": 0.8727320614038497, "learning_rate": 6.158098689369405e-06, "loss": 0.8529, "step": 3319 }, { "epoch": 0.636945730113432, "grad_norm": 0.9044729558604774, "learning_rate": 6.1523615721919916e-06, "loss": 0.7912, "step": 3320 }, { "epoch": 0.6371375812369601, "grad_norm": 0.9126144035163343, "learning_rate": 6.146625941127002e-06, "loss": 0.7843, "step": 3321 }, { "epoch": 0.6373294323604882, "grad_norm": 0.8809639269733749, "learning_rate": 6.14089179838977e-06, "loss": 0.7734, "step": 3322 }, { "epoch": 0.6375212834840164, "grad_norm": 0.8008164510073675, "learning_rate": 6.135159146195048e-06, "loss": 0.776, "step": 3323 }, { "epoch": 0.6377131346075445, "grad_norm": 1.0436313092702911, "learning_rate": 6.1294279867570135e-06, "loss": 0.7268, "step": 3324 }, { "epoch": 0.6379049857310727, "grad_norm": 0.9390564397594438, "learning_rate": 6.123698322289282e-06, "loss": 0.7789, "step": 3325 }, { "epoch": 0.6380968368546008, "grad_norm": 0.9130744079934245, "learning_rate": 6.117970155004871e-06, "loss": 0.7639, "step": 3326 }, { "epoch": 0.638288687978129, "grad_norm": 0.9454955607146492, "learning_rate": 6.112243487116232e-06, "loss": 0.7048, "step": 3327 }, { "epoch": 0.6384805391016571, "grad_norm": 1.0234476804631152, "learning_rate": 6.1065183208352264e-06, "loss": 0.715, "step": 3328 }, { "epoch": 0.6386723902251853, "grad_norm": 0.9127735865017049, "learning_rate": 6.100794658373154e-06, "loss": 0.7971, "step": 3329 }, { "epoch": 0.6388642413487134, "grad_norm": 1.0073332030101976, "learning_rate": 6.095072501940718e-06, "loss": 0.7504, "step": 3330 }, { "epoch": 0.6390560924722415, "grad_norm": 0.8770280569515939, "learning_rate": 6.089351853748046e-06, "loss": 0.7872, "step": 3331 }, { "epoch": 0.6392479435957696, "grad_norm": 0.9621392456568046, "learning_rate": 6.083632716004682e-06, "loss": 0.7986, "step": 3332 }, { "epoch": 0.6394397947192978, "grad_norm": 0.9206866698414788, "learning_rate": 6.077915090919582e-06, "loss": 0.7799, "step": 3333 }, { "epoch": 0.639631645842826, "grad_norm": 0.868715735774113, "learning_rate": 6.072198980701131e-06, "loss": 0.7903, "step": 3334 }, { "epoch": 0.6398234969663541, "grad_norm": 0.8802205136631157, "learning_rate": 6.066484387557114e-06, "loss": 0.7055, "step": 3335 }, { "epoch": 0.6400153480898823, "grad_norm": 0.8560964976790216, "learning_rate": 6.060771313694737e-06, "loss": 0.6956, "step": 3336 }, { "epoch": 0.6402071992134104, "grad_norm": 0.912420996406227, "learning_rate": 6.0550597613206205e-06, "loss": 0.8263, "step": 3337 }, { "epoch": 0.6403990503369386, "grad_norm": 0.8375942928369031, "learning_rate": 6.04934973264079e-06, "loss": 0.7878, "step": 3338 }, { "epoch": 0.6405909014604667, "grad_norm": 0.8970459672229418, "learning_rate": 6.043641229860699e-06, "loss": 0.7662, "step": 3339 }, { "epoch": 0.6407827525839949, "grad_norm": 0.9598918760288384, "learning_rate": 6.037934255185191e-06, "loss": 0.7447, "step": 3340 }, { "epoch": 0.640974603707523, "grad_norm": 0.9445330830645512, "learning_rate": 6.03222881081853e-06, "loss": 0.784, "step": 3341 }, { "epoch": 0.6411664548310511, "grad_norm": 1.0392049288601681, "learning_rate": 6.0265248989643895e-06, "loss": 0.8222, "step": 3342 }, { "epoch": 0.6413583059545792, "grad_norm": 1.3007314359786883, "learning_rate": 6.020822521825844e-06, "loss": 0.7516, "step": 3343 }, { "epoch": 0.6415501570781074, "grad_norm": 0.8801931662337148, "learning_rate": 6.015121681605389e-06, "loss": 0.7959, "step": 3344 }, { "epoch": 0.6417420082016355, "grad_norm": 0.9297164529348795, "learning_rate": 6.0094223805049145e-06, "loss": 0.7904, "step": 3345 }, { "epoch": 0.6419338593251637, "grad_norm": 0.9615726806406588, "learning_rate": 6.00372462072572e-06, "loss": 0.8744, "step": 3346 }, { "epoch": 0.6421257104486918, "grad_norm": 0.8469559431623446, "learning_rate": 5.998028404468506e-06, "loss": 0.758, "step": 3347 }, { "epoch": 0.64231756157222, "grad_norm": 0.9361093724740446, "learning_rate": 5.992333733933378e-06, "loss": 0.7818, "step": 3348 }, { "epoch": 0.6425094126957481, "grad_norm": 0.8934190269229507, "learning_rate": 5.986640611319855e-06, "loss": 0.7643, "step": 3349 }, { "epoch": 0.6427012638192763, "grad_norm": 1.0561844113554901, "learning_rate": 5.980949038826843e-06, "loss": 0.7077, "step": 3350 }, { "epoch": 0.6428931149428044, "grad_norm": 0.9929772393773099, "learning_rate": 5.975259018652658e-06, "loss": 0.7974, "step": 3351 }, { "epoch": 0.6430849660663325, "grad_norm": 0.9518722659843469, "learning_rate": 5.969570552995014e-06, "loss": 0.7504, "step": 3352 }, { "epoch": 0.6432768171898606, "grad_norm": 1.1164809084978127, "learning_rate": 5.963883644051022e-06, "loss": 0.7928, "step": 3353 }, { "epoch": 0.6434686683133888, "grad_norm": 0.87239186290849, "learning_rate": 5.958198294017199e-06, "loss": 0.7434, "step": 3354 }, { "epoch": 0.6436605194369169, "grad_norm": 0.9573150094521797, "learning_rate": 5.952514505089453e-06, "loss": 0.7698, "step": 3355 }, { "epoch": 0.6438523705604451, "grad_norm": 0.9893038163781337, "learning_rate": 5.946832279463092e-06, "loss": 0.7589, "step": 3356 }, { "epoch": 0.6440442216839732, "grad_norm": 0.972342149737334, "learning_rate": 5.941151619332819e-06, "loss": 0.8261, "step": 3357 }, { "epoch": 0.6442360728075014, "grad_norm": 0.9543129023855278, "learning_rate": 5.935472526892729e-06, "loss": 0.7729, "step": 3358 }, { "epoch": 0.6444279239310295, "grad_norm": 1.0752066279876693, "learning_rate": 5.929795004336325e-06, "loss": 0.7328, "step": 3359 }, { "epoch": 0.6446197750545577, "grad_norm": 0.9601928993339033, "learning_rate": 5.924119053856489e-06, "loss": 0.8271, "step": 3360 }, { "epoch": 0.6448116261780859, "grad_norm": 1.000741788638824, "learning_rate": 5.918444677645501e-06, "loss": 0.7532, "step": 3361 }, { "epoch": 0.6450034773016139, "grad_norm": 0.9407779637364475, "learning_rate": 5.9127718778950325e-06, "loss": 0.7779, "step": 3362 }, { "epoch": 0.645195328425142, "grad_norm": 1.0344161487494705, "learning_rate": 5.907100656796145e-06, "loss": 0.8042, "step": 3363 }, { "epoch": 0.6453871795486702, "grad_norm": 0.940156403461497, "learning_rate": 5.901431016539296e-06, "loss": 0.7536, "step": 3364 }, { "epoch": 0.6455790306721984, "grad_norm": 0.8150334058439651, "learning_rate": 5.895762959314326e-06, "loss": 0.7418, "step": 3365 }, { "epoch": 0.6457708817957265, "grad_norm": 0.9512636028055365, "learning_rate": 5.890096487310465e-06, "loss": 0.7282, "step": 3366 }, { "epoch": 0.6459627329192547, "grad_norm": 0.8624780639734595, "learning_rate": 5.8844316027163315e-06, "loss": 0.7314, "step": 3367 }, { "epoch": 0.6461545840427828, "grad_norm": 0.965187720314896, "learning_rate": 5.8787683077199355e-06, "loss": 0.7946, "step": 3368 }, { "epoch": 0.646346435166311, "grad_norm": 1.02164702349399, "learning_rate": 5.873106604508665e-06, "loss": 0.7647, "step": 3369 }, { "epoch": 0.6465382862898391, "grad_norm": 0.8814260725565847, "learning_rate": 5.8674464952693e-06, "loss": 0.7694, "step": 3370 }, { "epoch": 0.6467301374133673, "grad_norm": 0.8544203175750248, "learning_rate": 5.861787982187998e-06, "loss": 0.6758, "step": 3371 }, { "epoch": 0.6469219885368954, "grad_norm": 0.9784541136313502, "learning_rate": 5.856131067450305e-06, "loss": 0.7862, "step": 3372 }, { "epoch": 0.6471138396604235, "grad_norm": 0.9805138090354415, "learning_rate": 5.850475753241152e-06, "loss": 0.8175, "step": 3373 }, { "epoch": 0.6473056907839516, "grad_norm": 0.8571206923048871, "learning_rate": 5.844822041744846e-06, "loss": 0.7513, "step": 3374 }, { "epoch": 0.6474975419074798, "grad_norm": 1.057005037200379, "learning_rate": 5.839169935145078e-06, "loss": 0.6848, "step": 3375 }, { "epoch": 0.6476893930310079, "grad_norm": 0.8699480299985536, "learning_rate": 5.833519435624917e-06, "loss": 0.7584, "step": 3376 }, { "epoch": 0.6478812441545361, "grad_norm": 0.9905197571946462, "learning_rate": 5.827870545366815e-06, "loss": 0.6834, "step": 3377 }, { "epoch": 0.6480730952780642, "grad_norm": 0.8773567315544473, "learning_rate": 5.822223266552598e-06, "loss": 0.7767, "step": 3378 }, { "epoch": 0.6482649464015924, "grad_norm": 0.9650569275308871, "learning_rate": 5.816577601363474e-06, "loss": 0.7625, "step": 3379 }, { "epoch": 0.6484567975251205, "grad_norm": 1.1644227253787347, "learning_rate": 5.810933551980022e-06, "loss": 0.8143, "step": 3380 }, { "epoch": 0.6486486486486487, "grad_norm": 0.8973213250523882, "learning_rate": 5.805291120582204e-06, "loss": 0.759, "step": 3381 }, { "epoch": 0.6488404997721768, "grad_norm": 0.8977866493198617, "learning_rate": 5.799650309349348e-06, "loss": 0.7748, "step": 3382 }, { "epoch": 0.6490323508957049, "grad_norm": 0.9452786159862487, "learning_rate": 5.79401112046017e-06, "loss": 0.7531, "step": 3383 }, { "epoch": 0.649224202019233, "grad_norm": 0.7974639071488384, "learning_rate": 5.788373556092748e-06, "loss": 0.6943, "step": 3384 }, { "epoch": 0.6494160531427612, "grad_norm": 1.0324000136411517, "learning_rate": 5.782737618424533e-06, "loss": 0.8045, "step": 3385 }, { "epoch": 0.6496079042662893, "grad_norm": 1.066539511959713, "learning_rate": 5.777103309632353e-06, "loss": 0.7412, "step": 3386 }, { "epoch": 0.6497997553898175, "grad_norm": 0.9534956112041648, "learning_rate": 5.7714706318924e-06, "loss": 0.7972, "step": 3387 }, { "epoch": 0.6499916065133456, "grad_norm": 0.9981688597312814, "learning_rate": 5.765839587380246e-06, "loss": 0.765, "step": 3388 }, { "epoch": 0.6501834576368738, "grad_norm": 0.8989051169050645, "learning_rate": 5.760210178270827e-06, "loss": 0.8035, "step": 3389 }, { "epoch": 0.650375308760402, "grad_norm": 0.9471288402985915, "learning_rate": 5.754582406738441e-06, "loss": 0.7146, "step": 3390 }, { "epoch": 0.6505671598839301, "grad_norm": 0.9402054067411625, "learning_rate": 5.748956274956763e-06, "loss": 0.7985, "step": 3391 }, { "epoch": 0.6507590110074583, "grad_norm": 1.0312203478237523, "learning_rate": 5.74333178509883e-06, "loss": 0.7896, "step": 3392 }, { "epoch": 0.6509508621309864, "grad_norm": 0.9881686887325661, "learning_rate": 5.737708939337046e-06, "loss": 0.7806, "step": 3393 }, { "epoch": 0.6511427132545144, "grad_norm": 1.0537442014667813, "learning_rate": 5.732087739843179e-06, "loss": 0.8063, "step": 3394 }, { "epoch": 0.6513345643780426, "grad_norm": 0.8883992602810735, "learning_rate": 5.7264681887883644e-06, "loss": 0.8124, "step": 3395 }, { "epoch": 0.6515264155015708, "grad_norm": 0.9264574844833916, "learning_rate": 5.7208502883430964e-06, "loss": 0.7896, "step": 3396 }, { "epoch": 0.6517182666250989, "grad_norm": 0.914963042180145, "learning_rate": 5.715234040677229e-06, "loss": 0.7532, "step": 3397 }, { "epoch": 0.6519101177486271, "grad_norm": 0.9977419725983296, "learning_rate": 5.709619447959991e-06, "loss": 0.7266, "step": 3398 }, { "epoch": 0.6521019688721552, "grad_norm": 1.0113410838794683, "learning_rate": 5.704006512359961e-06, "loss": 0.7981, "step": 3399 }, { "epoch": 0.6522938199956834, "grad_norm": 1.0164590839484438, "learning_rate": 5.698395236045078e-06, "loss": 0.7809, "step": 3400 }, { "epoch": 0.6524856711192115, "grad_norm": 0.9444735503005691, "learning_rate": 5.6927856211826425e-06, "loss": 0.7215, "step": 3401 }, { "epoch": 0.6526775222427397, "grad_norm": 0.8426105445117565, "learning_rate": 5.687177669939311e-06, "loss": 0.7407, "step": 3402 }, { "epoch": 0.6528693733662678, "grad_norm": 0.877656502903459, "learning_rate": 5.681571384481106e-06, "loss": 0.7296, "step": 3403 }, { "epoch": 0.6530612244897959, "grad_norm": 0.9441713189803799, "learning_rate": 5.675966766973395e-06, "loss": 0.7558, "step": 3404 }, { "epoch": 0.653253075613324, "grad_norm": 0.8727000450105501, "learning_rate": 5.670363819580909e-06, "loss": 0.7546, "step": 3405 }, { "epoch": 0.6534449267368522, "grad_norm": 0.9920851232658054, "learning_rate": 5.664762544467728e-06, "loss": 0.7394, "step": 3406 }, { "epoch": 0.6536367778603803, "grad_norm": 1.0696907182692201, "learning_rate": 5.659162943797291e-06, "loss": 0.7613, "step": 3407 }, { "epoch": 0.6538286289839085, "grad_norm": 0.8404121005702052, "learning_rate": 5.653565019732388e-06, "loss": 0.7624, "step": 3408 }, { "epoch": 0.6540204801074366, "grad_norm": 0.7913816729405508, "learning_rate": 5.647968774435163e-06, "loss": 0.7295, "step": 3409 }, { "epoch": 0.6542123312309648, "grad_norm": 0.9625483562973537, "learning_rate": 5.642374210067109e-06, "loss": 0.8205, "step": 3410 }, { "epoch": 0.6544041823544929, "grad_norm": 0.8626667039356664, "learning_rate": 5.6367813287890705e-06, "loss": 0.7129, "step": 3411 }, { "epoch": 0.6545960334780211, "grad_norm": 0.9227148279995369, "learning_rate": 5.631190132761247e-06, "loss": 0.7866, "step": 3412 }, { "epoch": 0.6547878846015492, "grad_norm": 1.0041401849200484, "learning_rate": 5.625600624143181e-06, "loss": 0.7303, "step": 3413 }, { "epoch": 0.6549797357250774, "grad_norm": 0.9656351164350183, "learning_rate": 5.620012805093764e-06, "loss": 0.8028, "step": 3414 }, { "epoch": 0.6551715868486054, "grad_norm": 0.6645722571389527, "learning_rate": 5.614426677771239e-06, "loss": 0.3319, "step": 3415 }, { "epoch": 0.6553634379721336, "grad_norm": 0.9304844516651068, "learning_rate": 5.608842244333184e-06, "loss": 0.7515, "step": 3416 }, { "epoch": 0.6555552890956617, "grad_norm": 0.8771819123377412, "learning_rate": 5.603259506936541e-06, "loss": 0.7651, "step": 3417 }, { "epoch": 0.6557471402191899, "grad_norm": 0.852319077815337, "learning_rate": 5.597678467737585e-06, "loss": 0.7382, "step": 3418 }, { "epoch": 0.655938991342718, "grad_norm": 1.119478003285629, "learning_rate": 5.592099128891937e-06, "loss": 0.7803, "step": 3419 }, { "epoch": 0.6561308424662462, "grad_norm": 0.9637705969349062, "learning_rate": 5.5865214925545595e-06, "loss": 0.7509, "step": 3420 }, { "epoch": 0.6563226935897744, "grad_norm": 0.874092775684078, "learning_rate": 5.580945560879763e-06, "loss": 0.7426, "step": 3421 }, { "epoch": 0.6565145447133025, "grad_norm": 0.9424419626733748, "learning_rate": 5.575371336021192e-06, "loss": 0.8085, "step": 3422 }, { "epoch": 0.6567063958368307, "grad_norm": 0.9580215396498964, "learning_rate": 5.56979882013184e-06, "loss": 0.7208, "step": 3423 }, { "epoch": 0.6568982469603588, "grad_norm": 0.8719288140637488, "learning_rate": 5.564228015364032e-06, "loss": 0.7797, "step": 3424 }, { "epoch": 0.6570900980838869, "grad_norm": 0.9776256361711574, "learning_rate": 5.558658923869442e-06, "loss": 0.7793, "step": 3425 }, { "epoch": 0.657281949207415, "grad_norm": 1.0010860410577394, "learning_rate": 5.553091547799067e-06, "loss": 0.7567, "step": 3426 }, { "epoch": 0.6574738003309432, "grad_norm": 0.8957377079331191, "learning_rate": 5.547525889303265e-06, "loss": 0.7703, "step": 3427 }, { "epoch": 0.6576656514544713, "grad_norm": 0.6058054874754073, "learning_rate": 5.541961950531707e-06, "loss": 0.3147, "step": 3428 }, { "epoch": 0.6578575025779995, "grad_norm": 0.9391318744596869, "learning_rate": 5.536399733633413e-06, "loss": 0.7826, "step": 3429 }, { "epoch": 0.6580493537015276, "grad_norm": 0.9622975870996286, "learning_rate": 5.530839240756732e-06, "loss": 0.7999, "step": 3430 }, { "epoch": 0.6582412048250558, "grad_norm": 0.6669086228270559, "learning_rate": 5.5252804740493485e-06, "loss": 0.325, "step": 3431 }, { "epoch": 0.6584330559485839, "grad_norm": 0.8772074012952105, "learning_rate": 5.519723435658289e-06, "loss": 0.7519, "step": 3432 }, { "epoch": 0.6586249070721121, "grad_norm": 0.8308780216234058, "learning_rate": 5.514168127729897e-06, "loss": 0.7652, "step": 3433 }, { "epoch": 0.6588167581956402, "grad_norm": 1.053005973434276, "learning_rate": 5.508614552409859e-06, "loss": 0.6699, "step": 3434 }, { "epoch": 0.6590086093191683, "grad_norm": 0.8548017145846767, "learning_rate": 5.503062711843189e-06, "loss": 0.8008, "step": 3435 }, { "epoch": 0.6592004604426964, "grad_norm": 1.1428439621818764, "learning_rate": 5.497512608174229e-06, "loss": 0.757, "step": 3436 }, { "epoch": 0.6593923115662246, "grad_norm": 0.9206639748609394, "learning_rate": 5.491964243546652e-06, "loss": 0.758, "step": 3437 }, { "epoch": 0.6595841626897527, "grad_norm": 0.9817669041032157, "learning_rate": 5.4864176201034605e-06, "loss": 0.7827, "step": 3438 }, { "epoch": 0.6597760138132809, "grad_norm": 1.0572444750572603, "learning_rate": 5.480872739986982e-06, "loss": 0.823, "step": 3439 }, { "epoch": 0.659967864936809, "grad_norm": 1.0724078337354503, "learning_rate": 5.475329605338873e-06, "loss": 0.769, "step": 3440 }, { "epoch": 0.6601597160603372, "grad_norm": 0.6580268488132195, "learning_rate": 5.469788218300107e-06, "loss": 0.3353, "step": 3441 }, { "epoch": 0.6603515671838653, "grad_norm": 0.8663383672538837, "learning_rate": 5.464248581011002e-06, "loss": 0.7304, "step": 3442 }, { "epoch": 0.6605434183073935, "grad_norm": 0.951922045785611, "learning_rate": 5.4587106956111845e-06, "loss": 0.733, "step": 3443 }, { "epoch": 0.6607352694309216, "grad_norm": 0.9356834664041968, "learning_rate": 5.453174564239606e-06, "loss": 0.7717, "step": 3444 }, { "epoch": 0.6609271205544498, "grad_norm": 1.0440351407518518, "learning_rate": 5.44764018903454e-06, "loss": 0.748, "step": 3445 }, { "epoch": 0.6611189716779778, "grad_norm": 0.9331926416001137, "learning_rate": 5.442107572133591e-06, "loss": 0.8533, "step": 3446 }, { "epoch": 0.661310822801506, "grad_norm": 0.8330038420160147, "learning_rate": 5.436576715673676e-06, "loss": 0.775, "step": 3447 }, { "epoch": 0.6615026739250341, "grad_norm": 0.903576622792032, "learning_rate": 5.4310476217910305e-06, "loss": 0.8098, "step": 3448 }, { "epoch": 0.6616945250485623, "grad_norm": 0.9751523575347487, "learning_rate": 5.425520292621216e-06, "loss": 0.7389, "step": 3449 }, { "epoch": 0.6618863761720905, "grad_norm": 0.9113225232420114, "learning_rate": 5.419994730299106e-06, "loss": 0.8147, "step": 3450 }, { "epoch": 0.6620782272956186, "grad_norm": 0.9541410270931333, "learning_rate": 5.414470936958898e-06, "loss": 0.7952, "step": 3451 }, { "epoch": 0.6622700784191468, "grad_norm": 0.9003459289012171, "learning_rate": 5.408948914734098e-06, "loss": 0.6911, "step": 3452 }, { "epoch": 0.6624619295426749, "grad_norm": 0.9355529474242211, "learning_rate": 5.403428665757537e-06, "loss": 0.768, "step": 3453 }, { "epoch": 0.6626537806662031, "grad_norm": 0.6531299187376136, "learning_rate": 5.397910192161354e-06, "loss": 0.2873, "step": 3454 }, { "epoch": 0.6628456317897312, "grad_norm": 0.9969354237397866, "learning_rate": 5.392393496077003e-06, "loss": 0.7566, "step": 3455 }, { "epoch": 0.6630374829132593, "grad_norm": 1.0838289355077075, "learning_rate": 5.38687857963526e-06, "loss": 0.7677, "step": 3456 }, { "epoch": 0.6632293340367874, "grad_norm": 0.9267671738857783, "learning_rate": 5.381365444966205e-06, "loss": 0.8214, "step": 3457 }, { "epoch": 0.6634211851603156, "grad_norm": 0.8542794604884708, "learning_rate": 5.375854094199229e-06, "loss": 0.7445, "step": 3458 }, { "epoch": 0.6636130362838437, "grad_norm": 0.8871186407820897, "learning_rate": 5.370344529463038e-06, "loss": 0.7331, "step": 3459 }, { "epoch": 0.6638048874073719, "grad_norm": 0.9426429260466963, "learning_rate": 5.364836752885645e-06, "loss": 0.7761, "step": 3460 }, { "epoch": 0.6639967385309, "grad_norm": 0.9039131287477105, "learning_rate": 5.359330766594381e-06, "loss": 0.7868, "step": 3461 }, { "epoch": 0.6641885896544282, "grad_norm": 1.003645273019176, "learning_rate": 5.353826572715875e-06, "loss": 0.8058, "step": 3462 }, { "epoch": 0.6643804407779563, "grad_norm": 0.8471726924835484, "learning_rate": 5.348324173376067e-06, "loss": 0.7238, "step": 3463 }, { "epoch": 0.6645722919014845, "grad_norm": 1.0120896173181253, "learning_rate": 5.342823570700206e-06, "loss": 0.7916, "step": 3464 }, { "epoch": 0.6647641430250126, "grad_norm": 0.6529941537772453, "learning_rate": 5.337324766812845e-06, "loss": 0.2958, "step": 3465 }, { "epoch": 0.6649559941485408, "grad_norm": 0.915517922177806, "learning_rate": 5.331827763837842e-06, "loss": 0.8333, "step": 3466 }, { "epoch": 0.6651478452720688, "grad_norm": 0.8785563677713996, "learning_rate": 5.326332563898361e-06, "loss": 0.7568, "step": 3467 }, { "epoch": 0.665339696395597, "grad_norm": 0.9718021842653344, "learning_rate": 5.3208391691168686e-06, "loss": 0.7652, "step": 3468 }, { "epoch": 0.6655315475191251, "grad_norm": 0.9433577680877988, "learning_rate": 5.315347581615133e-06, "loss": 0.7652, "step": 3469 }, { "epoch": 0.6657233986426533, "grad_norm": 0.9908932245909152, "learning_rate": 5.3098578035142245e-06, "loss": 0.7661, "step": 3470 }, { "epoch": 0.6659152497661814, "grad_norm": 0.8840603974026723, "learning_rate": 5.304369836934523e-06, "loss": 0.7765, "step": 3471 }, { "epoch": 0.6661071008897096, "grad_norm": 0.9382585249946159, "learning_rate": 5.298883683995697e-06, "loss": 0.7418, "step": 3472 }, { "epoch": 0.6662989520132377, "grad_norm": 0.6751527644498634, "learning_rate": 5.293399346816717e-06, "loss": 0.3249, "step": 3473 }, { "epoch": 0.6664908031367659, "grad_norm": 0.8574546948689119, "learning_rate": 5.2879168275158574e-06, "loss": 0.7471, "step": 3474 }, { "epoch": 0.666682654260294, "grad_norm": 1.1082308169075397, "learning_rate": 5.282436128210682e-06, "loss": 0.7103, "step": 3475 }, { "epoch": 0.6668745053838222, "grad_norm": 0.844616675568229, "learning_rate": 5.276957251018065e-06, "loss": 0.798, "step": 3476 }, { "epoch": 0.6670663565073502, "grad_norm": 1.000070463271283, "learning_rate": 5.271480198054164e-06, "loss": 0.754, "step": 3477 }, { "epoch": 0.6672582076308784, "grad_norm": 1.1051590984008859, "learning_rate": 5.2660049714344385e-06, "loss": 0.8325, "step": 3478 }, { "epoch": 0.6674500587544065, "grad_norm": 0.9158019052931307, "learning_rate": 5.260531573273644e-06, "loss": 0.6845, "step": 3479 }, { "epoch": 0.6676419098779347, "grad_norm": 0.9150760825894125, "learning_rate": 5.2550600056858125e-06, "loss": 0.7918, "step": 3480 }, { "epoch": 0.6678337610014629, "grad_norm": 0.8991427584632209, "learning_rate": 5.249590270784297e-06, "loss": 0.7569, "step": 3481 }, { "epoch": 0.668025612124991, "grad_norm": 0.9324625484825427, "learning_rate": 5.244122370681726e-06, "loss": 0.7531, "step": 3482 }, { "epoch": 0.6682174632485192, "grad_norm": 0.8498876504717134, "learning_rate": 5.23865630749002e-06, "loss": 0.6861, "step": 3483 }, { "epoch": 0.6684093143720473, "grad_norm": 0.9455418789991995, "learning_rate": 5.233192083320389e-06, "loss": 0.6869, "step": 3484 }, { "epoch": 0.6686011654955755, "grad_norm": 0.9373653522185497, "learning_rate": 5.2277297002833415e-06, "loss": 0.6727, "step": 3485 }, { "epoch": 0.6687930166191036, "grad_norm": 0.8599123423284492, "learning_rate": 5.2222691604886684e-06, "loss": 0.775, "step": 3486 }, { "epoch": 0.6689848677426318, "grad_norm": 0.950564425403336, "learning_rate": 5.216810466045448e-06, "loss": 0.7891, "step": 3487 }, { "epoch": 0.6691767188661598, "grad_norm": 1.0940280463493592, "learning_rate": 5.211353619062047e-06, "loss": 0.8727, "step": 3488 }, { "epoch": 0.669368569989688, "grad_norm": 1.0198865060305402, "learning_rate": 5.205898621646115e-06, "loss": 0.7581, "step": 3489 }, { "epoch": 0.6695604211132161, "grad_norm": 1.1050556834101584, "learning_rate": 5.200445475904598e-06, "loss": 0.7301, "step": 3490 }, { "epoch": 0.6697522722367443, "grad_norm": 1.2615175104586827, "learning_rate": 5.1949941839437175e-06, "loss": 0.7122, "step": 3491 }, { "epoch": 0.6699441233602724, "grad_norm": 0.8818490609638928, "learning_rate": 5.189544747868981e-06, "loss": 0.8591, "step": 3492 }, { "epoch": 0.6701359744838006, "grad_norm": 0.8359984979398913, "learning_rate": 5.184097169785183e-06, "loss": 0.7591, "step": 3493 }, { "epoch": 0.6703278256073287, "grad_norm": 0.9440276430954507, "learning_rate": 5.178651451796384e-06, "loss": 0.7759, "step": 3494 }, { "epoch": 0.6705196767308569, "grad_norm": 0.9053564838344972, "learning_rate": 5.173207596005951e-06, "loss": 0.7798, "step": 3495 }, { "epoch": 0.670711527854385, "grad_norm": 0.9418258271900726, "learning_rate": 5.167765604516519e-06, "loss": 0.797, "step": 3496 }, { "epoch": 0.6709033789779132, "grad_norm": 0.8415777176320329, "learning_rate": 5.162325479429999e-06, "loss": 0.7076, "step": 3497 }, { "epoch": 0.6710952301014412, "grad_norm": 0.9503436322188151, "learning_rate": 5.156887222847589e-06, "loss": 0.7422, "step": 3498 }, { "epoch": 0.6712870812249694, "grad_norm": 1.0281519293920796, "learning_rate": 5.151450836869756e-06, "loss": 0.7625, "step": 3499 }, { "epoch": 0.6714789323484975, "grad_norm": 1.0458252839554756, "learning_rate": 5.146016323596259e-06, "loss": 0.7409, "step": 3500 }, { "epoch": 0.6716707834720257, "grad_norm": 0.9698369226697802, "learning_rate": 5.140583685126122e-06, "loss": 0.7026, "step": 3501 }, { "epoch": 0.6718626345955538, "grad_norm": 0.8381027391333904, "learning_rate": 5.135152923557647e-06, "loss": 0.7687, "step": 3502 }, { "epoch": 0.672054485719082, "grad_norm": 0.9507757362471732, "learning_rate": 5.129724040988413e-06, "loss": 0.7863, "step": 3503 }, { "epoch": 0.6722463368426101, "grad_norm": 1.0684106109456348, "learning_rate": 5.124297039515267e-06, "loss": 0.7668, "step": 3504 }, { "epoch": 0.6724381879661383, "grad_norm": 0.6757213664641767, "learning_rate": 5.1188719212343456e-06, "loss": 0.2993, "step": 3505 }, { "epoch": 0.6726300390896665, "grad_norm": 0.6750836009469288, "learning_rate": 5.113448688241045e-06, "loss": 0.3262, "step": 3506 }, { "epoch": 0.6728218902131946, "grad_norm": 0.8773310433707446, "learning_rate": 5.108027342630027e-06, "loss": 0.7117, "step": 3507 }, { "epoch": 0.6730137413367226, "grad_norm": 0.9693395472188857, "learning_rate": 5.102607886495239e-06, "loss": 0.7916, "step": 3508 }, { "epoch": 0.6732055924602508, "grad_norm": 0.9100903653227824, "learning_rate": 5.097190321929887e-06, "loss": 0.7555, "step": 3509 }, { "epoch": 0.673397443583779, "grad_norm": 0.9321068131551233, "learning_rate": 5.09177465102646e-06, "loss": 0.8031, "step": 3510 }, { "epoch": 0.6735892947073071, "grad_norm": 0.9058070354367402, "learning_rate": 5.086360875876704e-06, "loss": 0.7166, "step": 3511 }, { "epoch": 0.6737811458308353, "grad_norm": 0.8338439340734162, "learning_rate": 5.080948998571638e-06, "loss": 0.7565, "step": 3512 }, { "epoch": 0.6739729969543634, "grad_norm": 0.998726727155288, "learning_rate": 5.075539021201543e-06, "loss": 0.8328, "step": 3513 }, { "epoch": 0.6741648480778916, "grad_norm": 0.9622830897427179, "learning_rate": 5.0701309458559665e-06, "loss": 0.7597, "step": 3514 }, { "epoch": 0.6743566992014197, "grad_norm": 1.0382261925909855, "learning_rate": 5.0647247746237325e-06, "loss": 0.75, "step": 3515 }, { "epoch": 0.6745485503249479, "grad_norm": 1.1029934099971452, "learning_rate": 5.05932050959292e-06, "loss": 0.7151, "step": 3516 }, { "epoch": 0.674740401448476, "grad_norm": 1.3063389616027996, "learning_rate": 5.053918152850868e-06, "loss": 0.7194, "step": 3517 }, { "epoch": 0.6749322525720042, "grad_norm": 0.8649768606142115, "learning_rate": 5.048517706484188e-06, "loss": 0.7785, "step": 3518 }, { "epoch": 0.6751241036955322, "grad_norm": 0.8670324948006858, "learning_rate": 5.043119172578747e-06, "loss": 0.7861, "step": 3519 }, { "epoch": 0.6753159548190604, "grad_norm": 1.0873686042215756, "learning_rate": 5.037722553219675e-06, "loss": 0.7265, "step": 3520 }, { "epoch": 0.6755078059425885, "grad_norm": 0.9519955179384274, "learning_rate": 5.032327850491366e-06, "loss": 0.787, "step": 3521 }, { "epoch": 0.6756996570661167, "grad_norm": 0.7043015581975327, "learning_rate": 5.026935066477467e-06, "loss": 0.3112, "step": 3522 }, { "epoch": 0.6758915081896448, "grad_norm": 0.8666987589214176, "learning_rate": 5.021544203260885e-06, "loss": 0.8088, "step": 3523 }, { "epoch": 0.676083359313173, "grad_norm": 0.9018140040802359, "learning_rate": 5.016155262923798e-06, "loss": 0.7544, "step": 3524 }, { "epoch": 0.6762752104367011, "grad_norm": 0.8747957903843298, "learning_rate": 5.010768247547624e-06, "loss": 0.7648, "step": 3525 }, { "epoch": 0.6764670615602293, "grad_norm": 0.872106390643657, "learning_rate": 5.0053831592130465e-06, "loss": 0.7403, "step": 3526 }, { "epoch": 0.6766589126837574, "grad_norm": 0.9641881497311378, "learning_rate": 5.000000000000003e-06, "loss": 0.7247, "step": 3527 }, { "epoch": 0.6768507638072856, "grad_norm": 0.9331756100538198, "learning_rate": 4.994618771987679e-06, "loss": 0.7195, "step": 3528 }, { "epoch": 0.6770426149308136, "grad_norm": 0.8495831027564065, "learning_rate": 4.989239477254533e-06, "loss": 0.7172, "step": 3529 }, { "epoch": 0.6772344660543418, "grad_norm": 0.8823200676666972, "learning_rate": 4.983862117878257e-06, "loss": 0.7604, "step": 3530 }, { "epoch": 0.6774263171778699, "grad_norm": 0.8389844925633922, "learning_rate": 4.978486695935805e-06, "loss": 0.7773, "step": 3531 }, { "epoch": 0.6776181683013981, "grad_norm": 1.1077503464518246, "learning_rate": 4.973113213503379e-06, "loss": 0.79, "step": 3532 }, { "epoch": 0.6778100194249262, "grad_norm": 0.8197046296957559, "learning_rate": 4.9677416726564356e-06, "loss": 0.764, "step": 3533 }, { "epoch": 0.6780018705484544, "grad_norm": 0.8743400937965933, "learning_rate": 4.962372075469678e-06, "loss": 0.7426, "step": 3534 }, { "epoch": 0.6781937216719826, "grad_norm": 0.9150820693399923, "learning_rate": 4.957004424017061e-06, "loss": 0.8123, "step": 3535 }, { "epoch": 0.6783855727955107, "grad_norm": 0.9243899591004078, "learning_rate": 4.951638720371787e-06, "loss": 0.7622, "step": 3536 }, { "epoch": 0.6785774239190389, "grad_norm": 0.875911228722559, "learning_rate": 4.946274966606307e-06, "loss": 0.7745, "step": 3537 }, { "epoch": 0.678769275042567, "grad_norm": 1.037588537369569, "learning_rate": 4.940913164792312e-06, "loss": 0.8087, "step": 3538 }, { "epoch": 0.6789611261660952, "grad_norm": 1.0177748179961277, "learning_rate": 4.935553317000756e-06, "loss": 0.7366, "step": 3539 }, { "epoch": 0.6791529772896232, "grad_norm": 0.9894051516160625, "learning_rate": 4.93019542530182e-06, "loss": 0.7437, "step": 3540 }, { "epoch": 0.6793448284131514, "grad_norm": 0.9226852102823431, "learning_rate": 4.9248394917649386e-06, "loss": 0.8101, "step": 3541 }, { "epoch": 0.6795366795366795, "grad_norm": 1.1378337060131478, "learning_rate": 4.919485518458789e-06, "loss": 0.7658, "step": 3542 }, { "epoch": 0.6797285306602077, "grad_norm": 0.8094167532287037, "learning_rate": 4.914133507451285e-06, "loss": 0.7533, "step": 3543 }, { "epoch": 0.6799203817837358, "grad_norm": 0.9989649120569173, "learning_rate": 4.9087834608095975e-06, "loss": 0.7891, "step": 3544 }, { "epoch": 0.680112232907264, "grad_norm": 0.8496087351416721, "learning_rate": 4.903435380600126e-06, "loss": 0.7358, "step": 3545 }, { "epoch": 0.6803040840307921, "grad_norm": 0.9568196146104323, "learning_rate": 4.898089268888511e-06, "loss": 0.7605, "step": 3546 }, { "epoch": 0.6804959351543203, "grad_norm": 1.0096907620451134, "learning_rate": 4.8927451277396365e-06, "loss": 0.7924, "step": 3547 }, { "epoch": 0.6806877862778484, "grad_norm": 0.8624769770897327, "learning_rate": 4.887402959217625e-06, "loss": 0.8167, "step": 3548 }, { "epoch": 0.6808796374013766, "grad_norm": 0.9828310744729651, "learning_rate": 4.882062765385838e-06, "loss": 0.7701, "step": 3549 }, { "epoch": 0.6810714885249046, "grad_norm": 0.8549486454853882, "learning_rate": 4.876724548306869e-06, "loss": 0.7518, "step": 3550 }, { "epoch": 0.6812633396484328, "grad_norm": 0.9020238841925121, "learning_rate": 4.871388310042556e-06, "loss": 0.7364, "step": 3551 }, { "epoch": 0.6814551907719609, "grad_norm": 0.9827495829468804, "learning_rate": 4.866054052653963e-06, "loss": 0.7683, "step": 3552 }, { "epoch": 0.6816470418954891, "grad_norm": 0.8921678606105945, "learning_rate": 4.860721778201397e-06, "loss": 0.8178, "step": 3553 }, { "epoch": 0.6818388930190172, "grad_norm": 0.8917288793823989, "learning_rate": 4.8553914887443986e-06, "loss": 0.7723, "step": 3554 }, { "epoch": 0.6820307441425454, "grad_norm": 0.6539500889687951, "learning_rate": 4.850063186341739e-06, "loss": 0.3119, "step": 3555 }, { "epoch": 0.6822225952660735, "grad_norm": 0.9076649267072722, "learning_rate": 4.844736873051422e-06, "loss": 0.7625, "step": 3556 }, { "epoch": 0.6824144463896017, "grad_norm": 0.8532217610108516, "learning_rate": 4.839412550930682e-06, "loss": 0.7994, "step": 3557 }, { "epoch": 0.6826062975131298, "grad_norm": 0.8324397352559099, "learning_rate": 4.834090222035984e-06, "loss": 0.8163, "step": 3558 }, { "epoch": 0.682798148636658, "grad_norm": 1.0122939544756748, "learning_rate": 4.828769888423031e-06, "loss": 0.7827, "step": 3559 }, { "epoch": 0.6829899997601862, "grad_norm": 0.9383723997135129, "learning_rate": 4.8234515521467485e-06, "loss": 0.7441, "step": 3560 }, { "epoch": 0.6831818508837142, "grad_norm": 1.0670673782154094, "learning_rate": 4.8181352152612885e-06, "loss": 0.7127, "step": 3561 }, { "epoch": 0.6833737020072423, "grad_norm": 0.9075818254723268, "learning_rate": 4.812820879820034e-06, "loss": 0.7797, "step": 3562 }, { "epoch": 0.6835655531307705, "grad_norm": 0.9822971699485354, "learning_rate": 4.8075085478755965e-06, "loss": 0.8183, "step": 3563 }, { "epoch": 0.6837574042542987, "grad_norm": 0.8421764972949722, "learning_rate": 4.80219822147981e-06, "loss": 0.6923, "step": 3564 }, { "epoch": 0.6839492553778268, "grad_norm": 0.8708892343693507, "learning_rate": 4.7968899026837375e-06, "loss": 0.6892, "step": 3565 }, { "epoch": 0.684141106501355, "grad_norm": 0.7763926281621143, "learning_rate": 4.791583593537663e-06, "loss": 0.6538, "step": 3566 }, { "epoch": 0.6843329576248831, "grad_norm": 0.8891682774261009, "learning_rate": 4.786279296091094e-06, "loss": 0.7823, "step": 3567 }, { "epoch": 0.6845248087484113, "grad_norm": 1.039319053380695, "learning_rate": 4.780977012392769e-06, "loss": 0.8158, "step": 3568 }, { "epoch": 0.6847166598719394, "grad_norm": 0.9317188209271341, "learning_rate": 4.7756767444906415e-06, "loss": 0.7306, "step": 3569 }, { "epoch": 0.6849085109954676, "grad_norm": 0.8471262673305451, "learning_rate": 4.7703784944318845e-06, "loss": 0.7355, "step": 3570 }, { "epoch": 0.6851003621189956, "grad_norm": 0.6651752137024665, "learning_rate": 4.765082264262898e-06, "loss": 0.3121, "step": 3571 }, { "epoch": 0.6852922132425238, "grad_norm": 0.8427961243361044, "learning_rate": 4.759788056029292e-06, "loss": 0.7243, "step": 3572 }, { "epoch": 0.6854840643660519, "grad_norm": 0.9146626354953333, "learning_rate": 4.754495871775913e-06, "loss": 0.7378, "step": 3573 }, { "epoch": 0.6856759154895801, "grad_norm": 0.951251270039168, "learning_rate": 4.749205713546808e-06, "loss": 0.7532, "step": 3574 }, { "epoch": 0.6858677666131082, "grad_norm": 0.9301100410534304, "learning_rate": 4.7439175833852504e-06, "loss": 0.763, "step": 3575 }, { "epoch": 0.6860596177366364, "grad_norm": 1.1309175628759536, "learning_rate": 4.738631483333729e-06, "loss": 0.7306, "step": 3576 }, { "epoch": 0.6862514688601645, "grad_norm": 0.908478364626317, "learning_rate": 4.733347415433946e-06, "loss": 0.7893, "step": 3577 }, { "epoch": 0.6864433199836927, "grad_norm": 0.95591629792499, "learning_rate": 4.7280653817268205e-06, "loss": 0.7711, "step": 3578 }, { "epoch": 0.6866351711072208, "grad_norm": 0.9843435080485766, "learning_rate": 4.722785384252488e-06, "loss": 0.703, "step": 3579 }, { "epoch": 0.686827022230749, "grad_norm": 0.8975290445402936, "learning_rate": 4.717507425050294e-06, "loss": 0.7586, "step": 3580 }, { "epoch": 0.687018873354277, "grad_norm": 0.8848888122742087, "learning_rate": 4.7122315061587985e-06, "loss": 0.6534, "step": 3581 }, { "epoch": 0.6872107244778052, "grad_norm": 1.0093591552489312, "learning_rate": 4.7069576296157704e-06, "loss": 0.6647, "step": 3582 }, { "epoch": 0.6874025756013333, "grad_norm": 1.0667174129409205, "learning_rate": 4.701685797458198e-06, "loss": 0.696, "step": 3583 }, { "epoch": 0.6875944267248615, "grad_norm": 1.0135165018587706, "learning_rate": 4.696416011722272e-06, "loss": 0.7563, "step": 3584 }, { "epoch": 0.6877862778483896, "grad_norm": 0.8582608719898654, "learning_rate": 4.691148274443396e-06, "loss": 0.7697, "step": 3585 }, { "epoch": 0.6879781289719178, "grad_norm": 0.6436444421600652, "learning_rate": 4.685882587656179e-06, "loss": 0.3206, "step": 3586 }, { "epoch": 0.6881699800954459, "grad_norm": 0.8785539243819784, "learning_rate": 4.6806189533944405e-06, "loss": 0.7664, "step": 3587 }, { "epoch": 0.6883618312189741, "grad_norm": 1.0880464278468165, "learning_rate": 4.675357373691213e-06, "loss": 0.7628, "step": 3588 }, { "epoch": 0.6885536823425022, "grad_norm": 0.9342730422431852, "learning_rate": 4.670097850578724e-06, "loss": 0.7416, "step": 3589 }, { "epoch": 0.6887455334660304, "grad_norm": 0.8542739493390168, "learning_rate": 4.664840386088416e-06, "loss": 0.7625, "step": 3590 }, { "epoch": 0.6889373845895586, "grad_norm": 0.9416383079348264, "learning_rate": 4.659584982250932e-06, "loss": 0.8097, "step": 3591 }, { "epoch": 0.6891292357130866, "grad_norm": 1.0251765575764245, "learning_rate": 4.654331641096118e-06, "loss": 0.7902, "step": 3592 }, { "epoch": 0.6893210868366147, "grad_norm": 0.6343568191457577, "learning_rate": 4.649080364653027e-06, "loss": 0.2958, "step": 3593 }, { "epoch": 0.6895129379601429, "grad_norm": 0.6696946571362513, "learning_rate": 4.6438311549499115e-06, "loss": 0.296, "step": 3594 }, { "epoch": 0.689704789083671, "grad_norm": 0.8913483277171724, "learning_rate": 4.638584014014229e-06, "loss": 0.8578, "step": 3595 }, { "epoch": 0.6898966402071992, "grad_norm": 1.04317991649062, "learning_rate": 4.633338943872634e-06, "loss": 0.839, "step": 3596 }, { "epoch": 0.6900884913307274, "grad_norm": 0.9383331330932102, "learning_rate": 4.6280959465509815e-06, "loss": 0.7317, "step": 3597 }, { "epoch": 0.6902803424542555, "grad_norm": 0.8947350017612112, "learning_rate": 4.622855024074332e-06, "loss": 0.7853, "step": 3598 }, { "epoch": 0.6904721935777837, "grad_norm": 0.9707405714699144, "learning_rate": 4.617616178466939e-06, "loss": 0.6864, "step": 3599 }, { "epoch": 0.6906640447013118, "grad_norm": 0.9036600805777905, "learning_rate": 4.612379411752255e-06, "loss": 0.764, "step": 3600 }, { "epoch": 0.69085589582484, "grad_norm": 0.8121470937385916, "learning_rate": 4.607144725952924e-06, "loss": 0.7512, "step": 3601 }, { "epoch": 0.691047746948368, "grad_norm": 0.9308510066286809, "learning_rate": 4.601912123090799e-06, "loss": 0.7544, "step": 3602 }, { "epoch": 0.6912395980718962, "grad_norm": 0.8909182964523917, "learning_rate": 4.596681605186921e-06, "loss": 0.695, "step": 3603 }, { "epoch": 0.6914314491954243, "grad_norm": 0.8456855778271042, "learning_rate": 4.591453174261522e-06, "loss": 0.7399, "step": 3604 }, { "epoch": 0.6916233003189525, "grad_norm": 1.056327878200812, "learning_rate": 4.586226832334035e-06, "loss": 0.7469, "step": 3605 }, { "epoch": 0.6918151514424806, "grad_norm": 0.9583962141746702, "learning_rate": 4.581002581423081e-06, "loss": 0.7593, "step": 3606 }, { "epoch": 0.6920070025660088, "grad_norm": 0.6953341069931649, "learning_rate": 4.575780423546476e-06, "loss": 0.3062, "step": 3607 }, { "epoch": 0.6921988536895369, "grad_norm": 0.9643407130465445, "learning_rate": 4.5705603607212275e-06, "loss": 0.7989, "step": 3608 }, { "epoch": 0.6923907048130651, "grad_norm": 0.790533923428516, "learning_rate": 4.565342394963533e-06, "loss": 0.3305, "step": 3609 }, { "epoch": 0.6925825559365932, "grad_norm": 1.0390528463149988, "learning_rate": 4.560126528288781e-06, "loss": 0.8183, "step": 3610 }, { "epoch": 0.6927744070601214, "grad_norm": 0.9374292993224312, "learning_rate": 4.554912762711544e-06, "loss": 0.8414, "step": 3611 }, { "epoch": 0.6929662581836495, "grad_norm": 0.6657865991988027, "learning_rate": 4.549701100245595e-06, "loss": 0.3023, "step": 3612 }, { "epoch": 0.6931581093071776, "grad_norm": 1.1291784891758423, "learning_rate": 4.5444915429038836e-06, "loss": 0.7351, "step": 3613 }, { "epoch": 0.6933499604307057, "grad_norm": 0.9273398500973764, "learning_rate": 4.539284092698551e-06, "loss": 0.826, "step": 3614 }, { "epoch": 0.6935418115542339, "grad_norm": 0.982480535177728, "learning_rate": 4.534078751640921e-06, "loss": 0.7442, "step": 3615 }, { "epoch": 0.693733662677762, "grad_norm": 0.9749094423148262, "learning_rate": 4.528875521741505e-06, "loss": 0.8179, "step": 3616 }, { "epoch": 0.6939255138012902, "grad_norm": 0.8658192287085942, "learning_rate": 4.523674405010005e-06, "loss": 0.6702, "step": 3617 }, { "epoch": 0.6941173649248183, "grad_norm": 0.9806816984879537, "learning_rate": 4.518475403455297e-06, "loss": 0.7703, "step": 3618 }, { "epoch": 0.6943092160483465, "grad_norm": 0.8834056503528833, "learning_rate": 4.513278519085444e-06, "loss": 0.722, "step": 3619 }, { "epoch": 0.6945010671718747, "grad_norm": 0.8160933066520792, "learning_rate": 4.5080837539076914e-06, "loss": 0.6363, "step": 3620 }, { "epoch": 0.6946929182954028, "grad_norm": 0.8216570992530314, "learning_rate": 4.502891109928468e-06, "loss": 0.7766, "step": 3621 }, { "epoch": 0.694884769418931, "grad_norm": 1.0533287978537011, "learning_rate": 4.497700589153379e-06, "loss": 0.7712, "step": 3622 }, { "epoch": 0.695076620542459, "grad_norm": 0.9849461752386611, "learning_rate": 4.492512193587212e-06, "loss": 0.8014, "step": 3623 }, { "epoch": 0.6952684716659872, "grad_norm": 0.9014591324853203, "learning_rate": 4.487325925233935e-06, "loss": 0.7769, "step": 3624 }, { "epoch": 0.6954603227895153, "grad_norm": 0.9893665176320214, "learning_rate": 4.4821417860966935e-06, "loss": 0.7342, "step": 3625 }, { "epoch": 0.6956521739130435, "grad_norm": 1.02914702977716, "learning_rate": 4.476959778177804e-06, "loss": 0.7646, "step": 3626 }, { "epoch": 0.6958440250365716, "grad_norm": 0.6520171886043403, "learning_rate": 4.471779903478776e-06, "loss": 0.3223, "step": 3627 }, { "epoch": 0.6960358761600998, "grad_norm": 0.8936134937513163, "learning_rate": 4.466602164000279e-06, "loss": 0.7344, "step": 3628 }, { "epoch": 0.6962277272836279, "grad_norm": 0.965461829012544, "learning_rate": 4.461426561742166e-06, "loss": 0.8438, "step": 3629 }, { "epoch": 0.6964195784071561, "grad_norm": 1.0204837086217238, "learning_rate": 4.45625309870346e-06, "loss": 0.7327, "step": 3630 }, { "epoch": 0.6966114295306842, "grad_norm": 1.0150534495044141, "learning_rate": 4.451081776882359e-06, "loss": 0.7835, "step": 3631 }, { "epoch": 0.6968032806542124, "grad_norm": 1.014158165213877, "learning_rate": 4.4459125982762406e-06, "loss": 0.7666, "step": 3632 }, { "epoch": 0.6969951317777405, "grad_norm": 0.9984697089089023, "learning_rate": 4.440745564881646e-06, "loss": 0.7612, "step": 3633 }, { "epoch": 0.6971869829012686, "grad_norm": 0.9416834247038454, "learning_rate": 4.4355806786942925e-06, "loss": 0.7416, "step": 3634 }, { "epoch": 0.6973788340247967, "grad_norm": 0.9850998984238569, "learning_rate": 4.430417941709068e-06, "loss": 0.8236, "step": 3635 }, { "epoch": 0.6975706851483249, "grad_norm": 0.823720037940066, "learning_rate": 4.425257355920018e-06, "loss": 0.7815, "step": 3636 }, { "epoch": 0.697762536271853, "grad_norm": 0.9545977769138263, "learning_rate": 4.420098923320378e-06, "loss": 0.8052, "step": 3637 }, { "epoch": 0.6979543873953812, "grad_norm": 0.914597515976046, "learning_rate": 4.414942645902541e-06, "loss": 0.7148, "step": 3638 }, { "epoch": 0.6981462385189093, "grad_norm": 0.9463255085752552, "learning_rate": 4.409788525658068e-06, "loss": 0.7881, "step": 3639 }, { "epoch": 0.6983380896424375, "grad_norm": 0.8957540998208628, "learning_rate": 4.40463656457768e-06, "loss": 0.7747, "step": 3640 }, { "epoch": 0.6985299407659656, "grad_norm": 0.8822776966332944, "learning_rate": 4.399486764651282e-06, "loss": 0.7798, "step": 3641 }, { "epoch": 0.6987217918894938, "grad_norm": 1.0213489484080842, "learning_rate": 4.394339127867927e-06, "loss": 0.7745, "step": 3642 }, { "epoch": 0.698913643013022, "grad_norm": 0.9264623152046566, "learning_rate": 4.389193656215842e-06, "loss": 0.7359, "step": 3643 }, { "epoch": 0.69910549413655, "grad_norm": 0.8410572913843326, "learning_rate": 4.384050351682413e-06, "loss": 0.782, "step": 3644 }, { "epoch": 0.6992973452600781, "grad_norm": 0.8319349120342022, "learning_rate": 4.378909216254188e-06, "loss": 0.7582, "step": 3645 }, { "epoch": 0.6994891963836063, "grad_norm": 0.6667503772871665, "learning_rate": 4.373770251916885e-06, "loss": 0.3045, "step": 3646 }, { "epoch": 0.6996810475071344, "grad_norm": 0.8492823997727738, "learning_rate": 4.368633460655377e-06, "loss": 0.7773, "step": 3647 }, { "epoch": 0.6998728986306626, "grad_norm": 0.9442219414626635, "learning_rate": 4.363498844453702e-06, "loss": 0.736, "step": 3648 }, { "epoch": 0.7000647497541908, "grad_norm": 0.9781884585537369, "learning_rate": 4.358366405295048e-06, "loss": 0.8056, "step": 3649 }, { "epoch": 0.7002566008777189, "grad_norm": 0.8822987315030311, "learning_rate": 4.3532361451617674e-06, "loss": 0.7147, "step": 3650 }, { "epoch": 0.7004484520012471, "grad_norm": 0.9886744316735717, "learning_rate": 4.348108066035382e-06, "loss": 0.7719, "step": 3651 }, { "epoch": 0.7006403031247752, "grad_norm": 0.7994469734423002, "learning_rate": 4.342982169896555e-06, "loss": 0.6989, "step": 3652 }, { "epoch": 0.7008321542483034, "grad_norm": 1.086793212091615, "learning_rate": 4.337858458725118e-06, "loss": 0.7697, "step": 3653 }, { "epoch": 0.7010240053718314, "grad_norm": 1.1327433214702725, "learning_rate": 4.3327369345000505e-06, "loss": 0.7341, "step": 3654 }, { "epoch": 0.7012158564953596, "grad_norm": 1.083985203514251, "learning_rate": 4.327617599199489e-06, "loss": 0.7661, "step": 3655 }, { "epoch": 0.7014077076188877, "grad_norm": 1.023717410724765, "learning_rate": 4.322500454800731e-06, "loss": 0.748, "step": 3656 }, { "epoch": 0.7015995587424159, "grad_norm": 0.9552267135312109, "learning_rate": 4.317385503280221e-06, "loss": 0.7217, "step": 3657 }, { "epoch": 0.701791409865944, "grad_norm": 0.9063822824599455, "learning_rate": 4.3122727466135596e-06, "loss": 0.7816, "step": 3658 }, { "epoch": 0.7019832609894722, "grad_norm": 0.8938836201320284, "learning_rate": 4.3071621867754975e-06, "loss": 0.7189, "step": 3659 }, { "epoch": 0.7021751121130003, "grad_norm": 0.8841907485155338, "learning_rate": 4.3020538257399345e-06, "loss": 0.826, "step": 3660 }, { "epoch": 0.7023669632365285, "grad_norm": 0.8823565060640757, "learning_rate": 4.296947665479937e-06, "loss": 0.7692, "step": 3661 }, { "epoch": 0.7025588143600566, "grad_norm": 0.9098405302024337, "learning_rate": 4.291843707967696e-06, "loss": 0.7345, "step": 3662 }, { "epoch": 0.7027506654835848, "grad_norm": 0.9245277560312012, "learning_rate": 4.286741955174569e-06, "loss": 0.8096, "step": 3663 }, { "epoch": 0.7029425166071129, "grad_norm": 0.914710098097775, "learning_rate": 4.281642409071058e-06, "loss": 0.7571, "step": 3664 }, { "epoch": 0.703134367730641, "grad_norm": 0.9320795786675896, "learning_rate": 4.2765450716268105e-06, "loss": 0.7979, "step": 3665 }, { "epoch": 0.7033262188541691, "grad_norm": 0.8326483811341342, "learning_rate": 4.271449944810627e-06, "loss": 0.6913, "step": 3666 }, { "epoch": 0.7035180699776973, "grad_norm": 0.8274799607471767, "learning_rate": 4.266357030590449e-06, "loss": 0.7462, "step": 3667 }, { "epoch": 0.7037099211012254, "grad_norm": 0.8400679829651895, "learning_rate": 4.261266330933363e-06, "loss": 0.7685, "step": 3668 }, { "epoch": 0.7039017722247536, "grad_norm": 0.9341836640087563, "learning_rate": 4.256177847805603e-06, "loss": 0.74, "step": 3669 }, { "epoch": 0.7040936233482817, "grad_norm": 0.922148631903664, "learning_rate": 4.251091583172538e-06, "loss": 0.7507, "step": 3670 }, { "epoch": 0.7042854744718099, "grad_norm": 0.971801587056995, "learning_rate": 4.2460075389987e-06, "loss": 0.7498, "step": 3671 }, { "epoch": 0.704477325595338, "grad_norm": 0.9917611764850369, "learning_rate": 4.240925717247745e-06, "loss": 0.7381, "step": 3672 }, { "epoch": 0.7046691767188662, "grad_norm": 1.1541600964060486, "learning_rate": 4.235846119882475e-06, "loss": 0.8513, "step": 3673 }, { "epoch": 0.7048610278423943, "grad_norm": 0.9375556237303003, "learning_rate": 4.2307687488648375e-06, "loss": 0.7244, "step": 3674 }, { "epoch": 0.7050528789659224, "grad_norm": 1.0448617656592254, "learning_rate": 4.225693606155915e-06, "loss": 0.7676, "step": 3675 }, { "epoch": 0.7052447300894505, "grad_norm": 0.8070617498931905, "learning_rate": 4.220620693715931e-06, "loss": 0.7423, "step": 3676 }, { "epoch": 0.7054365812129787, "grad_norm": 0.9855187725650326, "learning_rate": 4.2155500135042495e-06, "loss": 0.7905, "step": 3677 }, { "epoch": 0.7056284323365069, "grad_norm": 0.897000046439047, "learning_rate": 4.210481567479371e-06, "loss": 0.7725, "step": 3678 }, { "epoch": 0.705820283460035, "grad_norm": 0.8958849307000014, "learning_rate": 4.205415357598926e-06, "loss": 0.7819, "step": 3679 }, { "epoch": 0.7060121345835632, "grad_norm": 0.9618647536631127, "learning_rate": 4.2003513858197e-06, "loss": 0.8279, "step": 3680 }, { "epoch": 0.7062039857070913, "grad_norm": 0.9470950206309816, "learning_rate": 4.195289654097594e-06, "loss": 0.7365, "step": 3681 }, { "epoch": 0.7063958368306195, "grad_norm": 0.831386892433335, "learning_rate": 4.1902301643876555e-06, "loss": 0.7366, "step": 3682 }, { "epoch": 0.7065876879541476, "grad_norm": 0.6313735622322569, "learning_rate": 4.185172918644061e-06, "loss": 0.3184, "step": 3683 }, { "epoch": 0.7067795390776758, "grad_norm": 0.6490325951726085, "learning_rate": 4.180117918820118e-06, "loss": 0.3154, "step": 3684 }, { "epoch": 0.7069713902012039, "grad_norm": 0.9088591095575764, "learning_rate": 4.175065166868278e-06, "loss": 0.7209, "step": 3685 }, { "epoch": 0.707163241324732, "grad_norm": 0.9762399032125508, "learning_rate": 4.170014664740114e-06, "loss": 0.7332, "step": 3686 }, { "epoch": 0.7073550924482601, "grad_norm": 0.9895068543129089, "learning_rate": 4.164966414386332e-06, "loss": 0.7642, "step": 3687 }, { "epoch": 0.7075469435717883, "grad_norm": 0.8908512574757416, "learning_rate": 4.159920417756768e-06, "loss": 0.7942, "step": 3688 }, { "epoch": 0.7077387946953164, "grad_norm": 0.9093512161441409, "learning_rate": 4.154876676800389e-06, "loss": 0.7608, "step": 3689 }, { "epoch": 0.7079306458188446, "grad_norm": 0.8691805525110476, "learning_rate": 4.14983519346529e-06, "loss": 0.7766, "step": 3690 }, { "epoch": 0.7081224969423727, "grad_norm": 0.900173324470032, "learning_rate": 4.144795969698695e-06, "loss": 0.7767, "step": 3691 }, { "epoch": 0.7083143480659009, "grad_norm": 0.8570492022549616, "learning_rate": 4.139759007446955e-06, "loss": 0.8004, "step": 3692 }, { "epoch": 0.708506199189429, "grad_norm": 0.8702437746585451, "learning_rate": 4.134724308655545e-06, "loss": 0.7419, "step": 3693 }, { "epoch": 0.7086980503129572, "grad_norm": 0.8734937801833625, "learning_rate": 4.129691875269066e-06, "loss": 0.7105, "step": 3694 }, { "epoch": 0.7088899014364853, "grad_norm": 0.8262311385510546, "learning_rate": 4.124661709231252e-06, "loss": 0.7165, "step": 3695 }, { "epoch": 0.7090817525600134, "grad_norm": 0.8571469533241591, "learning_rate": 4.119633812484953e-06, "loss": 0.7238, "step": 3696 }, { "epoch": 0.7092736036835415, "grad_norm": 1.2711246908527967, "learning_rate": 4.114608186972143e-06, "loss": 0.7906, "step": 3697 }, { "epoch": 0.7094654548070697, "grad_norm": 0.9811200263138271, "learning_rate": 4.1095848346339206e-06, "loss": 0.709, "step": 3698 }, { "epoch": 0.7096573059305978, "grad_norm": 0.8956969692489786, "learning_rate": 4.104563757410502e-06, "loss": 0.7545, "step": 3699 }, { "epoch": 0.709849157054126, "grad_norm": 1.0116748763172965, "learning_rate": 4.099544957241237e-06, "loss": 0.8148, "step": 3700 }, { "epoch": 0.7100410081776541, "grad_norm": 0.9525279312647645, "learning_rate": 4.094528436064584e-06, "loss": 0.764, "step": 3701 }, { "epoch": 0.7102328593011823, "grad_norm": 0.9176984946833964, "learning_rate": 4.089514195818125e-06, "loss": 0.8589, "step": 3702 }, { "epoch": 0.7104247104247104, "grad_norm": 1.1599295098684708, "learning_rate": 4.08450223843856e-06, "loss": 0.767, "step": 3703 }, { "epoch": 0.7106165615482386, "grad_norm": 0.8927545762990738, "learning_rate": 4.079492565861709e-06, "loss": 0.796, "step": 3704 }, { "epoch": 0.7108084126717668, "grad_norm": 0.9821160725599131, "learning_rate": 4.074485180022508e-06, "loss": 0.7596, "step": 3705 }, { "epoch": 0.7110002637952948, "grad_norm": 0.8524155160153852, "learning_rate": 4.069480082855012e-06, "loss": 0.8155, "step": 3706 }, { "epoch": 0.711192114918823, "grad_norm": 0.6458594628102662, "learning_rate": 4.064477276292388e-06, "loss": 0.3058, "step": 3707 }, { "epoch": 0.7113839660423511, "grad_norm": 0.8552242498302589, "learning_rate": 4.059476762266922e-06, "loss": 0.7312, "step": 3708 }, { "epoch": 0.7115758171658793, "grad_norm": 0.8640843237714495, "learning_rate": 4.0544785427100095e-06, "loss": 0.683, "step": 3709 }, { "epoch": 0.7117676682894074, "grad_norm": 0.8408195660906866, "learning_rate": 4.049482619552172e-06, "loss": 0.7291, "step": 3710 }, { "epoch": 0.7119595194129356, "grad_norm": 0.9067835224702806, "learning_rate": 4.04448899472303e-06, "loss": 0.8283, "step": 3711 }, { "epoch": 0.7121513705364637, "grad_norm": 0.6101894030295361, "learning_rate": 4.0394976701513235e-06, "loss": 0.3275, "step": 3712 }, { "epoch": 0.7123432216599919, "grad_norm": 0.8310834227277621, "learning_rate": 4.034508647764901e-06, "loss": 0.7805, "step": 3713 }, { "epoch": 0.71253507278352, "grad_norm": 0.8863105289549726, "learning_rate": 4.02952192949072e-06, "loss": 0.7676, "step": 3714 }, { "epoch": 0.7127269239070482, "grad_norm": 0.9624150026031987, "learning_rate": 4.024537517254859e-06, "loss": 0.7766, "step": 3715 }, { "epoch": 0.7129187750305763, "grad_norm": 0.9701029255158314, "learning_rate": 4.019555412982494e-06, "loss": 0.7586, "step": 3716 }, { "epoch": 0.7131106261541044, "grad_norm": 0.9050470738381953, "learning_rate": 4.0145756185979146e-06, "loss": 0.7821, "step": 3717 }, { "epoch": 0.7133024772776325, "grad_norm": 0.9308998849180894, "learning_rate": 4.009598136024517e-06, "loss": 0.7809, "step": 3718 }, { "epoch": 0.7134943284011607, "grad_norm": 0.8759482172333959, "learning_rate": 4.004622967184804e-06, "loss": 0.7415, "step": 3719 }, { "epoch": 0.7136861795246888, "grad_norm": 0.9694530282500858, "learning_rate": 3.999650114000386e-06, "loss": 0.7563, "step": 3720 }, { "epoch": 0.713878030648217, "grad_norm": 0.9019450118527835, "learning_rate": 3.994679578391979e-06, "loss": 0.699, "step": 3721 }, { "epoch": 0.7140698817717451, "grad_norm": 0.9564477368289005, "learning_rate": 3.989711362279403e-06, "loss": 0.8095, "step": 3722 }, { "epoch": 0.7142617328952733, "grad_norm": 1.0360351667462768, "learning_rate": 3.984745467581578e-06, "loss": 0.815, "step": 3723 }, { "epoch": 0.7144535840188014, "grad_norm": 1.196982243378907, "learning_rate": 3.9797818962165406e-06, "loss": 0.7523, "step": 3724 }, { "epoch": 0.7146454351423296, "grad_norm": 1.096786461777933, "learning_rate": 3.974820650101417e-06, "loss": 0.7535, "step": 3725 }, { "epoch": 0.7148372862658577, "grad_norm": 0.9984317806505855, "learning_rate": 3.969861731152438e-06, "loss": 0.7908, "step": 3726 }, { "epoch": 0.7150291373893858, "grad_norm": 1.049901041226615, "learning_rate": 3.96490514128494e-06, "loss": 0.8179, "step": 3727 }, { "epoch": 0.7152209885129139, "grad_norm": 0.8876658543672314, "learning_rate": 3.9599508824133535e-06, "loss": 0.7879, "step": 3728 }, { "epoch": 0.7154128396364421, "grad_norm": 0.8644367094872014, "learning_rate": 3.954998956451217e-06, "loss": 0.7436, "step": 3729 }, { "epoch": 0.7156046907599702, "grad_norm": 0.8982607250647574, "learning_rate": 3.950049365311161e-06, "loss": 0.7569, "step": 3730 }, { "epoch": 0.7157965418834984, "grad_norm": 0.9191958827413497, "learning_rate": 3.945102110904915e-06, "loss": 0.822, "step": 3731 }, { "epoch": 0.7159883930070265, "grad_norm": 1.0663682263669125, "learning_rate": 3.940157195143309e-06, "loss": 0.5813, "step": 3732 }, { "epoch": 0.7161802441305547, "grad_norm": 1.2364994281733546, "learning_rate": 3.935214619936268e-06, "loss": 0.7659, "step": 3733 }, { "epoch": 0.7163720952540829, "grad_norm": 0.9946118466112599, "learning_rate": 3.930274387192811e-06, "loss": 0.7443, "step": 3734 }, { "epoch": 0.716563946377611, "grad_norm": 0.9959294138997873, "learning_rate": 3.925336498821055e-06, "loss": 0.7211, "step": 3735 }, { "epoch": 0.7167557975011392, "grad_norm": 0.8269258884426338, "learning_rate": 3.920400956728211e-06, "loss": 0.7596, "step": 3736 }, { "epoch": 0.7169476486246673, "grad_norm": 0.8995803933075023, "learning_rate": 3.915467762820584e-06, "loss": 0.7743, "step": 3737 }, { "epoch": 0.7171394997481954, "grad_norm": 0.9896891643503076, "learning_rate": 3.910536919003568e-06, "loss": 0.7272, "step": 3738 }, { "epoch": 0.7173313508717235, "grad_norm": 0.6436476413607619, "learning_rate": 3.905608427181659e-06, "loss": 0.2884, "step": 3739 }, { "epoch": 0.7175232019952517, "grad_norm": 0.9450412191652227, "learning_rate": 3.900682289258435e-06, "loss": 0.6828, "step": 3740 }, { "epoch": 0.7177150531187798, "grad_norm": 1.0625890187150953, "learning_rate": 3.895758507136569e-06, "loss": 0.7432, "step": 3741 }, { "epoch": 0.717906904242308, "grad_norm": 1.000927384247874, "learning_rate": 3.890837082717822e-06, "loss": 0.7689, "step": 3742 }, { "epoch": 0.7180987553658361, "grad_norm": 0.919777171301497, "learning_rate": 3.885918017903045e-06, "loss": 0.8051, "step": 3743 }, { "epoch": 0.7182906064893643, "grad_norm": 0.9815835335309118, "learning_rate": 3.881001314592181e-06, "loss": 0.7397, "step": 3744 }, { "epoch": 0.7184824576128924, "grad_norm": 1.014894906477565, "learning_rate": 3.876086974684259e-06, "loss": 0.7471, "step": 3745 }, { "epoch": 0.7186743087364206, "grad_norm": 0.8289175966170562, "learning_rate": 3.87117500007739e-06, "loss": 0.7076, "step": 3746 }, { "epoch": 0.7188661598599487, "grad_norm": 0.934035904248691, "learning_rate": 3.866265392668781e-06, "loss": 0.7572, "step": 3747 }, { "epoch": 0.7190580109834768, "grad_norm": 0.971175851897993, "learning_rate": 3.861358154354715e-06, "loss": 0.7343, "step": 3748 }, { "epoch": 0.7192498621070049, "grad_norm": 0.9633847148069786, "learning_rate": 3.856453287030567e-06, "loss": 0.7083, "step": 3749 }, { "epoch": 0.7194417132305331, "grad_norm": 0.8896954053869909, "learning_rate": 3.851550792590791e-06, "loss": 0.7568, "step": 3750 }, { "epoch": 0.7196335643540612, "grad_norm": 0.8866670233938398, "learning_rate": 3.846650672928931e-06, "loss": 0.8332, "step": 3751 }, { "epoch": 0.7198254154775894, "grad_norm": 0.9834693361909846, "learning_rate": 3.841752929937606e-06, "loss": 0.7694, "step": 3752 }, { "epoch": 0.7200172666011175, "grad_norm": 1.0432163118055127, "learning_rate": 3.836857565508522e-06, "loss": 0.7912, "step": 3753 }, { "epoch": 0.7202091177246457, "grad_norm": 1.0121805418500474, "learning_rate": 3.831964581532467e-06, "loss": 0.5917, "step": 3754 }, { "epoch": 0.7204009688481738, "grad_norm": 0.9412145222444552, "learning_rate": 3.827073979899308e-06, "loss": 0.7826, "step": 3755 }, { "epoch": 0.720592819971702, "grad_norm": 0.9779718096995572, "learning_rate": 3.82218576249799e-06, "loss": 0.7207, "step": 3756 }, { "epoch": 0.7207846710952301, "grad_norm": 0.9360267166885058, "learning_rate": 3.817299931216537e-06, "loss": 0.8036, "step": 3757 }, { "epoch": 0.7209765222187583, "grad_norm": 0.9111922853768183, "learning_rate": 3.812416487942059e-06, "loss": 0.742, "step": 3758 }, { "epoch": 0.7211683733422863, "grad_norm": 0.8636993269570996, "learning_rate": 3.807535434560734e-06, "loss": 0.8004, "step": 3759 }, { "epoch": 0.7213602244658145, "grad_norm": 0.9775306023159727, "learning_rate": 3.8026567729578214e-06, "loss": 0.7988, "step": 3760 }, { "epoch": 0.7215520755893426, "grad_norm": 0.8737478071128022, "learning_rate": 3.7977805050176553e-06, "loss": 0.7829, "step": 3761 }, { "epoch": 0.7217439267128708, "grad_norm": 0.960052602143742, "learning_rate": 3.7929066326236475e-06, "loss": 0.7324, "step": 3762 }, { "epoch": 0.721935777836399, "grad_norm": 0.9411005547706836, "learning_rate": 3.7880351576582818e-06, "loss": 0.7856, "step": 3763 }, { "epoch": 0.7221276289599271, "grad_norm": 0.9104549463685209, "learning_rate": 3.783166082003118e-06, "loss": 0.7178, "step": 3764 }, { "epoch": 0.7223194800834553, "grad_norm": 0.8760651561365256, "learning_rate": 3.778299407538789e-06, "loss": 0.717, "step": 3765 }, { "epoch": 0.7225113312069834, "grad_norm": 0.9254829988641354, "learning_rate": 3.7734351361450007e-06, "loss": 0.7572, "step": 3766 }, { "epoch": 0.7227031823305116, "grad_norm": 0.9526585934452096, "learning_rate": 3.768573269700524e-06, "loss": 0.7692, "step": 3767 }, { "epoch": 0.7228950334540397, "grad_norm": 1.013308806616735, "learning_rate": 3.763713810083215e-06, "loss": 0.7624, "step": 3768 }, { "epoch": 0.7230868845775678, "grad_norm": 0.6403531091232462, "learning_rate": 3.75885675916999e-06, "loss": 0.3081, "step": 3769 }, { "epoch": 0.7232787357010959, "grad_norm": 0.8812258081979052, "learning_rate": 3.754002118836835e-06, "loss": 0.7379, "step": 3770 }, { "epoch": 0.7234705868246241, "grad_norm": 1.090072771048365, "learning_rate": 3.7491498909588076e-06, "loss": 0.7674, "step": 3771 }, { "epoch": 0.7236624379481522, "grad_norm": 1.0136084120081559, "learning_rate": 3.74430007741003e-06, "loss": 0.7139, "step": 3772 }, { "epoch": 0.7238542890716804, "grad_norm": 0.8197334450523441, "learning_rate": 3.739452680063702e-06, "loss": 0.8042, "step": 3773 }, { "epoch": 0.7240461401952085, "grad_norm": 0.8864675120045515, "learning_rate": 3.734607700792079e-06, "loss": 0.77, "step": 3774 }, { "epoch": 0.7242379913187367, "grad_norm": 0.8919032987038714, "learning_rate": 3.7297651414664872e-06, "loss": 0.7209, "step": 3775 }, { "epoch": 0.7244298424422648, "grad_norm": 0.6511649870272508, "learning_rate": 3.724925003957316e-06, "loss": 0.3167, "step": 3776 }, { "epoch": 0.724621693565793, "grad_norm": 0.8891196336367814, "learning_rate": 3.7200872901340214e-06, "loss": 0.794, "step": 3777 }, { "epoch": 0.7248135446893211, "grad_norm": 0.6177530280978853, "learning_rate": 3.715252001865124e-06, "loss": 0.2981, "step": 3778 }, { "epoch": 0.7250053958128492, "grad_norm": 0.9962796094883839, "learning_rate": 3.7104191410182044e-06, "loss": 0.7689, "step": 3779 }, { "epoch": 0.7251972469363773, "grad_norm": 1.025544954235403, "learning_rate": 3.7055887094599086e-06, "loss": 0.7479, "step": 3780 }, { "epoch": 0.7253890980599055, "grad_norm": 0.8441312697507349, "learning_rate": 3.700760709055944e-06, "loss": 0.7405, "step": 3781 }, { "epoch": 0.7255809491834336, "grad_norm": 0.9401685785169653, "learning_rate": 3.695935141671072e-06, "loss": 0.7728, "step": 3782 }, { "epoch": 0.7257728003069618, "grad_norm": 0.9723901700918302, "learning_rate": 3.69111200916913e-06, "loss": 0.8058, "step": 3783 }, { "epoch": 0.7259646514304899, "grad_norm": 0.8702196360523403, "learning_rate": 3.686291313413001e-06, "loss": 0.8138, "step": 3784 }, { "epoch": 0.7261565025540181, "grad_norm": 0.8938170054601936, "learning_rate": 3.6814730562646295e-06, "loss": 0.7674, "step": 3785 }, { "epoch": 0.7263483536775462, "grad_norm": 0.8372318011795499, "learning_rate": 3.676657239585023e-06, "loss": 0.7161, "step": 3786 }, { "epoch": 0.7265402048010744, "grad_norm": 0.929764025547973, "learning_rate": 3.671843865234238e-06, "loss": 0.7389, "step": 3787 }, { "epoch": 0.7267320559246025, "grad_norm": 0.9198533568059045, "learning_rate": 3.6670329350713997e-06, "loss": 0.7201, "step": 3788 }, { "epoch": 0.7269239070481307, "grad_norm": 0.8977730875760803, "learning_rate": 3.66222445095468e-06, "loss": 0.7436, "step": 3789 }, { "epoch": 0.7271157581716587, "grad_norm": 0.9400737510819335, "learning_rate": 3.657418414741312e-06, "loss": 0.775, "step": 3790 }, { "epoch": 0.7273076092951869, "grad_norm": 0.9797184058781393, "learning_rate": 3.6526148282875706e-06, "loss": 0.7518, "step": 3791 }, { "epoch": 0.727499460418715, "grad_norm": 0.9925064051005131, "learning_rate": 3.6478136934487962e-06, "loss": 0.7789, "step": 3792 }, { "epoch": 0.7276913115422432, "grad_norm": 1.0098544084832612, "learning_rate": 3.643015012079386e-06, "loss": 0.7883, "step": 3793 }, { "epoch": 0.7278831626657714, "grad_norm": 1.0986065270090755, "learning_rate": 3.638218786032779e-06, "loss": 0.8213, "step": 3794 }, { "epoch": 0.7280750137892995, "grad_norm": 0.8901565502350473, "learning_rate": 3.6334250171614727e-06, "loss": 0.7998, "step": 3795 }, { "epoch": 0.7282668649128277, "grad_norm": 0.9917630479302818, "learning_rate": 3.628633707317006e-06, "loss": 0.6889, "step": 3796 }, { "epoch": 0.7284587160363558, "grad_norm": 1.003821873926189, "learning_rate": 3.6238448583499854e-06, "loss": 0.7684, "step": 3797 }, { "epoch": 0.728650567159884, "grad_norm": 1.1678182836081983, "learning_rate": 3.619058472110052e-06, "loss": 0.7769, "step": 3798 }, { "epoch": 0.7288424182834121, "grad_norm": 0.9032352217840244, "learning_rate": 3.6142745504459e-06, "loss": 0.6658, "step": 3799 }, { "epoch": 0.7290342694069402, "grad_norm": 0.9357577008927126, "learning_rate": 3.6094930952052708e-06, "loss": 0.7601, "step": 3800 }, { "epoch": 0.7292261205304683, "grad_norm": 1.1301498423255234, "learning_rate": 3.6047141082349533e-06, "loss": 0.6953, "step": 3801 }, { "epoch": 0.7294179716539965, "grad_norm": 0.9143263281722814, "learning_rate": 3.599937591380791e-06, "loss": 0.7248, "step": 3802 }, { "epoch": 0.7296098227775246, "grad_norm": 0.8931168992759485, "learning_rate": 3.595163546487663e-06, "loss": 0.7184, "step": 3803 }, { "epoch": 0.7298016739010528, "grad_norm": 0.9966811950709236, "learning_rate": 3.5903919753994944e-06, "loss": 0.7863, "step": 3804 }, { "epoch": 0.7299935250245809, "grad_norm": 1.0298576263227972, "learning_rate": 3.5856228799592594e-06, "loss": 0.7135, "step": 3805 }, { "epoch": 0.7301853761481091, "grad_norm": 0.9110725681151747, "learning_rate": 3.5808562620089695e-06, "loss": 0.7341, "step": 3806 }, { "epoch": 0.7303772272716372, "grad_norm": 1.1480567487772537, "learning_rate": 3.5760921233896918e-06, "loss": 0.7667, "step": 3807 }, { "epoch": 0.7305690783951654, "grad_norm": 0.9449186939883265, "learning_rate": 3.571330465941525e-06, "loss": 0.8289, "step": 3808 }, { "epoch": 0.7307609295186935, "grad_norm": 0.6363299743413687, "learning_rate": 3.566571291503611e-06, "loss": 0.2938, "step": 3809 }, { "epoch": 0.7309527806422217, "grad_norm": 0.8999429635952441, "learning_rate": 3.561814601914135e-06, "loss": 0.7687, "step": 3810 }, { "epoch": 0.7311446317657497, "grad_norm": 0.9893217957163332, "learning_rate": 3.557060399010318e-06, "loss": 0.8079, "step": 3811 }, { "epoch": 0.7313364828892779, "grad_norm": 0.6956428759972767, "learning_rate": 3.5523086846284303e-06, "loss": 0.3561, "step": 3812 }, { "epoch": 0.731528334012806, "grad_norm": 0.9937248222842886, "learning_rate": 3.5475594606037723e-06, "loss": 0.7924, "step": 3813 }, { "epoch": 0.7317201851363342, "grad_norm": 0.8302750344564027, "learning_rate": 3.542812728770685e-06, "loss": 0.7137, "step": 3814 }, { "epoch": 0.7319120362598623, "grad_norm": 0.8386358930469474, "learning_rate": 3.5380684909625453e-06, "loss": 0.7952, "step": 3815 }, { "epoch": 0.7321038873833905, "grad_norm": 0.9156008012978377, "learning_rate": 3.533326749011765e-06, "loss": 0.6755, "step": 3816 }, { "epoch": 0.7322957385069186, "grad_norm": 0.9476538760879044, "learning_rate": 3.5285875047498075e-06, "loss": 0.829, "step": 3817 }, { "epoch": 0.7324875896304468, "grad_norm": 0.927540712787333, "learning_rate": 3.5238507600071468e-06, "loss": 0.7638, "step": 3818 }, { "epoch": 0.732679440753975, "grad_norm": 0.9701573563170203, "learning_rate": 3.519116516613309e-06, "loss": 0.7212, "step": 3819 }, { "epoch": 0.7328712918775031, "grad_norm": 0.9650471218638175, "learning_rate": 3.5143847763968477e-06, "loss": 0.7296, "step": 3820 }, { "epoch": 0.7330631430010311, "grad_norm": 0.609350842221441, "learning_rate": 3.509655541185348e-06, "loss": 0.27, "step": 3821 }, { "epoch": 0.7332549941245593, "grad_norm": 0.8569286641891894, "learning_rate": 3.504928812805437e-06, "loss": 0.7094, "step": 3822 }, { "epoch": 0.7334468452480875, "grad_norm": 1.0605930680660276, "learning_rate": 3.5002045930827644e-06, "loss": 0.8629, "step": 3823 }, { "epoch": 0.7336386963716156, "grad_norm": 0.9354929152033129, "learning_rate": 3.495482883842012e-06, "loss": 0.7676, "step": 3824 }, { "epoch": 0.7338305474951438, "grad_norm": 0.925852025700728, "learning_rate": 3.490763686906895e-06, "loss": 0.7647, "step": 3825 }, { "epoch": 0.7340223986186719, "grad_norm": 0.9951233098459837, "learning_rate": 3.4860470041001515e-06, "loss": 0.799, "step": 3826 }, { "epoch": 0.7342142497422001, "grad_norm": 0.9329898719894535, "learning_rate": 3.4813328372435627e-06, "loss": 0.8047, "step": 3827 }, { "epoch": 0.7344061008657282, "grad_norm": 0.9338105787346888, "learning_rate": 3.476621188157924e-06, "loss": 0.7239, "step": 3828 }, { "epoch": 0.7345979519892564, "grad_norm": 1.0748378553241873, "learning_rate": 3.471912058663064e-06, "loss": 0.7123, "step": 3829 }, { "epoch": 0.7347898031127845, "grad_norm": 1.1481936336538598, "learning_rate": 3.4672054505778375e-06, "loss": 0.7755, "step": 3830 }, { "epoch": 0.7349816542363127, "grad_norm": 0.834863828142151, "learning_rate": 3.4625013657201255e-06, "loss": 0.7289, "step": 3831 }, { "epoch": 0.7351735053598407, "grad_norm": 0.9056126006383676, "learning_rate": 3.4577998059068354e-06, "loss": 0.7877, "step": 3832 }, { "epoch": 0.7353653564833689, "grad_norm": 0.8723425568570996, "learning_rate": 3.4531007729538966e-06, "loss": 0.771, "step": 3833 }, { "epoch": 0.735557207606897, "grad_norm": 0.8951106939506793, "learning_rate": 3.4484042686762653e-06, "loss": 0.8464, "step": 3834 }, { "epoch": 0.7357490587304252, "grad_norm": 0.9018890255331038, "learning_rate": 3.4437102948879176e-06, "loss": 0.7913, "step": 3835 }, { "epoch": 0.7359409098539533, "grad_norm": 1.1189413003118986, "learning_rate": 3.439018853401859e-06, "loss": 0.7855, "step": 3836 }, { "epoch": 0.7361327609774815, "grad_norm": 0.909250863899413, "learning_rate": 3.4343299460301106e-06, "loss": 0.7005, "step": 3837 }, { "epoch": 0.7363246121010096, "grad_norm": 0.9532824582799257, "learning_rate": 3.4296435745837163e-06, "loss": 0.7878, "step": 3838 }, { "epoch": 0.7365164632245378, "grad_norm": 0.8878338277985817, "learning_rate": 3.4249597408727407e-06, "loss": 0.7645, "step": 3839 }, { "epoch": 0.7367083143480659, "grad_norm": 1.0164309534409801, "learning_rate": 3.4202784467062667e-06, "loss": 0.7829, "step": 3840 }, { "epoch": 0.7369001654715941, "grad_norm": 0.9245550199484595, "learning_rate": 3.4155996938924017e-06, "loss": 0.7541, "step": 3841 }, { "epoch": 0.7370920165951221, "grad_norm": 1.0634805130693739, "learning_rate": 3.4109234842382677e-06, "loss": 0.6952, "step": 3842 }, { "epoch": 0.7372838677186503, "grad_norm": 0.9555128226155385, "learning_rate": 3.4062498195500027e-06, "loss": 0.7889, "step": 3843 }, { "epoch": 0.7374757188421784, "grad_norm": 0.9806258831932484, "learning_rate": 3.401578701632764e-06, "loss": 0.7489, "step": 3844 }, { "epoch": 0.7376675699657066, "grad_norm": 1.041557766792848, "learning_rate": 3.3969101322907237e-06, "loss": 0.8144, "step": 3845 }, { "epoch": 0.7378594210892347, "grad_norm": 1.0027107850221346, "learning_rate": 3.3922441133270734e-06, "loss": 0.7167, "step": 3846 }, { "epoch": 0.7380512722127629, "grad_norm": 0.81341314750882, "learning_rate": 3.3875806465440152e-06, "loss": 0.7513, "step": 3847 }, { "epoch": 0.738243123336291, "grad_norm": 0.9783709929209742, "learning_rate": 3.3829197337427676e-06, "loss": 0.7912, "step": 3848 }, { "epoch": 0.7384349744598192, "grad_norm": 0.8903495113981599, "learning_rate": 3.3782613767235618e-06, "loss": 0.7611, "step": 3849 }, { "epoch": 0.7386268255833474, "grad_norm": 0.949379836865581, "learning_rate": 3.373605577285639e-06, "loss": 0.7848, "step": 3850 }, { "epoch": 0.7388186767068755, "grad_norm": 0.9454407866738234, "learning_rate": 3.3689523372272637e-06, "loss": 0.741, "step": 3851 }, { "epoch": 0.7390105278304036, "grad_norm": 0.9673193965428077, "learning_rate": 3.3643016583456987e-06, "loss": 0.7734, "step": 3852 }, { "epoch": 0.7392023789539317, "grad_norm": 0.9554429028946755, "learning_rate": 3.3596535424372245e-06, "loss": 0.736, "step": 3853 }, { "epoch": 0.7393942300774599, "grad_norm": 1.0138916505170497, "learning_rate": 3.35500799129713e-06, "loss": 0.7677, "step": 3854 }, { "epoch": 0.739586081200988, "grad_norm": 0.8559421591295349, "learning_rate": 3.350365006719709e-06, "loss": 0.7666, "step": 3855 }, { "epoch": 0.7397779323245162, "grad_norm": 0.9656063196647332, "learning_rate": 3.345724590498276e-06, "loss": 0.7507, "step": 3856 }, { "epoch": 0.7399697834480443, "grad_norm": 1.136017486574861, "learning_rate": 3.3410867444251426e-06, "loss": 0.7671, "step": 3857 }, { "epoch": 0.7401616345715725, "grad_norm": 0.8859913061812401, "learning_rate": 3.3364514702916306e-06, "loss": 0.7776, "step": 3858 }, { "epoch": 0.7403534856951006, "grad_norm": 1.0164978843400008, "learning_rate": 3.331818769888071e-06, "loss": 0.766, "step": 3859 }, { "epoch": 0.7405453368186288, "grad_norm": 1.0070974022613826, "learning_rate": 3.327188645003796e-06, "loss": 0.7434, "step": 3860 }, { "epoch": 0.7407371879421569, "grad_norm": 0.8507798585693326, "learning_rate": 3.3225610974271473e-06, "loss": 0.7757, "step": 3861 }, { "epoch": 0.7409290390656851, "grad_norm": 0.9385628991991902, "learning_rate": 3.3179361289454694e-06, "loss": 0.7676, "step": 3862 }, { "epoch": 0.7411208901892131, "grad_norm": 0.9699757943445163, "learning_rate": 3.31331374134511e-06, "loss": 0.8246, "step": 3863 }, { "epoch": 0.7413127413127413, "grad_norm": 0.8979951477157777, "learning_rate": 3.308693936411421e-06, "loss": 0.7832, "step": 3864 }, { "epoch": 0.7415045924362694, "grad_norm": 0.9084862264593787, "learning_rate": 3.3040767159287536e-06, "loss": 0.732, "step": 3865 }, { "epoch": 0.7416964435597976, "grad_norm": 0.6321865847611597, "learning_rate": 3.299462081680471e-06, "loss": 0.2997, "step": 3866 }, { "epoch": 0.7418882946833257, "grad_norm": 1.0639140815331254, "learning_rate": 3.2948500354489255e-06, "loss": 0.7637, "step": 3867 }, { "epoch": 0.7420801458068539, "grad_norm": 0.9451439383889059, "learning_rate": 3.2902405790154755e-06, "loss": 0.8021, "step": 3868 }, { "epoch": 0.742271996930382, "grad_norm": 0.9610774541472701, "learning_rate": 3.2856337141604776e-06, "loss": 0.7771, "step": 3869 }, { "epoch": 0.7424638480539102, "grad_norm": 1.132597633785605, "learning_rate": 3.281029442663285e-06, "loss": 0.7651, "step": 3870 }, { "epoch": 0.7426556991774383, "grad_norm": 0.8864009801006588, "learning_rate": 3.276427766302258e-06, "loss": 0.7782, "step": 3871 }, { "epoch": 0.7428475503009665, "grad_norm": 0.884233031434944, "learning_rate": 3.2718286868547467e-06, "loss": 0.6932, "step": 3872 }, { "epoch": 0.7430394014244945, "grad_norm": 0.8836502237211509, "learning_rate": 3.2672322060970996e-06, "loss": 0.7172, "step": 3873 }, { "epoch": 0.7432312525480227, "grad_norm": 0.9148258983022184, "learning_rate": 3.262638325804662e-06, "loss": 0.7252, "step": 3874 }, { "epoch": 0.7434231036715508, "grad_norm": 0.8722753754855098, "learning_rate": 3.2580470477517744e-06, "loss": 0.7502, "step": 3875 }, { "epoch": 0.743614954795079, "grad_norm": 0.9635668385800128, "learning_rate": 3.253458373711774e-06, "loss": 0.7529, "step": 3876 }, { "epoch": 0.7438068059186072, "grad_norm": 0.655930236875136, "learning_rate": 3.2488723054569905e-06, "loss": 0.3052, "step": 3877 }, { "epoch": 0.7439986570421353, "grad_norm": 0.9724253143354079, "learning_rate": 3.244288844758746e-06, "loss": 0.8051, "step": 3878 }, { "epoch": 0.7441905081656635, "grad_norm": 0.9680589974655766, "learning_rate": 3.2397079933873555e-06, "loss": 0.7486, "step": 3879 }, { "epoch": 0.7443823592891916, "grad_norm": 0.9018050485549068, "learning_rate": 3.235129753112134e-06, "loss": 0.7631, "step": 3880 }, { "epoch": 0.7445742104127198, "grad_norm": 0.8600861924369148, "learning_rate": 3.230554125701377e-06, "loss": 0.7202, "step": 3881 }, { "epoch": 0.7447660615362479, "grad_norm": 0.9087002533412879, "learning_rate": 3.2259811129223784e-06, "loss": 0.7445, "step": 3882 }, { "epoch": 0.7449579126597761, "grad_norm": 1.0616978765112655, "learning_rate": 3.2214107165414164e-06, "loss": 0.703, "step": 3883 }, { "epoch": 0.7451497637833041, "grad_norm": 0.976519660234061, "learning_rate": 3.2168429383237597e-06, "loss": 0.7494, "step": 3884 }, { "epoch": 0.7453416149068323, "grad_norm": 0.969918016286495, "learning_rate": 3.2122777800336747e-06, "loss": 0.7036, "step": 3885 }, { "epoch": 0.7455334660303604, "grad_norm": 0.949356787140584, "learning_rate": 3.207715243434404e-06, "loss": 0.729, "step": 3886 }, { "epoch": 0.7457253171538886, "grad_norm": 0.8779908302649952, "learning_rate": 3.2031553302881835e-06, "loss": 0.8075, "step": 3887 }, { "epoch": 0.7459171682774167, "grad_norm": 0.897414284313353, "learning_rate": 3.1985980423562333e-06, "loss": 0.7909, "step": 3888 }, { "epoch": 0.7461090194009449, "grad_norm": 1.0510549322142544, "learning_rate": 3.1940433813987636e-06, "loss": 0.8433, "step": 3889 }, { "epoch": 0.746300870524473, "grad_norm": 1.0442936171133692, "learning_rate": 3.189491349174965e-06, "loss": 0.8018, "step": 3890 }, { "epoch": 0.7464927216480012, "grad_norm": 0.9074671003999226, "learning_rate": 3.1849419474430156e-06, "loss": 0.7286, "step": 3891 }, { "epoch": 0.7466845727715293, "grad_norm": 0.9769034022137343, "learning_rate": 3.1803951779600774e-06, "loss": 0.7462, "step": 3892 }, { "epoch": 0.7468764238950575, "grad_norm": 0.9582266118018457, "learning_rate": 3.1758510424822942e-06, "loss": 0.7085, "step": 3893 }, { "epoch": 0.7470682750185855, "grad_norm": 0.9167442959223887, "learning_rate": 3.171309542764792e-06, "loss": 0.7974, "step": 3894 }, { "epoch": 0.7472601261421137, "grad_norm": 1.0498809916556187, "learning_rate": 3.1667706805616873e-06, "loss": 0.7479, "step": 3895 }, { "epoch": 0.7474519772656418, "grad_norm": 0.9350559070911536, "learning_rate": 3.1622344576260644e-06, "loss": 0.7639, "step": 3896 }, { "epoch": 0.74764382838917, "grad_norm": 0.8316104782975788, "learning_rate": 3.1577008757099967e-06, "loss": 0.7523, "step": 3897 }, { "epoch": 0.7478356795126981, "grad_norm": 0.9492700190558834, "learning_rate": 3.1531699365645372e-06, "loss": 0.7726, "step": 3898 }, { "epoch": 0.7480275306362263, "grad_norm": 0.8736823931836311, "learning_rate": 3.14864164193971e-06, "loss": 0.6468, "step": 3899 }, { "epoch": 0.7482193817597544, "grad_norm": 0.868632492408462, "learning_rate": 3.144115993584533e-06, "loss": 0.7562, "step": 3900 }, { "epoch": 0.7484112328832826, "grad_norm": 0.8843804740905093, "learning_rate": 3.139592993246989e-06, "loss": 0.783, "step": 3901 }, { "epoch": 0.7486030840068107, "grad_norm": 0.93359088610732, "learning_rate": 3.135072642674043e-06, "loss": 0.7509, "step": 3902 }, { "epoch": 0.7487949351303389, "grad_norm": 0.6430903781795358, "learning_rate": 3.1305549436116356e-06, "loss": 0.2831, "step": 3903 }, { "epoch": 0.748986786253867, "grad_norm": 0.8524142180682649, "learning_rate": 3.1260398978046834e-06, "loss": 0.734, "step": 3904 }, { "epoch": 0.7491786373773951, "grad_norm": 1.0101287524502818, "learning_rate": 3.1215275069970773e-06, "loss": 0.7383, "step": 3905 }, { "epoch": 0.7493704885009232, "grad_norm": 0.7883499064521372, "learning_rate": 3.1170177729316863e-06, "loss": 0.6842, "step": 3906 }, { "epoch": 0.7495623396244514, "grad_norm": 0.9650245094303475, "learning_rate": 3.112510697350348e-06, "loss": 0.7831, "step": 3907 }, { "epoch": 0.7497541907479796, "grad_norm": 0.8366262314939249, "learning_rate": 3.1080062819938783e-06, "loss": 0.7435, "step": 3908 }, { "epoch": 0.7499460418715077, "grad_norm": 1.1818691303728575, "learning_rate": 3.1035045286020595e-06, "loss": 0.6445, "step": 3909 }, { "epoch": 0.7501378929950359, "grad_norm": 0.8886266048269543, "learning_rate": 3.0990054389136537e-06, "loss": 0.7834, "step": 3910 }, { "epoch": 0.750329744118564, "grad_norm": 0.5953472828104952, "learning_rate": 3.0945090146663893e-06, "loss": 0.2987, "step": 3911 }, { "epoch": 0.7505215952420922, "grad_norm": 1.059401378399067, "learning_rate": 3.0900152575969653e-06, "loss": 0.7675, "step": 3912 }, { "epoch": 0.7507134463656203, "grad_norm": 0.9107213092467594, "learning_rate": 3.0855241694410478e-06, "loss": 0.7115, "step": 3913 }, { "epoch": 0.7509052974891485, "grad_norm": 0.8149662530083985, "learning_rate": 3.08103575193328e-06, "loss": 0.672, "step": 3914 }, { "epoch": 0.7510971486126765, "grad_norm": 0.9562128916032491, "learning_rate": 3.0765500068072686e-06, "loss": 0.6821, "step": 3915 }, { "epoch": 0.7512889997362047, "grad_norm": 0.8516512627541308, "learning_rate": 3.072066935795587e-06, "loss": 0.8231, "step": 3916 }, { "epoch": 0.7514808508597328, "grad_norm": 1.0190611916352814, "learning_rate": 3.0675865406297766e-06, "loss": 0.7666, "step": 3917 }, { "epoch": 0.751672701983261, "grad_norm": 0.994163377438418, "learning_rate": 3.063108823040346e-06, "loss": 0.8139, "step": 3918 }, { "epoch": 0.7518645531067891, "grad_norm": 0.9434145820455595, "learning_rate": 3.0586337847567714e-06, "loss": 0.7351, "step": 3919 }, { "epoch": 0.7520564042303173, "grad_norm": 0.9673320307065835, "learning_rate": 3.0541614275074903e-06, "loss": 0.7472, "step": 3920 }, { "epoch": 0.7522482553538454, "grad_norm": 1.025881487463596, "learning_rate": 3.0496917530199076e-06, "loss": 0.7269, "step": 3921 }, { "epoch": 0.7524401064773736, "grad_norm": 0.9419633118847959, "learning_rate": 3.04522476302039e-06, "loss": 0.7458, "step": 3922 }, { "epoch": 0.7526319576009017, "grad_norm": 0.9547718189477499, "learning_rate": 3.0407604592342664e-06, "loss": 0.8395, "step": 3923 }, { "epoch": 0.7528238087244299, "grad_norm": 1.027361652017863, "learning_rate": 3.0362988433858356e-06, "loss": 0.7089, "step": 3924 }, { "epoch": 0.7530156598479579, "grad_norm": 0.9275822338839552, "learning_rate": 3.031839917198349e-06, "loss": 0.7789, "step": 3925 }, { "epoch": 0.7532075109714861, "grad_norm": 0.9328171551564505, "learning_rate": 3.0273836823940252e-06, "loss": 0.7211, "step": 3926 }, { "epoch": 0.7533993620950142, "grad_norm": 0.9629765997177805, "learning_rate": 3.0229301406940393e-06, "loss": 0.779, "step": 3927 }, { "epoch": 0.7535912132185424, "grad_norm": 1.1305467726300635, "learning_rate": 3.0184792938185237e-06, "loss": 0.827, "step": 3928 }, { "epoch": 0.7537830643420705, "grad_norm": 0.9580759456429924, "learning_rate": 3.014031143486583e-06, "loss": 0.8337, "step": 3929 }, { "epoch": 0.7539749154655987, "grad_norm": 0.9481318005787043, "learning_rate": 3.009585691416267e-06, "loss": 0.7129, "step": 3930 }, { "epoch": 0.7541667665891268, "grad_norm": 0.990809339032647, "learning_rate": 3.0051429393245867e-06, "loss": 0.7683, "step": 3931 }, { "epoch": 0.754358617712655, "grad_norm": 1.1113951491162861, "learning_rate": 3.000702888927517e-06, "loss": 0.7385, "step": 3932 }, { "epoch": 0.7545504688361832, "grad_norm": 0.8926645412225542, "learning_rate": 2.996265541939972e-06, "loss": 0.7803, "step": 3933 }, { "epoch": 0.7547423199597113, "grad_norm": 0.8436394825910604, "learning_rate": 2.9918309000758426e-06, "loss": 0.6933, "step": 3934 }, { "epoch": 0.7549341710832395, "grad_norm": 0.9153426606480294, "learning_rate": 2.9873989650479653e-06, "loss": 0.8065, "step": 3935 }, { "epoch": 0.7551260222067675, "grad_norm": 0.9015186950111661, "learning_rate": 2.982969738568127e-06, "loss": 0.7159, "step": 3936 }, { "epoch": 0.7553178733302957, "grad_norm": 1.1001434760836697, "learning_rate": 2.978543222347076e-06, "loss": 0.6947, "step": 3937 }, { "epoch": 0.7555097244538238, "grad_norm": 0.8784759111961412, "learning_rate": 2.9741194180945055e-06, "loss": 0.779, "step": 3938 }, { "epoch": 0.755701575577352, "grad_norm": 0.9211648737026781, "learning_rate": 2.969698327519075e-06, "loss": 0.7431, "step": 3939 }, { "epoch": 0.7558934267008801, "grad_norm": 0.8344187906099167, "learning_rate": 2.9652799523283815e-06, "loss": 0.7341, "step": 3940 }, { "epoch": 0.7560852778244083, "grad_norm": 0.8503817644795111, "learning_rate": 2.96086429422898e-06, "loss": 0.8008, "step": 3941 }, { "epoch": 0.7562771289479364, "grad_norm": 0.9471172902521665, "learning_rate": 2.956451354926374e-06, "loss": 0.8184, "step": 3942 }, { "epoch": 0.7564689800714646, "grad_norm": 0.9334308109372697, "learning_rate": 2.9520411361250166e-06, "loss": 0.72, "step": 3943 }, { "epoch": 0.7566608311949927, "grad_norm": 0.8875736726203926, "learning_rate": 2.9476336395283157e-06, "loss": 0.7688, "step": 3944 }, { "epoch": 0.7568526823185209, "grad_norm": 0.8409671695600377, "learning_rate": 2.9432288668386244e-06, "loss": 0.7686, "step": 3945 }, { "epoch": 0.7570445334420489, "grad_norm": 0.9690951636665599, "learning_rate": 2.9388268197572366e-06, "loss": 0.8434, "step": 3946 }, { "epoch": 0.7572363845655771, "grad_norm": 0.7994257116753991, "learning_rate": 2.9344274999844023e-06, "loss": 0.7775, "step": 3947 }, { "epoch": 0.7574282356891052, "grad_norm": 1.0259266517486476, "learning_rate": 2.9300309092193126e-06, "loss": 0.7351, "step": 3948 }, { "epoch": 0.7576200868126334, "grad_norm": 1.0304866666849952, "learning_rate": 2.925637049160114e-06, "loss": 0.7651, "step": 3949 }, { "epoch": 0.7578119379361615, "grad_norm": 0.8802160149599226, "learning_rate": 2.921245921503887e-06, "loss": 0.7539, "step": 3950 }, { "epoch": 0.7580037890596897, "grad_norm": 0.9160136657740778, "learning_rate": 2.916857527946663e-06, "loss": 0.7774, "step": 3951 }, { "epoch": 0.7581956401832178, "grad_norm": 0.9850146137334673, "learning_rate": 2.912471870183411e-06, "loss": 0.7435, "step": 3952 }, { "epoch": 0.758387491306746, "grad_norm": 1.060306455479069, "learning_rate": 2.9080889499080555e-06, "loss": 0.8253, "step": 3953 }, { "epoch": 0.7585793424302741, "grad_norm": 0.8510359764030475, "learning_rate": 2.9037087688134526e-06, "loss": 0.6959, "step": 3954 }, { "epoch": 0.7587711935538023, "grad_norm": 0.8807603561036802, "learning_rate": 2.899331328591405e-06, "loss": 0.7722, "step": 3955 }, { "epoch": 0.7589630446773304, "grad_norm": 0.9428933888922587, "learning_rate": 2.894956630932656e-06, "loss": 0.672, "step": 3956 }, { "epoch": 0.7591548958008585, "grad_norm": 0.94834463015144, "learning_rate": 2.8905846775268843e-06, "loss": 0.7437, "step": 3957 }, { "epoch": 0.7593467469243866, "grad_norm": 0.9189973980813524, "learning_rate": 2.886215470062722e-06, "loss": 0.7365, "step": 3958 }, { "epoch": 0.7595385980479148, "grad_norm": 1.0784950192146041, "learning_rate": 2.881849010227732e-06, "loss": 0.7399, "step": 3959 }, { "epoch": 0.759730449171443, "grad_norm": 0.8901256005124945, "learning_rate": 2.877485299708409e-06, "loss": 0.79, "step": 3960 }, { "epoch": 0.7599223002949711, "grad_norm": 0.8589862943872496, "learning_rate": 2.8731243401901977e-06, "loss": 0.7507, "step": 3961 }, { "epoch": 0.7601141514184993, "grad_norm": 1.0391711211472174, "learning_rate": 2.8687661333574723e-06, "loss": 0.7253, "step": 3962 }, { "epoch": 0.7603060025420274, "grad_norm": 0.7708075460090674, "learning_rate": 2.8644106808935535e-06, "loss": 0.8127, "step": 3963 }, { "epoch": 0.7604978536655556, "grad_norm": 0.9645339894721799, "learning_rate": 2.8600579844806887e-06, "loss": 0.822, "step": 3964 }, { "epoch": 0.7606897047890837, "grad_norm": 0.939140226231821, "learning_rate": 2.855708045800063e-06, "loss": 0.7781, "step": 3965 }, { "epoch": 0.7608815559126119, "grad_norm": 1.0174760756248808, "learning_rate": 2.8513608665317995e-06, "loss": 0.7279, "step": 3966 }, { "epoch": 0.7610734070361399, "grad_norm": 0.9621817298750992, "learning_rate": 2.847016448354948e-06, "loss": 0.7802, "step": 3967 }, { "epoch": 0.7612652581596681, "grad_norm": 0.8589166497775539, "learning_rate": 2.842674792947505e-06, "loss": 0.7801, "step": 3968 }, { "epoch": 0.7614571092831962, "grad_norm": 1.06822275495834, "learning_rate": 2.8383359019863878e-06, "loss": 0.7317, "step": 3969 }, { "epoch": 0.7616489604067244, "grad_norm": 1.0196771124972466, "learning_rate": 2.8339997771474513e-06, "loss": 0.7631, "step": 3970 }, { "epoch": 0.7618408115302525, "grad_norm": 0.9411143623450366, "learning_rate": 2.82966642010548e-06, "loss": 0.8218, "step": 3971 }, { "epoch": 0.7620326626537807, "grad_norm": 0.9716033133885923, "learning_rate": 2.8253358325341917e-06, "loss": 0.7121, "step": 3972 }, { "epoch": 0.7622245137773088, "grad_norm": 0.9723345957308923, "learning_rate": 2.8210080161062324e-06, "loss": 0.8063, "step": 3973 }, { "epoch": 0.762416364900837, "grad_norm": 0.8402955146432843, "learning_rate": 2.816682972493178e-06, "loss": 0.7422, "step": 3974 }, { "epoch": 0.7626082160243651, "grad_norm": 0.8028685508689939, "learning_rate": 2.8123607033655343e-06, "loss": 0.6879, "step": 3975 }, { "epoch": 0.7628000671478933, "grad_norm": 1.0414909802844585, "learning_rate": 2.8080412103927355e-06, "loss": 0.837, "step": 3976 }, { "epoch": 0.7629919182714214, "grad_norm": 0.9424808025328132, "learning_rate": 2.8037244952431408e-06, "loss": 0.7892, "step": 3977 }, { "epoch": 0.7631837693949495, "grad_norm": 0.8669949491842744, "learning_rate": 2.7994105595840437e-06, "loss": 0.7694, "step": 3978 }, { "epoch": 0.7633756205184776, "grad_norm": 0.8603753349425195, "learning_rate": 2.795099405081657e-06, "loss": 0.7603, "step": 3979 }, { "epoch": 0.7635674716420058, "grad_norm": 0.8411404538678826, "learning_rate": 2.7907910334011214e-06, "loss": 0.7496, "step": 3980 }, { "epoch": 0.7637593227655339, "grad_norm": 0.9659532690666507, "learning_rate": 2.7864854462065027e-06, "loss": 0.8208, "step": 3981 }, { "epoch": 0.7639511738890621, "grad_norm": 0.9363828494225591, "learning_rate": 2.782182645160789e-06, "loss": 0.731, "step": 3982 }, { "epoch": 0.7641430250125902, "grad_norm": 1.0484313187952348, "learning_rate": 2.7778826319259e-06, "loss": 0.6905, "step": 3983 }, { "epoch": 0.7643348761361184, "grad_norm": 0.9081868471696417, "learning_rate": 2.7735854081626723e-06, "loss": 0.7319, "step": 3984 }, { "epoch": 0.7645267272596465, "grad_norm": 0.777934678627616, "learning_rate": 2.769290975530864e-06, "loss": 0.709, "step": 3985 }, { "epoch": 0.7647185783831747, "grad_norm": 0.945658663548356, "learning_rate": 2.764999335689159e-06, "loss": 0.796, "step": 3986 }, { "epoch": 0.7649104295067028, "grad_norm": 0.8970031453704144, "learning_rate": 2.76071049029516e-06, "loss": 0.7604, "step": 3987 }, { "epoch": 0.7651022806302309, "grad_norm": 0.9148913858601737, "learning_rate": 2.756424441005392e-06, "loss": 0.7595, "step": 3988 }, { "epoch": 0.765294131753759, "grad_norm": 1.0108998511179137, "learning_rate": 2.7521411894752982e-06, "loss": 0.7431, "step": 3989 }, { "epoch": 0.7654859828772872, "grad_norm": 0.7667803378884435, "learning_rate": 2.747860737359244e-06, "loss": 0.7651, "step": 3990 }, { "epoch": 0.7656778340008153, "grad_norm": 0.8722536029276041, "learning_rate": 2.7435830863105063e-06, "loss": 0.7039, "step": 3991 }, { "epoch": 0.7658696851243435, "grad_norm": 0.8785858278798042, "learning_rate": 2.739308237981294e-06, "loss": 0.8328, "step": 3992 }, { "epoch": 0.7660615362478717, "grad_norm": 0.8992096423914068, "learning_rate": 2.735036194022721e-06, "loss": 0.7873, "step": 3993 }, { "epoch": 0.7662533873713998, "grad_norm": 0.6754180881215042, "learning_rate": 2.7307669560848225e-06, "loss": 0.3255, "step": 3994 }, { "epoch": 0.766445238494928, "grad_norm": 0.9143205246460363, "learning_rate": 2.72650052581655e-06, "loss": 0.7568, "step": 3995 }, { "epoch": 0.7666370896184561, "grad_norm": 1.0063040201347, "learning_rate": 2.722236904865766e-06, "loss": 0.7299, "step": 3996 }, { "epoch": 0.7668289407419843, "grad_norm": 1.093973907150423, "learning_rate": 2.71797609487926e-06, "loss": 0.7888, "step": 3997 }, { "epoch": 0.7670207918655123, "grad_norm": 0.9394144483879043, "learning_rate": 2.7137180975027224e-06, "loss": 0.8125, "step": 3998 }, { "epoch": 0.7672126429890405, "grad_norm": 0.9008791035143979, "learning_rate": 2.709462914380765e-06, "loss": 0.7402, "step": 3999 }, { "epoch": 0.7674044941125686, "grad_norm": 1.012720722328371, "learning_rate": 2.705210547156908e-06, "loss": 0.7709, "step": 4000 }, { "epoch": 0.7675963452360968, "grad_norm": 1.0738945934019744, "learning_rate": 2.7009609974735883e-06, "loss": 0.7434, "step": 4001 }, { "epoch": 0.7677881963596249, "grad_norm": 1.1072226406783734, "learning_rate": 2.6967142669721522e-06, "loss": 0.7989, "step": 4002 }, { "epoch": 0.7679800474831531, "grad_norm": 0.9787892525149022, "learning_rate": 2.692470357292857e-06, "loss": 0.7922, "step": 4003 }, { "epoch": 0.7681718986066812, "grad_norm": 1.0824935441567505, "learning_rate": 2.6882292700748714e-06, "loss": 0.7264, "step": 4004 }, { "epoch": 0.7683637497302094, "grad_norm": 0.8265394311011778, "learning_rate": 2.6839910069562738e-06, "loss": 0.744, "step": 4005 }, { "epoch": 0.7685556008537375, "grad_norm": 0.8396458581682597, "learning_rate": 2.679755569574047e-06, "loss": 0.7217, "step": 4006 }, { "epoch": 0.7687474519772657, "grad_norm": 0.8330263670253945, "learning_rate": 2.675522959564095e-06, "loss": 0.7132, "step": 4007 }, { "epoch": 0.7689393031007938, "grad_norm": 1.062587994153474, "learning_rate": 2.671293178561217e-06, "loss": 0.7586, "step": 4008 }, { "epoch": 0.7691311542243219, "grad_norm": 0.9367164859380617, "learning_rate": 2.6670662281991253e-06, "loss": 0.7502, "step": 4009 }, { "epoch": 0.76932300534785, "grad_norm": 0.5954373203326552, "learning_rate": 2.6628421101104385e-06, "loss": 0.3122, "step": 4010 }, { "epoch": 0.7695148564713782, "grad_norm": 0.9369772388281082, "learning_rate": 2.6586208259266755e-06, "loss": 0.7954, "step": 4011 }, { "epoch": 0.7697067075949063, "grad_norm": 0.962876669293708, "learning_rate": 2.6544023772782736e-06, "loss": 0.7658, "step": 4012 }, { "epoch": 0.7698985587184345, "grad_norm": 0.9223839389013894, "learning_rate": 2.6501867657945624e-06, "loss": 0.7069, "step": 4013 }, { "epoch": 0.7700904098419626, "grad_norm": 0.9131771940854908, "learning_rate": 2.6459739931037808e-06, "loss": 0.7428, "step": 4014 }, { "epoch": 0.7702822609654908, "grad_norm": 0.9537286767834516, "learning_rate": 2.641764060833073e-06, "loss": 0.7871, "step": 4015 }, { "epoch": 0.770474112089019, "grad_norm": 0.9466725402443449, "learning_rate": 2.6375569706084804e-06, "loss": 0.824, "step": 4016 }, { "epoch": 0.7706659632125471, "grad_norm": 1.0256561525298247, "learning_rate": 2.6333527240549518e-06, "loss": 0.7636, "step": 4017 }, { "epoch": 0.7708578143360753, "grad_norm": 0.8967167919458953, "learning_rate": 2.6291513227963363e-06, "loss": 0.7794, "step": 4018 }, { "epoch": 0.7710496654596033, "grad_norm": 0.9819203174162089, "learning_rate": 2.624952768455383e-06, "loss": 0.7877, "step": 4019 }, { "epoch": 0.7712415165831314, "grad_norm": 0.9239515365460178, "learning_rate": 2.620757062653743e-06, "loss": 0.6892, "step": 4020 }, { "epoch": 0.7714333677066596, "grad_norm": 0.9392952709167689, "learning_rate": 2.616564207011962e-06, "loss": 0.7533, "step": 4021 }, { "epoch": 0.7716252188301878, "grad_norm": 0.9169057587798142, "learning_rate": 2.6123742031494957e-06, "loss": 0.7158, "step": 4022 }, { "epoch": 0.7718170699537159, "grad_norm": 0.8282479138362986, "learning_rate": 2.6081870526846897e-06, "loss": 0.6921, "step": 4023 }, { "epoch": 0.7720089210772441, "grad_norm": 0.9671493698274372, "learning_rate": 2.604002757234789e-06, "loss": 0.7208, "step": 4024 }, { "epoch": 0.7722007722007722, "grad_norm": 1.0856656596577492, "learning_rate": 2.599821318415936e-06, "loss": 0.7803, "step": 4025 }, { "epoch": 0.7723926233243004, "grad_norm": 0.8506095692899077, "learning_rate": 2.595642737843168e-06, "loss": 0.7283, "step": 4026 }, { "epoch": 0.7725844744478285, "grad_norm": 0.9937933742037524, "learning_rate": 2.591467017130426e-06, "loss": 0.802, "step": 4027 }, { "epoch": 0.7727763255713567, "grad_norm": 0.8661030866687203, "learning_rate": 2.5872941578905388e-06, "loss": 0.7946, "step": 4028 }, { "epoch": 0.7729681766948848, "grad_norm": 1.0507370244090177, "learning_rate": 2.583124161735231e-06, "loss": 0.7427, "step": 4029 }, { "epoch": 0.7731600278184129, "grad_norm": 1.0479764594631296, "learning_rate": 2.578957030275122e-06, "loss": 0.7392, "step": 4030 }, { "epoch": 0.773351878941941, "grad_norm": 0.9504073210661271, "learning_rate": 2.574792765119728e-06, "loss": 0.812, "step": 4031 }, { "epoch": 0.7735437300654692, "grad_norm": 0.9173734902781927, "learning_rate": 2.5706313678774533e-06, "loss": 0.7882, "step": 4032 }, { "epoch": 0.7737355811889973, "grad_norm": 0.9817105459742606, "learning_rate": 2.566472840155596e-06, "loss": 0.7669, "step": 4033 }, { "epoch": 0.7739274323125255, "grad_norm": 0.9016904649997839, "learning_rate": 2.562317183560349e-06, "loss": 0.7131, "step": 4034 }, { "epoch": 0.7741192834360536, "grad_norm": 1.0094782660631159, "learning_rate": 2.5581643996967876e-06, "loss": 0.7934, "step": 4035 }, { "epoch": 0.7743111345595818, "grad_norm": 0.6630573786194691, "learning_rate": 2.5540144901688923e-06, "loss": 0.322, "step": 4036 }, { "epoch": 0.7745029856831099, "grad_norm": 0.9998036410746018, "learning_rate": 2.54986745657952e-06, "loss": 0.841, "step": 4037 }, { "epoch": 0.7746948368066381, "grad_norm": 1.1069632928913895, "learning_rate": 2.545723300530423e-06, "loss": 0.8253, "step": 4038 }, { "epoch": 0.7748866879301662, "grad_norm": 0.8766642442691025, "learning_rate": 2.541582023622239e-06, "loss": 0.7535, "step": 4039 }, { "epoch": 0.7750785390536943, "grad_norm": 0.8868775335724637, "learning_rate": 2.5374436274544933e-06, "loss": 0.7466, "step": 4040 }, { "epoch": 0.7752703901772224, "grad_norm": 1.1686547837822066, "learning_rate": 2.5333081136256076e-06, "loss": 0.7008, "step": 4041 }, { "epoch": 0.7754622413007506, "grad_norm": 0.987542731689592, "learning_rate": 2.5291754837328787e-06, "loss": 0.7155, "step": 4042 }, { "epoch": 0.7756540924242787, "grad_norm": 1.1918671442619122, "learning_rate": 2.5250457393724946e-06, "loss": 0.7436, "step": 4043 }, { "epoch": 0.7758459435478069, "grad_norm": 1.0062093419427751, "learning_rate": 2.520918882139529e-06, "loss": 0.7921, "step": 4044 }, { "epoch": 0.776037794671335, "grad_norm": 1.0313662520966522, "learning_rate": 2.51679491362794e-06, "loss": 0.7391, "step": 4045 }, { "epoch": 0.7762296457948632, "grad_norm": 0.9088522862897781, "learning_rate": 2.512673835430569e-06, "loss": 0.7235, "step": 4046 }, { "epoch": 0.7764214969183914, "grad_norm": 0.9026989102256661, "learning_rate": 2.5085556491391416e-06, "loss": 0.7679, "step": 4047 }, { "epoch": 0.7766133480419195, "grad_norm": 0.8723935423494322, "learning_rate": 2.5044403563442676e-06, "loss": 0.7637, "step": 4048 }, { "epoch": 0.7768051991654477, "grad_norm": 0.609789167174429, "learning_rate": 2.5003279586354378e-06, "loss": 0.2923, "step": 4049 }, { "epoch": 0.7769970502889758, "grad_norm": 0.9710892599916706, "learning_rate": 2.496218457601023e-06, "loss": 0.7725, "step": 4050 }, { "epoch": 0.7771889014125039, "grad_norm": 1.0147081589624576, "learning_rate": 2.492111854828282e-06, "loss": 0.811, "step": 4051 }, { "epoch": 0.777380752536032, "grad_norm": 0.6058014105002657, "learning_rate": 2.4880081519033473e-06, "loss": 0.3349, "step": 4052 }, { "epoch": 0.7775726036595602, "grad_norm": 0.8112183336516632, "learning_rate": 2.4839073504112343e-06, "loss": 0.706, "step": 4053 }, { "epoch": 0.7777644547830883, "grad_norm": 0.8591464079074347, "learning_rate": 2.4798094519358363e-06, "loss": 0.7647, "step": 4054 }, { "epoch": 0.7779563059066165, "grad_norm": 0.9595642429372375, "learning_rate": 2.4757144580599246e-06, "loss": 0.7671, "step": 4055 }, { "epoch": 0.7781481570301446, "grad_norm": 0.9305268452869471, "learning_rate": 2.4716223703651544e-06, "loss": 0.737, "step": 4056 }, { "epoch": 0.7783400081536728, "grad_norm": 0.6142339868148937, "learning_rate": 2.4675331904320533e-06, "loss": 0.2833, "step": 4057 }, { "epoch": 0.7785318592772009, "grad_norm": 0.9508305503933769, "learning_rate": 2.4634469198400246e-06, "loss": 0.7734, "step": 4058 }, { "epoch": 0.7787237104007291, "grad_norm": 0.9212931139769425, "learning_rate": 2.4593635601673518e-06, "loss": 0.837, "step": 4059 }, { "epoch": 0.7789155615242572, "grad_norm": 1.1344591803557993, "learning_rate": 2.45528311299119e-06, "loss": 0.8319, "step": 4060 }, { "epoch": 0.7791074126477853, "grad_norm": 0.9528345552201126, "learning_rate": 2.4512055798875744e-06, "loss": 0.7409, "step": 4061 }, { "epoch": 0.7792992637713134, "grad_norm": 0.9071505682753392, "learning_rate": 2.447130962431411e-06, "loss": 0.8001, "step": 4062 }, { "epoch": 0.7794911148948416, "grad_norm": 0.85893817242708, "learning_rate": 2.443059262196481e-06, "loss": 0.6974, "step": 4063 }, { "epoch": 0.7796829660183697, "grad_norm": 0.8747689461652944, "learning_rate": 2.4389904807554365e-06, "loss": 0.7169, "step": 4064 }, { "epoch": 0.7798748171418979, "grad_norm": 0.9718680910371386, "learning_rate": 2.434924619679804e-06, "loss": 0.7465, "step": 4065 }, { "epoch": 0.780066668265426, "grad_norm": 0.6460807298071708, "learning_rate": 2.4308616805399867e-06, "loss": 0.3289, "step": 4066 }, { "epoch": 0.7802585193889542, "grad_norm": 0.6489124540449971, "learning_rate": 2.4268016649052515e-06, "loss": 0.3107, "step": 4067 }, { "epoch": 0.7804503705124823, "grad_norm": 0.8929159475542524, "learning_rate": 2.4227445743437406e-06, "loss": 0.7498, "step": 4068 }, { "epoch": 0.7806422216360105, "grad_norm": 0.9159008346366025, "learning_rate": 2.418690410422462e-06, "loss": 0.8143, "step": 4069 }, { "epoch": 0.7808340727595386, "grad_norm": 0.9327498372202715, "learning_rate": 2.414639174707302e-06, "loss": 0.795, "step": 4070 }, { "epoch": 0.7810259238830667, "grad_norm": 0.9047004392757727, "learning_rate": 2.4105908687630066e-06, "loss": 0.7568, "step": 4071 }, { "epoch": 0.7812177750065948, "grad_norm": 1.0919504166199536, "learning_rate": 2.4065454941531963e-06, "loss": 0.7831, "step": 4072 }, { "epoch": 0.781409626130123, "grad_norm": 0.8456099477452675, "learning_rate": 2.402503052440356e-06, "loss": 0.7073, "step": 4073 }, { "epoch": 0.7816014772536511, "grad_norm": 0.9584783426495141, "learning_rate": 2.3984635451858384e-06, "loss": 0.7113, "step": 4074 }, { "epoch": 0.7817933283771793, "grad_norm": 1.0114561698270592, "learning_rate": 2.394426973949865e-06, "loss": 0.8087, "step": 4075 }, { "epoch": 0.7819851795007075, "grad_norm": 0.9975080168051077, "learning_rate": 2.390393340291519e-06, "loss": 0.7699, "step": 4076 }, { "epoch": 0.7821770306242356, "grad_norm": 1.0844296803862752, "learning_rate": 2.386362645768755e-06, "loss": 0.7969, "step": 4077 }, { "epoch": 0.7823688817477638, "grad_norm": 0.9706642147607147, "learning_rate": 2.382334891938386e-06, "loss": 0.6933, "step": 4078 }, { "epoch": 0.7825607328712919, "grad_norm": 0.8957982139737952, "learning_rate": 2.378310080356092e-06, "loss": 0.7034, "step": 4079 }, { "epoch": 0.7827525839948201, "grad_norm": 0.8965260426120453, "learning_rate": 2.3742882125764198e-06, "loss": 0.7773, "step": 4080 }, { "epoch": 0.7829444351183482, "grad_norm": 0.8734351559101384, "learning_rate": 2.3702692901527757e-06, "loss": 0.8132, "step": 4081 }, { "epoch": 0.7831362862418763, "grad_norm": 1.0815815120730108, "learning_rate": 2.366253314637428e-06, "loss": 0.7719, "step": 4082 }, { "epoch": 0.7833281373654044, "grad_norm": 0.9338808171889427, "learning_rate": 2.3622402875815077e-06, "loss": 0.7632, "step": 4083 }, { "epoch": 0.7835199884889326, "grad_norm": 0.8567193601083514, "learning_rate": 2.358230210535004e-06, "loss": 0.7263, "step": 4084 }, { "epoch": 0.7837118396124607, "grad_norm": 0.9996180689414726, "learning_rate": 2.354223085046775e-06, "loss": 0.8028, "step": 4085 }, { "epoch": 0.7839036907359889, "grad_norm": 0.624209891607484, "learning_rate": 2.3502189126645303e-06, "loss": 0.2865, "step": 4086 }, { "epoch": 0.784095541859517, "grad_norm": 0.9129327219635445, "learning_rate": 2.346217694934847e-06, "loss": 0.7323, "step": 4087 }, { "epoch": 0.7842873929830452, "grad_norm": 0.9198163644723887, "learning_rate": 2.3422194334031477e-06, "loss": 0.7539, "step": 4088 }, { "epoch": 0.7844792441065733, "grad_norm": 1.1108319305133054, "learning_rate": 2.3382241296137233e-06, "loss": 0.7035, "step": 4089 }, { "epoch": 0.7846710952301015, "grad_norm": 0.6838895508831485, "learning_rate": 2.334231785109725e-06, "loss": 0.3256, "step": 4090 }, { "epoch": 0.7848629463536296, "grad_norm": 0.9572619637854674, "learning_rate": 2.330242401433155e-06, "loss": 0.7326, "step": 4091 }, { "epoch": 0.7850547974771577, "grad_norm": 0.6348872189321471, "learning_rate": 2.3262559801248718e-06, "loss": 0.3147, "step": 4092 }, { "epoch": 0.7852466486006858, "grad_norm": 0.9028612820550965, "learning_rate": 2.3222725227245925e-06, "loss": 0.7525, "step": 4093 }, { "epoch": 0.785438499724214, "grad_norm": 1.0009049740851483, "learning_rate": 2.3182920307708855e-06, "loss": 0.7789, "step": 4094 }, { "epoch": 0.7856303508477421, "grad_norm": 0.909513286157871, "learning_rate": 2.3143145058011805e-06, "loss": 0.7241, "step": 4095 }, { "epoch": 0.7858222019712703, "grad_norm": 0.9233094331037559, "learning_rate": 2.3103399493517576e-06, "loss": 0.7166, "step": 4096 }, { "epoch": 0.7860140530947984, "grad_norm": 0.6521582149769476, "learning_rate": 2.306368362957747e-06, "loss": 0.294, "step": 4097 }, { "epoch": 0.7862059042183266, "grad_norm": 1.1119760870679756, "learning_rate": 2.3023997481531377e-06, "loss": 0.8376, "step": 4098 }, { "epoch": 0.7863977553418547, "grad_norm": 0.9093022349580596, "learning_rate": 2.298434106470763e-06, "loss": 0.7972, "step": 4099 }, { "epoch": 0.7865896064653829, "grad_norm": 0.609838503856766, "learning_rate": 2.2944714394423186e-06, "loss": 0.3276, "step": 4100 }, { "epoch": 0.786781457588911, "grad_norm": 0.9523995266467121, "learning_rate": 2.2905117485983464e-06, "loss": 0.7537, "step": 4101 }, { "epoch": 0.7869733087124392, "grad_norm": 1.137450745767826, "learning_rate": 2.286555035468233e-06, "loss": 0.727, "step": 4102 }, { "epoch": 0.7871651598359672, "grad_norm": 0.9473270610574921, "learning_rate": 2.2826013015802205e-06, "loss": 0.8182, "step": 4103 }, { "epoch": 0.7873570109594954, "grad_norm": 0.8687351314017145, "learning_rate": 2.2786505484613997e-06, "loss": 0.7021, "step": 4104 }, { "epoch": 0.7875488620830235, "grad_norm": 0.8666395504599212, "learning_rate": 2.2747027776377127e-06, "loss": 0.7632, "step": 4105 }, { "epoch": 0.7877407132065517, "grad_norm": 0.9069242864714087, "learning_rate": 2.270757990633946e-06, "loss": 0.87, "step": 4106 }, { "epoch": 0.7879325643300799, "grad_norm": 0.9576429375894657, "learning_rate": 2.266816188973735e-06, "loss": 0.6778, "step": 4107 }, { "epoch": 0.788124415453608, "grad_norm": 0.8775303247609928, "learning_rate": 2.2628773741795585e-06, "loss": 0.801, "step": 4108 }, { "epoch": 0.7883162665771362, "grad_norm": 1.0095250672359477, "learning_rate": 2.2589415477727496e-06, "loss": 0.7149, "step": 4109 }, { "epoch": 0.7885081177006643, "grad_norm": 0.8782453980314318, "learning_rate": 2.255008711273481e-06, "loss": 0.7569, "step": 4110 }, { "epoch": 0.7886999688241925, "grad_norm": 0.9883090961939487, "learning_rate": 2.251078866200771e-06, "loss": 0.7611, "step": 4111 }, { "epoch": 0.7888918199477206, "grad_norm": 0.8774338295457097, "learning_rate": 2.2471520140724845e-06, "loss": 0.7286, "step": 4112 }, { "epoch": 0.7890836710712487, "grad_norm": 0.8906188066675567, "learning_rate": 2.243228156405326e-06, "loss": 0.7295, "step": 4113 }, { "epoch": 0.7892755221947768, "grad_norm": 0.9689492578759749, "learning_rate": 2.2393072947148555e-06, "loss": 0.7242, "step": 4114 }, { "epoch": 0.789467373318305, "grad_norm": 0.9076912273086173, "learning_rate": 2.235389430515458e-06, "loss": 0.6771, "step": 4115 }, { "epoch": 0.7896592244418331, "grad_norm": 0.7439982395163109, "learning_rate": 2.231474565320374e-06, "loss": 0.3047, "step": 4116 }, { "epoch": 0.7898510755653613, "grad_norm": 0.9138243438921917, "learning_rate": 2.22756270064168e-06, "loss": 0.7504, "step": 4117 }, { "epoch": 0.7900429266888894, "grad_norm": 1.0468567100732495, "learning_rate": 2.223653837990295e-06, "loss": 0.7291, "step": 4118 }, { "epoch": 0.7902347778124176, "grad_norm": 0.9883041333640572, "learning_rate": 2.2197479788759814e-06, "loss": 0.7525, "step": 4119 }, { "epoch": 0.7904266289359457, "grad_norm": 0.8836508606761413, "learning_rate": 2.2158451248073374e-06, "loss": 0.7808, "step": 4120 }, { "epoch": 0.7906184800594739, "grad_norm": 0.801003598935884, "learning_rate": 2.2119452772918026e-06, "loss": 0.7723, "step": 4121 }, { "epoch": 0.790810331183002, "grad_norm": 0.8609318224110549, "learning_rate": 2.2080484378356528e-06, "loss": 0.8452, "step": 4122 }, { "epoch": 0.7910021823065301, "grad_norm": 0.8966469856560642, "learning_rate": 2.2041546079440036e-06, "loss": 0.7514, "step": 4123 }, { "epoch": 0.7911940334300582, "grad_norm": 0.9531999298488784, "learning_rate": 2.2002637891208112e-06, "loss": 0.8337, "step": 4124 }, { "epoch": 0.7913858845535864, "grad_norm": 0.9797436597280574, "learning_rate": 2.1963759828688645e-06, "loss": 0.8119, "step": 4125 }, { "epoch": 0.7915777356771145, "grad_norm": 0.8412655042812739, "learning_rate": 2.1924911906897904e-06, "loss": 0.7475, "step": 4126 }, { "epoch": 0.7917695868006427, "grad_norm": 0.8574420695156819, "learning_rate": 2.188609414084052e-06, "loss": 0.6636, "step": 4127 }, { "epoch": 0.7919614379241708, "grad_norm": 0.6363716604313344, "learning_rate": 2.184730654550947e-06, "loss": 0.3276, "step": 4128 }, { "epoch": 0.792153289047699, "grad_norm": 1.0539810911752208, "learning_rate": 2.1808549135886083e-06, "loss": 0.7612, "step": 4129 }, { "epoch": 0.7923451401712271, "grad_norm": 0.9859707584143846, "learning_rate": 2.1769821926940006e-06, "loss": 0.7713, "step": 4130 }, { "epoch": 0.7925369912947553, "grad_norm": 0.9041868855423406, "learning_rate": 2.173112493362927e-06, "loss": 0.7438, "step": 4131 }, { "epoch": 0.7927288424182835, "grad_norm": 1.0746761518688475, "learning_rate": 2.16924581709002e-06, "loss": 0.8278, "step": 4132 }, { "epoch": 0.7929206935418116, "grad_norm": 1.1328751975175742, "learning_rate": 2.16538216536874e-06, "loss": 0.6889, "step": 4133 }, { "epoch": 0.7931125446653396, "grad_norm": 1.0206266389841208, "learning_rate": 2.1615215396913935e-06, "loss": 0.7688, "step": 4134 }, { "epoch": 0.7933043957888678, "grad_norm": 0.8333184715375584, "learning_rate": 2.1576639415491053e-06, "loss": 0.7355, "step": 4135 }, { "epoch": 0.793496246912396, "grad_norm": 0.8867057000804519, "learning_rate": 2.153809372431833e-06, "loss": 0.7317, "step": 4136 }, { "epoch": 0.7936880980359241, "grad_norm": 0.9153157813866225, "learning_rate": 2.1499578338283677e-06, "loss": 0.7182, "step": 4137 }, { "epoch": 0.7938799491594523, "grad_norm": 1.0833397658014998, "learning_rate": 2.1461093272263244e-06, "loss": 0.737, "step": 4138 }, { "epoch": 0.7940718002829804, "grad_norm": 0.9511911583082822, "learning_rate": 2.1422638541121567e-06, "loss": 0.7517, "step": 4139 }, { "epoch": 0.7942636514065086, "grad_norm": 0.9201442639949047, "learning_rate": 2.138421415971138e-06, "loss": 0.8026, "step": 4140 }, { "epoch": 0.7944555025300367, "grad_norm": 0.8521358376098543, "learning_rate": 2.134582014287373e-06, "loss": 0.7281, "step": 4141 }, { "epoch": 0.7946473536535649, "grad_norm": 0.6263230542875745, "learning_rate": 2.1307456505437918e-06, "loss": 0.3145, "step": 4142 }, { "epoch": 0.794839204777093, "grad_norm": 0.8602736002409623, "learning_rate": 2.1269123262221513e-06, "loss": 0.7888, "step": 4143 }, { "epoch": 0.7950310559006211, "grad_norm": 0.6824643682070911, "learning_rate": 2.1230820428030376e-06, "loss": 0.3136, "step": 4144 }, { "epoch": 0.7952229070241492, "grad_norm": 1.0049847074262261, "learning_rate": 2.1192548017658576e-06, "loss": 0.7463, "step": 4145 }, { "epoch": 0.7954147581476774, "grad_norm": 0.6252899168700489, "learning_rate": 2.115430604588846e-06, "loss": 0.3291, "step": 4146 }, { "epoch": 0.7956066092712055, "grad_norm": 0.6211796866179463, "learning_rate": 2.1116094527490594e-06, "loss": 0.2815, "step": 4147 }, { "epoch": 0.7957984603947337, "grad_norm": 1.0540991800119, "learning_rate": 2.1077913477223853e-06, "loss": 0.8405, "step": 4148 }, { "epoch": 0.7959903115182618, "grad_norm": 1.1872748741572106, "learning_rate": 2.103976290983526e-06, "loss": 0.7516, "step": 4149 }, { "epoch": 0.79618216264179, "grad_norm": 0.8843480807902897, "learning_rate": 2.10016428400601e-06, "loss": 0.7393, "step": 4150 }, { "epoch": 0.7963740137653181, "grad_norm": 0.9171867617237975, "learning_rate": 2.096355328262186e-06, "loss": 0.7436, "step": 4151 }, { "epoch": 0.7965658648888463, "grad_norm": 0.9084834300358008, "learning_rate": 2.092549425223225e-06, "loss": 0.6817, "step": 4152 }, { "epoch": 0.7967577160123744, "grad_norm": 0.9071279079311464, "learning_rate": 2.088746576359123e-06, "loss": 0.7297, "step": 4153 }, { "epoch": 0.7969495671359026, "grad_norm": 0.8780270062210735, "learning_rate": 2.084946783138693e-06, "loss": 0.7419, "step": 4154 }, { "epoch": 0.7971414182594306, "grad_norm": 0.8765378685199796, "learning_rate": 2.0811500470295642e-06, "loss": 0.7719, "step": 4155 }, { "epoch": 0.7973332693829588, "grad_norm": 0.9197433393025677, "learning_rate": 2.0773563694981903e-06, "loss": 0.7263, "step": 4156 }, { "epoch": 0.7975251205064869, "grad_norm": 0.8753629319913415, "learning_rate": 2.073565752009842e-06, "loss": 0.7753, "step": 4157 }, { "epoch": 0.7977169716300151, "grad_norm": 0.9252390023625604, "learning_rate": 2.069778196028608e-06, "loss": 0.7809, "step": 4158 }, { "epoch": 0.7979088227535432, "grad_norm": 0.8718848601929311, "learning_rate": 2.0659937030173937e-06, "loss": 0.7025, "step": 4159 }, { "epoch": 0.7981006738770714, "grad_norm": 1.09932086240011, "learning_rate": 2.0622122744379226e-06, "loss": 0.802, "step": 4160 }, { "epoch": 0.7982925250005996, "grad_norm": 0.9288999905728741, "learning_rate": 2.0584339117507346e-06, "loss": 0.7439, "step": 4161 }, { "epoch": 0.7984843761241277, "grad_norm": 1.0129390438779162, "learning_rate": 2.0546586164151827e-06, "loss": 0.7554, "step": 4162 }, { "epoch": 0.7986762272476559, "grad_norm": 0.9222082299066424, "learning_rate": 2.0508863898894416e-06, "loss": 0.7315, "step": 4163 }, { "epoch": 0.798868078371184, "grad_norm": 0.9251860770055451, "learning_rate": 2.0471172336304945e-06, "loss": 0.7583, "step": 4164 }, { "epoch": 0.799059929494712, "grad_norm": 0.9143429011298233, "learning_rate": 2.0433511490941404e-06, "loss": 0.7163, "step": 4165 }, { "epoch": 0.7992517806182402, "grad_norm": 0.7572122215550003, "learning_rate": 2.0395881377349933e-06, "loss": 0.6962, "step": 4166 }, { "epoch": 0.7994436317417684, "grad_norm": 0.9601604401436797, "learning_rate": 2.0358282010064766e-06, "loss": 0.7705, "step": 4167 }, { "epoch": 0.7996354828652965, "grad_norm": 0.6477435160051113, "learning_rate": 2.0320713403608337e-06, "loss": 0.2779, "step": 4168 }, { "epoch": 0.7998273339888247, "grad_norm": 0.9356332549760397, "learning_rate": 2.028317557249112e-06, "loss": 0.7687, "step": 4169 }, { "epoch": 0.8000191851123528, "grad_norm": 0.981079636315522, "learning_rate": 2.0245668531211736e-06, "loss": 0.7631, "step": 4170 }, { "epoch": 0.800211036235881, "grad_norm": 0.9717754840088797, "learning_rate": 2.020819229425691e-06, "loss": 0.7436, "step": 4171 }, { "epoch": 0.8004028873594091, "grad_norm": 0.9216030675134955, "learning_rate": 2.0170746876101465e-06, "loss": 0.7588, "step": 4172 }, { "epoch": 0.8005947384829373, "grad_norm": 0.6615599250389169, "learning_rate": 2.013333229120834e-06, "loss": 0.2954, "step": 4173 }, { "epoch": 0.8007865896064654, "grad_norm": 1.0369887011204093, "learning_rate": 2.0095948554028534e-06, "loss": 0.7739, "step": 4174 }, { "epoch": 0.8009784407299936, "grad_norm": 0.9235521116504001, "learning_rate": 2.005859567900115e-06, "loss": 0.7725, "step": 4175 }, { "epoch": 0.8011702918535216, "grad_norm": 0.8442910195606407, "learning_rate": 2.0021273680553367e-06, "loss": 0.797, "step": 4176 }, { "epoch": 0.8013621429770498, "grad_norm": 0.9160440352376091, "learning_rate": 1.9983982573100413e-06, "loss": 0.7677, "step": 4177 }, { "epoch": 0.8015539941005779, "grad_norm": 0.9156824377741399, "learning_rate": 1.9946722371045668e-06, "loss": 0.7639, "step": 4178 }, { "epoch": 0.8017458452241061, "grad_norm": 0.959283937982969, "learning_rate": 1.9909493088780496e-06, "loss": 0.7414, "step": 4179 }, { "epoch": 0.8019376963476342, "grad_norm": 0.8457039484140649, "learning_rate": 1.987229474068433e-06, "loss": 0.7604, "step": 4180 }, { "epoch": 0.8021295474711624, "grad_norm": 0.8419333404089843, "learning_rate": 1.983512734112466e-06, "loss": 0.7397, "step": 4181 }, { "epoch": 0.8023213985946905, "grad_norm": 0.8082097722808629, "learning_rate": 1.979799090445702e-06, "loss": 0.747, "step": 4182 }, { "epoch": 0.8025132497182187, "grad_norm": 0.9048351032046822, "learning_rate": 1.9760885445025035e-06, "loss": 0.7264, "step": 4183 }, { "epoch": 0.8027051008417468, "grad_norm": 0.9013616339590955, "learning_rate": 1.9723810977160294e-06, "loss": 0.7341, "step": 4184 }, { "epoch": 0.802896951965275, "grad_norm": 0.925744049686874, "learning_rate": 1.9686767515182448e-06, "loss": 0.7872, "step": 4185 }, { "epoch": 0.803088803088803, "grad_norm": 0.6277254266222394, "learning_rate": 1.964975507339917e-06, "loss": 0.2833, "step": 4186 }, { "epoch": 0.8032806542123312, "grad_norm": 0.9773403575726873, "learning_rate": 1.9612773666106155e-06, "loss": 0.7836, "step": 4187 }, { "epoch": 0.8034725053358593, "grad_norm": 0.9303013158858056, "learning_rate": 1.9575823307587096e-06, "loss": 0.8116, "step": 4188 }, { "epoch": 0.8036643564593875, "grad_norm": 1.1728873149507895, "learning_rate": 1.9538904012113714e-06, "loss": 0.7394, "step": 4189 }, { "epoch": 0.8038562075829156, "grad_norm": 0.8951479385757521, "learning_rate": 1.9502015793945715e-06, "loss": 0.7467, "step": 4190 }, { "epoch": 0.8040480587064438, "grad_norm": 0.9757751294433427, "learning_rate": 1.9465158667330796e-06, "loss": 0.8152, "step": 4191 }, { "epoch": 0.804239909829972, "grad_norm": 0.9240194653425071, "learning_rate": 1.94283326465047e-06, "loss": 0.717, "step": 4192 }, { "epoch": 0.8044317609535001, "grad_norm": 0.8837422098717125, "learning_rate": 1.9391537745691102e-06, "loss": 0.746, "step": 4193 }, { "epoch": 0.8046236120770283, "grad_norm": 0.843956584036954, "learning_rate": 1.935477397910165e-06, "loss": 0.7008, "step": 4194 }, { "epoch": 0.8048154632005564, "grad_norm": 0.8717816925135647, "learning_rate": 1.9318041360936e-06, "loss": 0.7553, "step": 4195 }, { "epoch": 0.8050073143240845, "grad_norm": 0.6724378657105302, "learning_rate": 1.9281339905381735e-06, "loss": 0.3382, "step": 4196 }, { "epoch": 0.8051991654476126, "grad_norm": 0.8991345358490429, "learning_rate": 1.9244669626614478e-06, "loss": 0.8133, "step": 4197 }, { "epoch": 0.8053910165711408, "grad_norm": 0.6285702601875727, "learning_rate": 1.920803053879775e-06, "loss": 0.3262, "step": 4198 }, { "epoch": 0.8055828676946689, "grad_norm": 0.844077059446261, "learning_rate": 1.9171422656083015e-06, "loss": 0.6963, "step": 4199 }, { "epoch": 0.8057747188181971, "grad_norm": 1.0034633282032455, "learning_rate": 1.913484599260973e-06, "loss": 0.7072, "step": 4200 }, { "epoch": 0.8059665699417252, "grad_norm": 0.9649431476794436, "learning_rate": 1.9098300562505266e-06, "loss": 0.7984, "step": 4201 }, { "epoch": 0.8061584210652534, "grad_norm": 0.8671692677771101, "learning_rate": 1.9061786379884927e-06, "loss": 0.697, "step": 4202 }, { "epoch": 0.8063502721887815, "grad_norm": 1.090021595039206, "learning_rate": 1.9025303458851952e-06, "loss": 0.7802, "step": 4203 }, { "epoch": 0.8065421233123097, "grad_norm": 0.6282308148335557, "learning_rate": 1.8988851813497523e-06, "loss": 0.3003, "step": 4204 }, { "epoch": 0.8067339744358378, "grad_norm": 0.9445982553882625, "learning_rate": 1.895243145790072e-06, "loss": 0.8217, "step": 4205 }, { "epoch": 0.806925825559366, "grad_norm": 0.8354541126925991, "learning_rate": 1.8916042406128533e-06, "loss": 0.7544, "step": 4206 }, { "epoch": 0.807117676682894, "grad_norm": 0.9988046809594555, "learning_rate": 1.887968467223591e-06, "loss": 0.7187, "step": 4207 }, { "epoch": 0.8073095278064222, "grad_norm": 0.8700795824096093, "learning_rate": 1.8843358270265644e-06, "loss": 0.7755, "step": 4208 }, { "epoch": 0.8075013789299503, "grad_norm": 0.6181975281046364, "learning_rate": 1.8807063214248467e-06, "loss": 0.2997, "step": 4209 }, { "epoch": 0.8076932300534785, "grad_norm": 0.9243075790482081, "learning_rate": 1.8770799518202964e-06, "loss": 0.7476, "step": 4210 }, { "epoch": 0.8078850811770066, "grad_norm": 0.8925407655168333, "learning_rate": 1.8734567196135622e-06, "loss": 0.7121, "step": 4211 }, { "epoch": 0.8080769323005348, "grad_norm": 1.1144039527197305, "learning_rate": 1.8698366262040858e-06, "loss": 0.7548, "step": 4212 }, { "epoch": 0.8082687834240629, "grad_norm": 0.907244188284157, "learning_rate": 1.8662196729900905e-06, "loss": 0.7687, "step": 4213 }, { "epoch": 0.8084606345475911, "grad_norm": 0.6223631914518453, "learning_rate": 1.8626058613685882e-06, "loss": 0.2757, "step": 4214 }, { "epoch": 0.8086524856711192, "grad_norm": 1.110489269998428, "learning_rate": 1.85899519273538e-06, "loss": 0.816, "step": 4215 }, { "epoch": 0.8088443367946474, "grad_norm": 0.7998824947207365, "learning_rate": 1.8553876684850492e-06, "loss": 0.7787, "step": 4216 }, { "epoch": 0.8090361879181754, "grad_norm": 0.9634226344239197, "learning_rate": 1.8517832900109667e-06, "loss": 0.7233, "step": 4217 }, { "epoch": 0.8092280390417036, "grad_norm": 0.8913307546385988, "learning_rate": 1.848182058705288e-06, "loss": 0.7753, "step": 4218 }, { "epoch": 0.8094198901652317, "grad_norm": 0.981053797772542, "learning_rate": 1.8445839759589545e-06, "loss": 0.785, "step": 4219 }, { "epoch": 0.8096117412887599, "grad_norm": 0.9572704512367566, "learning_rate": 1.8409890431616895e-06, "loss": 0.7734, "step": 4220 }, { "epoch": 0.809803592412288, "grad_norm": 1.013621377855103, "learning_rate": 1.8373972617019964e-06, "loss": 0.7415, "step": 4221 }, { "epoch": 0.8099954435358162, "grad_norm": 0.8875075336737177, "learning_rate": 1.8338086329671734e-06, "loss": 0.7525, "step": 4222 }, { "epoch": 0.8101872946593444, "grad_norm": 0.8601481854212492, "learning_rate": 1.8302231583432883e-06, "loss": 0.7015, "step": 4223 }, { "epoch": 0.8103791457828725, "grad_norm": 0.9671609176894445, "learning_rate": 1.8266408392151957e-06, "loss": 0.8939, "step": 4224 }, { "epoch": 0.8105709969064007, "grad_norm": 0.9657217270363271, "learning_rate": 1.8230616769665278e-06, "loss": 0.7659, "step": 4225 }, { "epoch": 0.8107628480299288, "grad_norm": 1.018563441830787, "learning_rate": 1.8194856729797084e-06, "loss": 0.7786, "step": 4226 }, { "epoch": 0.810954699153457, "grad_norm": 0.862110343667887, "learning_rate": 1.8159128286359284e-06, "loss": 0.7733, "step": 4227 }, { "epoch": 0.811146550276985, "grad_norm": 0.9931018414160524, "learning_rate": 1.8123431453151652e-06, "loss": 0.7025, "step": 4228 }, { "epoch": 0.8113384014005132, "grad_norm": 0.9082861555755263, "learning_rate": 1.8087766243961758e-06, "loss": 0.7123, "step": 4229 }, { "epoch": 0.8115302525240413, "grad_norm": 0.8425680820032317, "learning_rate": 1.8052132672564859e-06, "loss": 0.7828, "step": 4230 }, { "epoch": 0.8117221036475695, "grad_norm": 0.9166042994929295, "learning_rate": 1.801653075272416e-06, "loss": 0.7768, "step": 4231 }, { "epoch": 0.8119139547710976, "grad_norm": 0.8595390513725785, "learning_rate": 1.7980960498190514e-06, "loss": 0.8084, "step": 4232 }, { "epoch": 0.8121058058946258, "grad_norm": 1.0185123540017789, "learning_rate": 1.7945421922702589e-06, "loss": 0.7675, "step": 4233 }, { "epoch": 0.8122976570181539, "grad_norm": 0.6180321395781558, "learning_rate": 1.7909915039986803e-06, "loss": 0.3318, "step": 4234 }, { "epoch": 0.8124895081416821, "grad_norm": 0.6605882415788863, "learning_rate": 1.787443986375732e-06, "loss": 0.3074, "step": 4235 }, { "epoch": 0.8126813592652102, "grad_norm": 0.967378273109867, "learning_rate": 1.7838996407716124e-06, "loss": 0.7654, "step": 4236 }, { "epoch": 0.8128732103887384, "grad_norm": 0.9472542008968625, "learning_rate": 1.7803584685552877e-06, "loss": 0.7513, "step": 4237 }, { "epoch": 0.8130650615122664, "grad_norm": 1.0407245691523692, "learning_rate": 1.7768204710945002e-06, "loss": 0.7595, "step": 4238 }, { "epoch": 0.8132569126357946, "grad_norm": 0.8863621931790115, "learning_rate": 1.773285649755767e-06, "loss": 0.6997, "step": 4239 }, { "epoch": 0.8134487637593227, "grad_norm": 0.8372455164822793, "learning_rate": 1.769754005904375e-06, "loss": 0.7861, "step": 4240 }, { "epoch": 0.8136406148828509, "grad_norm": 1.0766119332763433, "learning_rate": 1.7662255409043938e-06, "loss": 0.787, "step": 4241 }, { "epoch": 0.813832466006379, "grad_norm": 0.8212257568650438, "learning_rate": 1.7627002561186534e-06, "loss": 0.7478, "step": 4242 }, { "epoch": 0.8140243171299072, "grad_norm": 0.9767536033210432, "learning_rate": 1.7591781529087636e-06, "loss": 0.6974, "step": 4243 }, { "epoch": 0.8142161682534353, "grad_norm": 0.936185852808815, "learning_rate": 1.7556592326350974e-06, "loss": 0.7517, "step": 4244 }, { "epoch": 0.8144080193769635, "grad_norm": 0.9256808002171266, "learning_rate": 1.7521434966568028e-06, "loss": 0.7756, "step": 4245 }, { "epoch": 0.8145998705004917, "grad_norm": 0.9083688468257758, "learning_rate": 1.748630946331804e-06, "loss": 0.7737, "step": 4246 }, { "epoch": 0.8147917216240198, "grad_norm": 0.8015686581500376, "learning_rate": 1.7451215830167845e-06, "loss": 0.6941, "step": 4247 }, { "epoch": 0.814983572747548, "grad_norm": 0.8889032002199468, "learning_rate": 1.7416154080672031e-06, "loss": 0.7591, "step": 4248 }, { "epoch": 0.815175423871076, "grad_norm": 1.216138294684814, "learning_rate": 1.7381124228372848e-06, "loss": 0.7666, "step": 4249 }, { "epoch": 0.8153672749946042, "grad_norm": 1.0035041278391617, "learning_rate": 1.7346126286800202e-06, "loss": 0.6899, "step": 4250 }, { "epoch": 0.8155591261181323, "grad_norm": 0.966585032898605, "learning_rate": 1.731116026947175e-06, "loss": 0.7846, "step": 4251 }, { "epoch": 0.8157509772416605, "grad_norm": 1.0197067704675409, "learning_rate": 1.7276226189892763e-06, "loss": 0.7213, "step": 4252 }, { "epoch": 0.8159428283651886, "grad_norm": 0.8991173684467543, "learning_rate": 1.7241324061556164e-06, "loss": 0.7667, "step": 4253 }, { "epoch": 0.8161346794887168, "grad_norm": 1.0935116197291095, "learning_rate": 1.7206453897942576e-06, "loss": 0.7659, "step": 4254 }, { "epoch": 0.8163265306122449, "grad_norm": 0.9650082443479747, "learning_rate": 1.7171615712520218e-06, "loss": 0.7379, "step": 4255 }, { "epoch": 0.8165183817357731, "grad_norm": 0.9447294333312312, "learning_rate": 1.713680951874508e-06, "loss": 0.7869, "step": 4256 }, { "epoch": 0.8167102328593012, "grad_norm": 1.0144380116908636, "learning_rate": 1.7102035330060617e-06, "loss": 0.711, "step": 4257 }, { "epoch": 0.8169020839828294, "grad_norm": 1.001912800009289, "learning_rate": 1.7067293159898067e-06, "loss": 0.7917, "step": 4258 }, { "epoch": 0.8170939351063574, "grad_norm": 1.021951163890505, "learning_rate": 1.703258302167624e-06, "loss": 0.7292, "step": 4259 }, { "epoch": 0.8172857862298856, "grad_norm": 0.9894837004945511, "learning_rate": 1.6997904928801556e-06, "loss": 0.7654, "step": 4260 }, { "epoch": 0.8174776373534137, "grad_norm": 0.8832935049819942, "learning_rate": 1.6963258894668134e-06, "loss": 0.7717, "step": 4261 }, { "epoch": 0.8176694884769419, "grad_norm": 0.9303118949529631, "learning_rate": 1.6928644932657657e-06, "loss": 0.7065, "step": 4262 }, { "epoch": 0.81786133960047, "grad_norm": 0.8233774234941607, "learning_rate": 1.6894063056139398e-06, "loss": 0.6995, "step": 4263 }, { "epoch": 0.8180531907239982, "grad_norm": 0.9445744348352585, "learning_rate": 1.6859513278470274e-06, "loss": 0.7454, "step": 4264 }, { "epoch": 0.8182450418475263, "grad_norm": 0.9113233210561168, "learning_rate": 1.682499561299482e-06, "loss": 0.7438, "step": 4265 }, { "epoch": 0.8184368929710545, "grad_norm": 0.9182027571690241, "learning_rate": 1.679051007304513e-06, "loss": 0.7925, "step": 4266 }, { "epoch": 0.8186287440945826, "grad_norm": 1.0992772855046116, "learning_rate": 1.6756056671940902e-06, "loss": 0.6736, "step": 4267 }, { "epoch": 0.8188205952181108, "grad_norm": 0.9392984421217938, "learning_rate": 1.6721635422989414e-06, "loss": 0.7715, "step": 4268 }, { "epoch": 0.8190124463416388, "grad_norm": 1.0808511893117376, "learning_rate": 1.668724633948553e-06, "loss": 0.7121, "step": 4269 }, { "epoch": 0.819204297465167, "grad_norm": 0.8463986908815916, "learning_rate": 1.665288943471175e-06, "loss": 0.7493, "step": 4270 }, { "epoch": 0.8193961485886951, "grad_norm": 1.0196319183811013, "learning_rate": 1.6618564721938024e-06, "loss": 0.7565, "step": 4271 }, { "epoch": 0.8195879997122233, "grad_norm": 0.8722724261274903, "learning_rate": 1.658427221442196e-06, "loss": 0.7304, "step": 4272 }, { "epoch": 0.8197798508357514, "grad_norm": 0.8645024920043235, "learning_rate": 1.6550011925408694e-06, "loss": 0.7959, "step": 4273 }, { "epoch": 0.8199717019592796, "grad_norm": 0.8467082509245281, "learning_rate": 1.6515783868130908e-06, "loss": 0.7668, "step": 4274 }, { "epoch": 0.8201635530828078, "grad_norm": 1.0198362397048144, "learning_rate": 1.6481588055808894e-06, "loss": 0.724, "step": 4275 }, { "epoch": 0.8203554042063359, "grad_norm": 1.0815820104727656, "learning_rate": 1.6447424501650432e-06, "loss": 0.7487, "step": 4276 }, { "epoch": 0.8205472553298641, "grad_norm": 0.9424548273927058, "learning_rate": 1.6413293218850857e-06, "loss": 0.7678, "step": 4277 }, { "epoch": 0.8207391064533922, "grad_norm": 1.0387628537520182, "learning_rate": 1.637919422059303e-06, "loss": 0.7417, "step": 4278 }, { "epoch": 0.8209309575769204, "grad_norm": 0.9266264192003888, "learning_rate": 1.6345127520047333e-06, "loss": 0.7425, "step": 4279 }, { "epoch": 0.8211228087004484, "grad_norm": 0.885993771529705, "learning_rate": 1.6311093130371747e-06, "loss": 0.7821, "step": 4280 }, { "epoch": 0.8213146598239766, "grad_norm": 0.9282149597860657, "learning_rate": 1.6277091064711692e-06, "loss": 0.7673, "step": 4281 }, { "epoch": 0.8215065109475047, "grad_norm": 0.9124267671105892, "learning_rate": 1.624312133620013e-06, "loss": 0.7573, "step": 4282 }, { "epoch": 0.8216983620710329, "grad_norm": 0.9808185071564421, "learning_rate": 1.6209183957957541e-06, "loss": 0.697, "step": 4283 }, { "epoch": 0.821890213194561, "grad_norm": 0.8887234766188237, "learning_rate": 1.6175278943091888e-06, "loss": 0.7651, "step": 4284 }, { "epoch": 0.8220820643180892, "grad_norm": 0.6371549719811245, "learning_rate": 1.614140630469867e-06, "loss": 0.3059, "step": 4285 }, { "epoch": 0.8222739154416173, "grad_norm": 0.8698036610351338, "learning_rate": 1.6107566055860834e-06, "loss": 0.7926, "step": 4286 }, { "epoch": 0.8224657665651455, "grad_norm": 0.6046876664192394, "learning_rate": 1.607375820964887e-06, "loss": 0.3115, "step": 4287 }, { "epoch": 0.8226576176886736, "grad_norm": 1.1402588539453755, "learning_rate": 1.60399827791207e-06, "loss": 0.7534, "step": 4288 }, { "epoch": 0.8228494688122018, "grad_norm": 0.9302158554257085, "learning_rate": 1.6006239777321742e-06, "loss": 0.7287, "step": 4289 }, { "epoch": 0.8230413199357298, "grad_norm": 0.6316035149199368, "learning_rate": 1.5972529217284949e-06, "loss": 0.2977, "step": 4290 }, { "epoch": 0.823233171059258, "grad_norm": 0.8708374644051059, "learning_rate": 1.5938851112030651e-06, "loss": 0.7758, "step": 4291 }, { "epoch": 0.8234250221827861, "grad_norm": 1.0021855744491521, "learning_rate": 1.590520547456671e-06, "loss": 0.7707, "step": 4292 }, { "epoch": 0.8236168733063143, "grad_norm": 0.9507166245551465, "learning_rate": 1.5871592317888395e-06, "loss": 0.7742, "step": 4293 }, { "epoch": 0.8238087244298424, "grad_norm": 0.8519207874219937, "learning_rate": 1.5838011654978458e-06, "loss": 0.7424, "step": 4294 }, { "epoch": 0.8240005755533706, "grad_norm": 0.9567823286932946, "learning_rate": 1.580446349880712e-06, "loss": 0.7511, "step": 4295 }, { "epoch": 0.8241924266768987, "grad_norm": 1.0669808549223105, "learning_rate": 1.577094786233202e-06, "loss": 0.8193, "step": 4296 }, { "epoch": 0.8243842778004269, "grad_norm": 1.0301080812202743, "learning_rate": 1.5737464758498243e-06, "loss": 0.7671, "step": 4297 }, { "epoch": 0.824576128923955, "grad_norm": 0.969177510278237, "learning_rate": 1.5704014200238305e-06, "loss": 0.8142, "step": 4298 }, { "epoch": 0.8247679800474832, "grad_norm": 0.956442736691097, "learning_rate": 1.5670596200472143e-06, "loss": 0.7579, "step": 4299 }, { "epoch": 0.8249598311710113, "grad_norm": 0.8588725274601118, "learning_rate": 1.5637210772107148e-06, "loss": 0.7252, "step": 4300 }, { "epoch": 0.8251516822945394, "grad_norm": 1.0786717621541584, "learning_rate": 1.5603857928038102e-06, "loss": 0.7524, "step": 4301 }, { "epoch": 0.8253435334180675, "grad_norm": 0.9637257859112377, "learning_rate": 1.5570537681147214e-06, "loss": 0.7032, "step": 4302 }, { "epoch": 0.8255353845415957, "grad_norm": 1.0283521550791002, "learning_rate": 1.5537250044304076e-06, "loss": 0.6815, "step": 4303 }, { "epoch": 0.8257272356651238, "grad_norm": 1.0899598439868816, "learning_rate": 1.550399503036576e-06, "loss": 0.7078, "step": 4304 }, { "epoch": 0.825919086788652, "grad_norm": 0.9770160492391581, "learning_rate": 1.547077265217667e-06, "loss": 0.7631, "step": 4305 }, { "epoch": 0.8261109379121802, "grad_norm": 1.0352144093870739, "learning_rate": 1.5437582922568607e-06, "loss": 0.8147, "step": 4306 }, { "epoch": 0.8263027890357083, "grad_norm": 0.9269892036279069, "learning_rate": 1.5404425854360783e-06, "loss": 0.7252, "step": 4307 }, { "epoch": 0.8264946401592365, "grad_norm": 0.9170705987090536, "learning_rate": 1.5371301460359777e-06, "loss": 0.7983, "step": 4308 }, { "epoch": 0.8266864912827646, "grad_norm": 0.9613252052615929, "learning_rate": 1.5338209753359589e-06, "loss": 0.7389, "step": 4309 }, { "epoch": 0.8268783424062928, "grad_norm": 0.9032964142931548, "learning_rate": 1.530515074614155e-06, "loss": 0.776, "step": 4310 }, { "epoch": 0.8270701935298208, "grad_norm": 0.9478788783330039, "learning_rate": 1.5272124451474378e-06, "loss": 0.7906, "step": 4311 }, { "epoch": 0.827262044653349, "grad_norm": 1.0567959143847707, "learning_rate": 1.523913088211415e-06, "loss": 0.7356, "step": 4312 }, { "epoch": 0.8274538957768771, "grad_norm": 0.9671217815828272, "learning_rate": 1.5206170050804303e-06, "loss": 0.7469, "step": 4313 }, { "epoch": 0.8276457469004053, "grad_norm": 0.8943546847006756, "learning_rate": 1.5173241970275642e-06, "loss": 0.6758, "step": 4314 }, { "epoch": 0.8278375980239334, "grad_norm": 0.8282856561734788, "learning_rate": 1.5140346653246308e-06, "loss": 0.7796, "step": 4315 }, { "epoch": 0.8280294491474616, "grad_norm": 0.8894681807747947, "learning_rate": 1.51074841124218e-06, "loss": 0.7597, "step": 4316 }, { "epoch": 0.8282213002709897, "grad_norm": 0.9079170297753669, "learning_rate": 1.5074654360494934e-06, "loss": 0.7597, "step": 4317 }, { "epoch": 0.8284131513945179, "grad_norm": 1.0207587104631037, "learning_rate": 1.5041857410145878e-06, "loss": 0.7618, "step": 4318 }, { "epoch": 0.828605002518046, "grad_norm": 0.8971620254978271, "learning_rate": 1.5009093274042164e-06, "loss": 0.7547, "step": 4319 }, { "epoch": 0.8287968536415742, "grad_norm": 0.8766648089437936, "learning_rate": 1.4976361964838592e-06, "loss": 0.7281, "step": 4320 }, { "epoch": 0.8289887047651023, "grad_norm": 0.9747154462739372, "learning_rate": 1.4943663495177307e-06, "loss": 0.709, "step": 4321 }, { "epoch": 0.8291805558886304, "grad_norm": 0.999271868736543, "learning_rate": 1.4910997877687783e-06, "loss": 0.681, "step": 4322 }, { "epoch": 0.8293724070121585, "grad_norm": 0.9464956333547949, "learning_rate": 1.4878365124986748e-06, "loss": 0.7347, "step": 4323 }, { "epoch": 0.8295642581356867, "grad_norm": 0.9076506761300228, "learning_rate": 1.4845765249678356e-06, "loss": 0.6944, "step": 4324 }, { "epoch": 0.8297561092592148, "grad_norm": 0.9534704110882652, "learning_rate": 1.4813198264353934e-06, "loss": 0.6432, "step": 4325 }, { "epoch": 0.829947960382743, "grad_norm": 0.8560386782608076, "learning_rate": 1.4780664181592185e-06, "loss": 0.7689, "step": 4326 }, { "epoch": 0.8301398115062711, "grad_norm": 0.656097869412413, "learning_rate": 1.474816301395906e-06, "loss": 0.3027, "step": 4327 }, { "epoch": 0.8303316626297993, "grad_norm": 1.308661371358647, "learning_rate": 1.4715694774007827e-06, "loss": 0.7539, "step": 4328 }, { "epoch": 0.8305235137533274, "grad_norm": 0.9812627491194791, "learning_rate": 1.4683259474279011e-06, "loss": 0.7322, "step": 4329 }, { "epoch": 0.8307153648768556, "grad_norm": 0.6387597369545638, "learning_rate": 1.4650857127300444e-06, "loss": 0.2833, "step": 4330 }, { "epoch": 0.8309072160003838, "grad_norm": 0.9942082931012501, "learning_rate": 1.4618487745587195e-06, "loss": 0.7112, "step": 4331 }, { "epoch": 0.8310990671239118, "grad_norm": 1.0808668579895397, "learning_rate": 1.458615134164163e-06, "loss": 0.7654, "step": 4332 }, { "epoch": 0.83129091824744, "grad_norm": 0.9770186772493878, "learning_rate": 1.4553847927953334e-06, "loss": 0.7194, "step": 4333 }, { "epoch": 0.8314827693709681, "grad_norm": 1.0022422841788854, "learning_rate": 1.452157751699924e-06, "loss": 0.7446, "step": 4334 }, { "epoch": 0.8316746204944963, "grad_norm": 0.9187621563433505, "learning_rate": 1.4489340121243446e-06, "loss": 0.7793, "step": 4335 }, { "epoch": 0.8318664716180244, "grad_norm": 0.848683022522571, "learning_rate": 1.445713575313733e-06, "loss": 0.7169, "step": 4336 }, { "epoch": 0.8320583227415526, "grad_norm": 0.8806692064646261, "learning_rate": 1.4424964425119513e-06, "loss": 0.7478, "step": 4337 }, { "epoch": 0.8322501738650807, "grad_norm": 0.9457253827320699, "learning_rate": 1.4392826149615835e-06, "loss": 0.7158, "step": 4338 }, { "epoch": 0.8324420249886089, "grad_norm": 0.6417865907648213, "learning_rate": 1.436072093903943e-06, "loss": 0.2695, "step": 4339 }, { "epoch": 0.832633876112137, "grad_norm": 0.8573535742967843, "learning_rate": 1.432864880579058e-06, "loss": 0.7425, "step": 4340 }, { "epoch": 0.8328257272356652, "grad_norm": 0.6550660557665281, "learning_rate": 1.4296609762256853e-06, "loss": 0.3085, "step": 4341 }, { "epoch": 0.8330175783591932, "grad_norm": 0.9385773484527677, "learning_rate": 1.4264603820813006e-06, "loss": 0.7763, "step": 4342 }, { "epoch": 0.8332094294827214, "grad_norm": 1.0063548423216233, "learning_rate": 1.4232630993821017e-06, "loss": 0.7648, "step": 4343 }, { "epoch": 0.8334012806062495, "grad_norm": 0.8579163456005474, "learning_rate": 1.4200691293630086e-06, "loss": 0.7408, "step": 4344 }, { "epoch": 0.8335931317297777, "grad_norm": 0.9221191705771803, "learning_rate": 1.416878473257659e-06, "loss": 0.7138, "step": 4345 }, { "epoch": 0.8337849828533058, "grad_norm": 0.8964136243866592, "learning_rate": 1.4136911322984126e-06, "loss": 0.7287, "step": 4346 }, { "epoch": 0.833976833976834, "grad_norm": 0.8837352059349257, "learning_rate": 1.410507107716348e-06, "loss": 0.7639, "step": 4347 }, { "epoch": 0.8341686851003621, "grad_norm": 0.9155099699104979, "learning_rate": 1.4073264007412657e-06, "loss": 0.7446, "step": 4348 }, { "epoch": 0.8343605362238903, "grad_norm": 0.6554553003966201, "learning_rate": 1.4041490126016811e-06, "loss": 0.3234, "step": 4349 }, { "epoch": 0.8345523873474184, "grad_norm": 1.038000060232625, "learning_rate": 1.4009749445248288e-06, "loss": 0.8334, "step": 4350 }, { "epoch": 0.8347442384709466, "grad_norm": 0.5953547899946653, "learning_rate": 1.397804197736663e-06, "loss": 0.2754, "step": 4351 }, { "epoch": 0.8349360895944747, "grad_norm": 0.9892193856721048, "learning_rate": 1.3946367734618481e-06, "loss": 0.725, "step": 4352 }, { "epoch": 0.8351279407180028, "grad_norm": 0.924641000289643, "learning_rate": 1.391472672923777e-06, "loss": 0.7253, "step": 4353 }, { "epoch": 0.8353197918415309, "grad_norm": 1.030544021851813, "learning_rate": 1.3883118973445497e-06, "loss": 0.8374, "step": 4354 }, { "epoch": 0.8355116429650591, "grad_norm": 1.082142588971962, "learning_rate": 1.3851544479449852e-06, "loss": 0.7378, "step": 4355 }, { "epoch": 0.8357034940885872, "grad_norm": 1.0841867096189812, "learning_rate": 1.3820003259446157e-06, "loss": 0.663, "step": 4356 }, { "epoch": 0.8358953452121154, "grad_norm": 0.9584198096499018, "learning_rate": 1.3788495325616912e-06, "loss": 0.7261, "step": 4357 }, { "epoch": 0.8360871963356435, "grad_norm": 0.8541116975095506, "learning_rate": 1.375702069013175e-06, "loss": 0.7551, "step": 4358 }, { "epoch": 0.8362790474591717, "grad_norm": 1.03425734168408, "learning_rate": 1.3725579365147412e-06, "loss": 0.7038, "step": 4359 }, { "epoch": 0.8364708985826999, "grad_norm": 0.9047166304562462, "learning_rate": 1.3694171362807828e-06, "loss": 0.7488, "step": 4360 }, { "epoch": 0.836662749706228, "grad_norm": 1.019542545367237, "learning_rate": 1.366279669524402e-06, "loss": 0.7749, "step": 4361 }, { "epoch": 0.8368546008297562, "grad_norm": 0.9963134568158726, "learning_rate": 1.363145537457411e-06, "loss": 0.8078, "step": 4362 }, { "epoch": 0.8370464519532842, "grad_norm": 1.0116671196778837, "learning_rate": 1.3600147412903442e-06, "loss": 0.7486, "step": 4363 }, { "epoch": 0.8372383030768124, "grad_norm": 0.5812811451289132, "learning_rate": 1.3568872822324363e-06, "loss": 0.2855, "step": 4364 }, { "epoch": 0.8374301542003405, "grad_norm": 0.9172170438507781, "learning_rate": 1.353763161491639e-06, "loss": 0.7653, "step": 4365 }, { "epoch": 0.8376220053238687, "grad_norm": 0.9378856187752421, "learning_rate": 1.3506423802746128e-06, "loss": 0.7678, "step": 4366 }, { "epoch": 0.8378138564473968, "grad_norm": 1.1221108981521615, "learning_rate": 1.3475249397867263e-06, "loss": 0.7163, "step": 4367 }, { "epoch": 0.838005707570925, "grad_norm": 0.903275049412431, "learning_rate": 1.3444108412320644e-06, "loss": 0.748, "step": 4368 }, { "epoch": 0.8381975586944531, "grad_norm": 0.8682159435809057, "learning_rate": 1.341300085813415e-06, "loss": 0.7835, "step": 4369 }, { "epoch": 0.8383894098179813, "grad_norm": 0.8955592607926756, "learning_rate": 1.338192674732276e-06, "loss": 0.7498, "step": 4370 }, { "epoch": 0.8385812609415094, "grad_norm": 1.0035935430256964, "learning_rate": 1.3350886091888583e-06, "loss": 0.7729, "step": 4371 }, { "epoch": 0.8387731120650376, "grad_norm": 1.0240950262368447, "learning_rate": 1.3319878903820682e-06, "loss": 0.7614, "step": 4372 }, { "epoch": 0.8389649631885657, "grad_norm": 0.966817879482393, "learning_rate": 1.3288905195095358e-06, "loss": 0.7327, "step": 4373 }, { "epoch": 0.8391568143120938, "grad_norm": 1.005910773255386, "learning_rate": 1.3257964977675874e-06, "loss": 0.7453, "step": 4374 }, { "epoch": 0.8393486654356219, "grad_norm": 0.9465869596432804, "learning_rate": 1.3227058263512572e-06, "loss": 0.7471, "step": 4375 }, { "epoch": 0.8395405165591501, "grad_norm": 1.084416071551489, "learning_rate": 1.3196185064542867e-06, "loss": 0.7212, "step": 4376 }, { "epoch": 0.8397323676826782, "grad_norm": 0.9388811660239298, "learning_rate": 1.3165345392691253e-06, "loss": 0.7557, "step": 4377 }, { "epoch": 0.8399242188062064, "grad_norm": 0.940062500317532, "learning_rate": 1.3134539259869228e-06, "loss": 0.7016, "step": 4378 }, { "epoch": 0.8401160699297345, "grad_norm": 0.8951929855138435, "learning_rate": 1.3103766677975372e-06, "loss": 0.7858, "step": 4379 }, { "epoch": 0.8403079210532627, "grad_norm": 0.8675666357311513, "learning_rate": 1.3073027658895266e-06, "loss": 0.7726, "step": 4380 }, { "epoch": 0.8404997721767908, "grad_norm": 0.9080904465459845, "learning_rate": 1.3042322214501557e-06, "loss": 0.7434, "step": 4381 }, { "epoch": 0.840691623300319, "grad_norm": 0.9388644731379108, "learning_rate": 1.3011650356653959e-06, "loss": 0.7716, "step": 4382 }, { "epoch": 0.8408834744238471, "grad_norm": 0.9446466824252613, "learning_rate": 1.2981012097199142e-06, "loss": 0.82, "step": 4383 }, { "epoch": 0.8410753255473752, "grad_norm": 1.0196501574194579, "learning_rate": 1.2950407447970837e-06, "loss": 0.752, "step": 4384 }, { "epoch": 0.8412671766709033, "grad_norm": 0.9744175436211062, "learning_rate": 1.2919836420789823e-06, "loss": 0.7414, "step": 4385 }, { "epoch": 0.8414590277944315, "grad_norm": 0.8674826928587225, "learning_rate": 1.2889299027463763e-06, "loss": 0.7139, "step": 4386 }, { "epoch": 0.8416508789179596, "grad_norm": 0.9487051954499719, "learning_rate": 1.2858795279787517e-06, "loss": 0.7095, "step": 4387 }, { "epoch": 0.8418427300414878, "grad_norm": 0.8844326798310979, "learning_rate": 1.2828325189542824e-06, "loss": 0.7403, "step": 4388 }, { "epoch": 0.842034581165016, "grad_norm": 0.8919070703743277, "learning_rate": 1.2797888768498456e-06, "loss": 0.7542, "step": 4389 }, { "epoch": 0.8422264322885441, "grad_norm": 1.0083179799781796, "learning_rate": 1.276748602841018e-06, "loss": 0.7234, "step": 4390 }, { "epoch": 0.8424182834120723, "grad_norm": 0.9288424578271836, "learning_rate": 1.2737116981020737e-06, "loss": 0.7958, "step": 4391 }, { "epoch": 0.8426101345356004, "grad_norm": 0.883310256478905, "learning_rate": 1.2706781638059917e-06, "loss": 0.7332, "step": 4392 }, { "epoch": 0.8428019856591286, "grad_norm": 0.9470513011691055, "learning_rate": 1.267648001124442e-06, "loss": 0.7593, "step": 4393 }, { "epoch": 0.8429938367826567, "grad_norm": 0.9117759498611084, "learning_rate": 1.2646212112277956e-06, "loss": 0.6564, "step": 4394 }, { "epoch": 0.8431856879061848, "grad_norm": 0.9291837669655212, "learning_rate": 1.2615977952851188e-06, "loss": 0.7293, "step": 4395 }, { "epoch": 0.8433775390297129, "grad_norm": 0.9354106265139308, "learning_rate": 1.2585777544641765e-06, "loss": 0.7333, "step": 4396 }, { "epoch": 0.8435693901532411, "grad_norm": 0.9773383269828421, "learning_rate": 1.2555610899314318e-06, "loss": 0.7165, "step": 4397 }, { "epoch": 0.8437612412767692, "grad_norm": 0.9052561331496262, "learning_rate": 1.2525478028520432e-06, "loss": 0.7346, "step": 4398 }, { "epoch": 0.8439530924002974, "grad_norm": 0.8158936030799474, "learning_rate": 1.2495378943898584e-06, "loss": 0.7048, "step": 4399 }, { "epoch": 0.8441449435238255, "grad_norm": 1.1085992558890632, "learning_rate": 1.2465313657074263e-06, "loss": 0.7643, "step": 4400 }, { "epoch": 0.8443367946473537, "grad_norm": 0.8906231906036717, "learning_rate": 1.243528217965988e-06, "loss": 0.7044, "step": 4401 }, { "epoch": 0.8445286457708818, "grad_norm": 0.9318243424455106, "learning_rate": 1.2405284523254823e-06, "loss": 0.8173, "step": 4402 }, { "epoch": 0.84472049689441, "grad_norm": 0.9418753986809999, "learning_rate": 1.2375320699445391e-06, "loss": 0.7173, "step": 4403 }, { "epoch": 0.8449123480179381, "grad_norm": 0.8801134806380528, "learning_rate": 1.2345390719804806e-06, "loss": 0.7632, "step": 4404 }, { "epoch": 0.8451041991414662, "grad_norm": 1.0503782462362397, "learning_rate": 1.2315494595893218e-06, "loss": 0.7493, "step": 4405 }, { "epoch": 0.8452960502649943, "grad_norm": 0.9441052132229762, "learning_rate": 1.2285632339257703e-06, "loss": 0.7303, "step": 4406 }, { "epoch": 0.8454879013885225, "grad_norm": 0.9677434590893963, "learning_rate": 1.2255803961432299e-06, "loss": 0.8043, "step": 4407 }, { "epoch": 0.8456797525120506, "grad_norm": 0.6074228633210468, "learning_rate": 1.2226009473937906e-06, "loss": 0.3188, "step": 4408 }, { "epoch": 0.8458716036355788, "grad_norm": 1.057231150606863, "learning_rate": 1.2196248888282348e-06, "loss": 0.8114, "step": 4409 }, { "epoch": 0.8460634547591069, "grad_norm": 1.0628967941510967, "learning_rate": 1.2166522215960352e-06, "loss": 0.7832, "step": 4410 }, { "epoch": 0.8462553058826351, "grad_norm": 0.637276266739842, "learning_rate": 1.2136829468453549e-06, "loss": 0.304, "step": 4411 }, { "epoch": 0.8464471570061632, "grad_norm": 0.8879682428449356, "learning_rate": 1.2107170657230515e-06, "loss": 0.7475, "step": 4412 }, { "epoch": 0.8466390081296914, "grad_norm": 0.9201285117109449, "learning_rate": 1.2077545793746626e-06, "loss": 0.7834, "step": 4413 }, { "epoch": 0.8468308592532195, "grad_norm": 0.9793780893223796, "learning_rate": 1.2047954889444191e-06, "loss": 0.6897, "step": 4414 }, { "epoch": 0.8470227103767476, "grad_norm": 0.9073474932245863, "learning_rate": 1.2018397955752414e-06, "loss": 0.7494, "step": 4415 }, { "epoch": 0.8472145615002757, "grad_norm": 0.9121545142608827, "learning_rate": 1.1988875004087385e-06, "loss": 0.7605, "step": 4416 }, { "epoch": 0.8474064126238039, "grad_norm": 0.9568702175532476, "learning_rate": 1.195938604585205e-06, "loss": 0.758, "step": 4417 }, { "epoch": 0.847598263747332, "grad_norm": 0.930604511897546, "learning_rate": 1.192993109243622e-06, "loss": 0.7366, "step": 4418 }, { "epoch": 0.8477901148708602, "grad_norm": 1.1204650183295395, "learning_rate": 1.1900510155216583e-06, "loss": 0.6956, "step": 4419 }, { "epoch": 0.8479819659943884, "grad_norm": 0.6131934845956133, "learning_rate": 1.187112324555667e-06, "loss": 0.3022, "step": 4420 }, { "epoch": 0.8481738171179165, "grad_norm": 0.9077109625990194, "learning_rate": 1.1841770374806915e-06, "loss": 0.7275, "step": 4421 }, { "epoch": 0.8483656682414447, "grad_norm": 0.9741882514686228, "learning_rate": 1.181245155430457e-06, "loss": 0.7423, "step": 4422 }, { "epoch": 0.8485575193649728, "grad_norm": 0.9321382093569964, "learning_rate": 1.1783166795373724e-06, "loss": 0.7358, "step": 4423 }, { "epoch": 0.848749370488501, "grad_norm": 0.9137080592854756, "learning_rate": 1.1753916109325336e-06, "loss": 0.7484, "step": 4424 }, { "epoch": 0.8489412216120291, "grad_norm": 0.9149951183895209, "learning_rate": 1.1724699507457194e-06, "loss": 0.7486, "step": 4425 }, { "epoch": 0.8491330727355572, "grad_norm": 0.8988549788955831, "learning_rate": 1.1695517001053913e-06, "loss": 0.7018, "step": 4426 }, { "epoch": 0.8493249238590853, "grad_norm": 0.8665789620398222, "learning_rate": 1.1666368601386968e-06, "loss": 0.7896, "step": 4427 }, { "epoch": 0.8495167749826135, "grad_norm": 0.9035158602911613, "learning_rate": 1.163725431971463e-06, "loss": 0.7107, "step": 4428 }, { "epoch": 0.8497086261061416, "grad_norm": 0.894969737769863, "learning_rate": 1.1608174167282004e-06, "loss": 0.7323, "step": 4429 }, { "epoch": 0.8499004772296698, "grad_norm": 0.8971383522729176, "learning_rate": 1.1579128155320984e-06, "loss": 0.7372, "step": 4430 }, { "epoch": 0.8500923283531979, "grad_norm": 0.9925160556750878, "learning_rate": 1.1550116295050363e-06, "loss": 0.721, "step": 4431 }, { "epoch": 0.8502841794767261, "grad_norm": 1.097975240897539, "learning_rate": 1.152113859767565e-06, "loss": 0.6607, "step": 4432 }, { "epoch": 0.8504760306002542, "grad_norm": 0.8652279720982038, "learning_rate": 1.1492195074389211e-06, "loss": 0.7018, "step": 4433 }, { "epoch": 0.8506678817237824, "grad_norm": 0.9676682065810913, "learning_rate": 1.1463285736370178e-06, "loss": 0.7453, "step": 4434 }, { "epoch": 0.8508597328473105, "grad_norm": 0.878305713235828, "learning_rate": 1.1434410594784494e-06, "loss": 0.7673, "step": 4435 }, { "epoch": 0.8510515839708386, "grad_norm": 0.8742923647599405, "learning_rate": 1.1405569660784932e-06, "loss": 0.7883, "step": 4436 }, { "epoch": 0.8512434350943667, "grad_norm": 0.959358525783699, "learning_rate": 1.1376762945510988e-06, "loss": 0.8013, "step": 4437 }, { "epoch": 0.8514352862178949, "grad_norm": 1.2642306371386551, "learning_rate": 1.1347990460088998e-06, "loss": 0.7819, "step": 4438 }, { "epoch": 0.851627137341423, "grad_norm": 0.7780209158134291, "learning_rate": 1.1319252215632026e-06, "loss": 0.701, "step": 4439 }, { "epoch": 0.8518189884649512, "grad_norm": 0.9301972204892178, "learning_rate": 1.1290548223239938e-06, "loss": 0.741, "step": 4440 }, { "epoch": 0.8520108395884793, "grad_norm": 0.9074891255593749, "learning_rate": 1.1261878493999389e-06, "loss": 0.7925, "step": 4441 }, { "epoch": 0.8522026907120075, "grad_norm": 0.974663826429861, "learning_rate": 1.1233243038983755e-06, "loss": 0.7376, "step": 4442 }, { "epoch": 0.8523945418355356, "grad_norm": 0.9948671945350999, "learning_rate": 1.1204641869253219e-06, "loss": 0.7985, "step": 4443 }, { "epoch": 0.8525863929590638, "grad_norm": 0.6329188367699562, "learning_rate": 1.117607499585468e-06, "loss": 0.3511, "step": 4444 }, { "epoch": 0.852778244082592, "grad_norm": 0.9314747375817183, "learning_rate": 1.1147542429821812e-06, "loss": 0.7672, "step": 4445 }, { "epoch": 0.8529700952061201, "grad_norm": 0.8884037040196184, "learning_rate": 1.1119044182175054e-06, "loss": 0.7674, "step": 4446 }, { "epoch": 0.8531619463296481, "grad_norm": 0.7914890423528304, "learning_rate": 1.109058026392158e-06, "loss": 0.7518, "step": 4447 }, { "epoch": 0.8533537974531763, "grad_norm": 0.9744218529438126, "learning_rate": 1.1062150686055272e-06, "loss": 0.8031, "step": 4448 }, { "epoch": 0.8535456485767045, "grad_norm": 0.9131950579698429, "learning_rate": 1.1033755459556793e-06, "loss": 0.8192, "step": 4449 }, { "epoch": 0.8537374997002326, "grad_norm": 0.9242751375791228, "learning_rate": 1.1005394595393493e-06, "loss": 0.7764, "step": 4450 }, { "epoch": 0.8539293508237608, "grad_norm": 0.914538093798662, "learning_rate": 1.0977068104519516e-06, "loss": 0.8214, "step": 4451 }, { "epoch": 0.8541212019472889, "grad_norm": 0.8677476318686238, "learning_rate": 1.094877599787567e-06, "loss": 0.7145, "step": 4452 }, { "epoch": 0.8543130530708171, "grad_norm": 0.894041350893823, "learning_rate": 1.0920518286389503e-06, "loss": 0.8013, "step": 4453 }, { "epoch": 0.8545049041943452, "grad_norm": 0.6415742210158704, "learning_rate": 1.0892294980975282e-06, "loss": 0.3228, "step": 4454 }, { "epoch": 0.8546967553178734, "grad_norm": 0.8996463404048258, "learning_rate": 1.0864106092533977e-06, "loss": 0.7515, "step": 4455 }, { "epoch": 0.8548886064414015, "grad_norm": 0.8817062754261986, "learning_rate": 1.0835951631953256e-06, "loss": 0.758, "step": 4456 }, { "epoch": 0.8550804575649296, "grad_norm": 0.6178914514772497, "learning_rate": 1.0807831610107521e-06, "loss": 0.3011, "step": 4457 }, { "epoch": 0.8552723086884577, "grad_norm": 0.8339961864025538, "learning_rate": 1.0779746037857842e-06, "loss": 0.7592, "step": 4458 }, { "epoch": 0.8554641598119859, "grad_norm": 0.9311724233894971, "learning_rate": 1.075169492605198e-06, "loss": 0.7756, "step": 4459 }, { "epoch": 0.855656010935514, "grad_norm": 0.9451738116265073, "learning_rate": 1.072367828552444e-06, "loss": 0.7316, "step": 4460 }, { "epoch": 0.8558478620590422, "grad_norm": 1.0485811106967342, "learning_rate": 1.0695696127096355e-06, "loss": 0.8246, "step": 4461 }, { "epoch": 0.8560397131825703, "grad_norm": 0.7855508918262053, "learning_rate": 1.0667748461575544e-06, "loss": 0.7704, "step": 4462 }, { "epoch": 0.8562315643060985, "grad_norm": 0.8979058486612296, "learning_rate": 1.063983529975654e-06, "loss": 0.7189, "step": 4463 }, { "epoch": 0.8564234154296266, "grad_norm": 1.1552401152691922, "learning_rate": 1.0611956652420485e-06, "loss": 0.7122, "step": 4464 }, { "epoch": 0.8566152665531548, "grad_norm": 0.8958565916658486, "learning_rate": 1.058411253033529e-06, "loss": 0.7591, "step": 4465 }, { "epoch": 0.8568071176766829, "grad_norm": 0.6123835691863804, "learning_rate": 1.0556302944255437e-06, "loss": 0.2937, "step": 4466 }, { "epoch": 0.8569989688002111, "grad_norm": 0.9763584128806764, "learning_rate": 1.0528527904922114e-06, "loss": 0.7018, "step": 4467 }, { "epoch": 0.8571908199237391, "grad_norm": 0.8945783427781242, "learning_rate": 1.0500787423063164e-06, "loss": 0.7801, "step": 4468 }, { "epoch": 0.8573826710472673, "grad_norm": 0.9807807771544357, "learning_rate": 1.0473081509393045e-06, "loss": 0.7611, "step": 4469 }, { "epoch": 0.8575745221707954, "grad_norm": 0.9959240322885287, "learning_rate": 1.0445410174612934e-06, "loss": 0.766, "step": 4470 }, { "epoch": 0.8577663732943236, "grad_norm": 0.9673413313326914, "learning_rate": 1.0417773429410582e-06, "loss": 0.7579, "step": 4471 }, { "epoch": 0.8579582244178517, "grad_norm": 0.9630588793497041, "learning_rate": 1.0390171284460416e-06, "loss": 0.6855, "step": 4472 }, { "epoch": 0.8581500755413799, "grad_norm": 0.8855638715089543, "learning_rate": 1.0362603750423494e-06, "loss": 0.74, "step": 4473 }, { "epoch": 0.858341926664908, "grad_norm": 0.9806608893651872, "learning_rate": 1.033507083794748e-06, "loss": 0.8112, "step": 4474 }, { "epoch": 0.8585337777884362, "grad_norm": 0.9138024260150804, "learning_rate": 1.0307572557666745e-06, "loss": 0.7296, "step": 4475 }, { "epoch": 0.8587256289119644, "grad_norm": 0.9326304153087143, "learning_rate": 1.0280108920202192e-06, "loss": 0.7722, "step": 4476 }, { "epoch": 0.8589174800354925, "grad_norm": 0.8785755115578965, "learning_rate": 1.0252679936161392e-06, "loss": 0.7524, "step": 4477 }, { "epoch": 0.8591093311590206, "grad_norm": 0.928260596817734, "learning_rate": 1.0225285616138515e-06, "loss": 0.7012, "step": 4478 }, { "epoch": 0.8593011822825487, "grad_norm": 1.0251067715132347, "learning_rate": 1.0197925970714317e-06, "loss": 0.7643, "step": 4479 }, { "epoch": 0.8594930334060769, "grad_norm": 0.7902342811327004, "learning_rate": 1.017060101045625e-06, "loss": 0.6766, "step": 4480 }, { "epoch": 0.859684884529605, "grad_norm": 0.8849351573819926, "learning_rate": 1.0143310745918279e-06, "loss": 0.7576, "step": 4481 }, { "epoch": 0.8598767356531332, "grad_norm": 1.0130248805225048, "learning_rate": 1.0116055187641005e-06, "loss": 0.7298, "step": 4482 }, { "epoch": 0.8600685867766613, "grad_norm": 0.9503194175815818, "learning_rate": 1.008883434615161e-06, "loss": 0.7515, "step": 4483 }, { "epoch": 0.8602604379001895, "grad_norm": 0.9903706596453343, "learning_rate": 1.0061648231963894e-06, "loss": 0.7895, "step": 4484 }, { "epoch": 0.8604522890237176, "grad_norm": 0.9103807508234805, "learning_rate": 1.0034496855578202e-06, "loss": 0.8017, "step": 4485 }, { "epoch": 0.8606441401472458, "grad_norm": 0.8993263525576805, "learning_rate": 1.00073802274815e-06, "loss": 0.74, "step": 4486 }, { "epoch": 0.8608359912707739, "grad_norm": 0.9844937382302132, "learning_rate": 9.980298358147323e-07, "loss": 0.7528, "step": 4487 }, { "epoch": 0.861027842394302, "grad_norm": 0.7905119065354798, "learning_rate": 9.953251258035757e-07, "loss": 0.7166, "step": 4488 }, { "epoch": 0.8612196935178301, "grad_norm": 0.9344011083294722, "learning_rate": 9.926238937593479e-07, "loss": 0.7172, "step": 4489 }, { "epoch": 0.8614115446413583, "grad_norm": 0.9432713625570368, "learning_rate": 9.899261407253757e-07, "loss": 0.7524, "step": 4490 }, { "epoch": 0.8616033957648864, "grad_norm": 0.9833651599121062, "learning_rate": 9.872318677436387e-07, "loss": 0.7409, "step": 4491 }, { "epoch": 0.8617952468884146, "grad_norm": 0.9240039804241971, "learning_rate": 9.845410758547724e-07, "loss": 0.7753, "step": 4492 }, { "epoch": 0.8619870980119427, "grad_norm": 0.9525796469631305, "learning_rate": 9.818537660980666e-07, "loss": 0.8016, "step": 4493 }, { "epoch": 0.8621789491354709, "grad_norm": 0.8784372188029909, "learning_rate": 9.791699395114728e-07, "loss": 0.7188, "step": 4494 }, { "epoch": 0.862370800258999, "grad_norm": 0.9858561033599224, "learning_rate": 9.764895971315914e-07, "loss": 0.7846, "step": 4495 }, { "epoch": 0.8625626513825272, "grad_norm": 0.8680360912895427, "learning_rate": 9.738127399936758e-07, "loss": 0.7939, "step": 4496 }, { "epoch": 0.8627545025060553, "grad_norm": 0.8536121961528268, "learning_rate": 9.711393691316383e-07, "loss": 0.7052, "step": 4497 }, { "epoch": 0.8629463536295835, "grad_norm": 1.0258740249910676, "learning_rate": 9.684694855780397e-07, "loss": 0.735, "step": 4498 }, { "epoch": 0.8631382047531115, "grad_norm": 0.8097616496732483, "learning_rate": 9.65803090364098e-07, "loss": 0.6933, "step": 4499 }, { "epoch": 0.8633300558766397, "grad_norm": 1.0601389121306206, "learning_rate": 9.63140184519682e-07, "loss": 0.7747, "step": 4500 }, { "epoch": 0.8635219070001678, "grad_norm": 0.9389640246091377, "learning_rate": 9.604807690733109e-07, "loss": 0.7424, "step": 4501 }, { "epoch": 0.863713758123696, "grad_norm": 0.9968856888573251, "learning_rate": 9.578248450521598e-07, "loss": 0.7965, "step": 4502 }, { "epoch": 0.8639056092472241, "grad_norm": 0.9271474896503822, "learning_rate": 9.551724134820506e-07, "loss": 0.8211, "step": 4503 }, { "epoch": 0.8640974603707523, "grad_norm": 0.9090027047613805, "learning_rate": 9.525234753874623e-07, "loss": 0.7861, "step": 4504 }, { "epoch": 0.8642893114942805, "grad_norm": 0.8746106848486978, "learning_rate": 9.498780317915201e-07, "loss": 0.7906, "step": 4505 }, { "epoch": 0.8644811626178086, "grad_norm": 0.9504841952253358, "learning_rate": 9.472360837159989e-07, "loss": 0.7276, "step": 4506 }, { "epoch": 0.8646730137413368, "grad_norm": 1.0424824916254125, "learning_rate": 9.445976321813277e-07, "loss": 0.7956, "step": 4507 }, { "epoch": 0.8648648648648649, "grad_norm": 1.0474078193940592, "learning_rate": 9.419626782065771e-07, "loss": 0.8047, "step": 4508 }, { "epoch": 0.865056715988393, "grad_norm": 0.9215776509612069, "learning_rate": 9.393312228094798e-07, "loss": 0.7403, "step": 4509 }, { "epoch": 0.8652485671119211, "grad_norm": 0.9993389184254422, "learning_rate": 9.367032670064035e-07, "loss": 0.762, "step": 4510 }, { "epoch": 0.8654404182354493, "grad_norm": 0.9569152770543506, "learning_rate": 9.340788118123745e-07, "loss": 0.7726, "step": 4511 }, { "epoch": 0.8656322693589774, "grad_norm": 0.9401166492894565, "learning_rate": 9.314578582410593e-07, "loss": 0.7492, "step": 4512 }, { "epoch": 0.8658241204825056, "grad_norm": 0.8746128130077693, "learning_rate": 9.288404073047763e-07, "loss": 0.7422, "step": 4513 }, { "epoch": 0.8660159716060337, "grad_norm": 1.0354443398661144, "learning_rate": 9.26226460014491e-07, "loss": 0.7507, "step": 4514 }, { "epoch": 0.8662078227295619, "grad_norm": 0.9724874768960031, "learning_rate": 9.236160173798148e-07, "loss": 0.6956, "step": 4515 }, { "epoch": 0.86639967385309, "grad_norm": 0.9232991560156528, "learning_rate": 9.210090804090044e-07, "loss": 0.7251, "step": 4516 }, { "epoch": 0.8665915249766182, "grad_norm": 0.9523127476800638, "learning_rate": 9.184056501089633e-07, "loss": 0.7642, "step": 4517 }, { "epoch": 0.8667833761001463, "grad_norm": 1.1699913693083484, "learning_rate": 9.158057274852405e-07, "loss": 0.7649, "step": 4518 }, { "epoch": 0.8669752272236745, "grad_norm": 0.8942346831737112, "learning_rate": 9.132093135420339e-07, "loss": 0.7248, "step": 4519 }, { "epoch": 0.8671670783472025, "grad_norm": 0.6315524913635933, "learning_rate": 9.106164092821801e-07, "loss": 0.2949, "step": 4520 }, { "epoch": 0.8673589294707307, "grad_norm": 0.6328333254454078, "learning_rate": 9.080270157071625e-07, "loss": 0.2623, "step": 4521 }, { "epoch": 0.8675507805942588, "grad_norm": 0.9034837801559692, "learning_rate": 9.054411338171099e-07, "loss": 0.7648, "step": 4522 }, { "epoch": 0.867742631717787, "grad_norm": 0.872797063712405, "learning_rate": 9.028587646107911e-07, "loss": 0.6544, "step": 4523 }, { "epoch": 0.8679344828413151, "grad_norm": 0.8727936721180701, "learning_rate": 9.002799090856251e-07, "loss": 0.7665, "step": 4524 }, { "epoch": 0.8681263339648433, "grad_norm": 0.8852846102413808, "learning_rate": 8.977045682376672e-07, "loss": 0.7235, "step": 4525 }, { "epoch": 0.8683181850883714, "grad_norm": 0.9319826804472976, "learning_rate": 8.951327430616175e-07, "loss": 0.773, "step": 4526 }, { "epoch": 0.8685100362118996, "grad_norm": 0.8892904414263818, "learning_rate": 8.9256443455082e-07, "loss": 0.7236, "step": 4527 }, { "epoch": 0.8687018873354277, "grad_norm": 0.8771744920524357, "learning_rate": 8.899996436972525e-07, "loss": 0.6799, "step": 4528 }, { "epoch": 0.8688937384589559, "grad_norm": 0.860815273542738, "learning_rate": 8.874383714915469e-07, "loss": 0.8054, "step": 4529 }, { "epoch": 0.8690855895824839, "grad_norm": 1.0450864159033582, "learning_rate": 8.848806189229664e-07, "loss": 0.7664, "step": 4530 }, { "epoch": 0.8692774407060121, "grad_norm": 1.1029686003203247, "learning_rate": 8.823263869794185e-07, "loss": 0.6726, "step": 4531 }, { "epoch": 0.8694692918295402, "grad_norm": 1.0498797727405584, "learning_rate": 8.797756766474474e-07, "loss": 0.7016, "step": 4532 }, { "epoch": 0.8696611429530684, "grad_norm": 0.9143599006445796, "learning_rate": 8.772284889122451e-07, "loss": 0.7218, "step": 4533 }, { "epoch": 0.8698529940765966, "grad_norm": 0.9138552236495243, "learning_rate": 8.746848247576334e-07, "loss": 0.747, "step": 4534 }, { "epoch": 0.8700448452001247, "grad_norm": 0.943036157955944, "learning_rate": 8.721446851660797e-07, "loss": 0.6864, "step": 4535 }, { "epoch": 0.8702366963236529, "grad_norm": 0.9000275756396924, "learning_rate": 8.696080711186872e-07, "loss": 0.7464, "step": 4536 }, { "epoch": 0.870428547447181, "grad_norm": 0.9755407990066779, "learning_rate": 8.670749835951964e-07, "loss": 0.7361, "step": 4537 }, { "epoch": 0.8706203985707092, "grad_norm": 1.0242108103260326, "learning_rate": 8.645454235739903e-07, "loss": 0.7474, "step": 4538 }, { "epoch": 0.8708122496942373, "grad_norm": 1.1405259201704854, "learning_rate": 8.620193920320852e-07, "loss": 0.6686, "step": 4539 }, { "epoch": 0.8710041008177654, "grad_norm": 0.8658607368464281, "learning_rate": 8.594968899451383e-07, "loss": 0.7253, "step": 4540 }, { "epoch": 0.8711959519412935, "grad_norm": 0.8752112186929646, "learning_rate": 8.56977918287436e-07, "loss": 0.7067, "step": 4541 }, { "epoch": 0.8713878030648217, "grad_norm": 0.8751258239717997, "learning_rate": 8.544624780319089e-07, "loss": 0.6936, "step": 4542 }, { "epoch": 0.8715796541883498, "grad_norm": 0.9416798894878121, "learning_rate": 8.519505701501219e-07, "loss": 0.7838, "step": 4543 }, { "epoch": 0.871771505311878, "grad_norm": 0.8909393531316171, "learning_rate": 8.494421956122734e-07, "loss": 0.8328, "step": 4544 }, { "epoch": 0.8719633564354061, "grad_norm": 0.9074859163189887, "learning_rate": 8.469373553871996e-07, "loss": 0.7488, "step": 4545 }, { "epoch": 0.8721552075589343, "grad_norm": 0.9558781754931496, "learning_rate": 8.444360504423699e-07, "loss": 0.7652, "step": 4546 }, { "epoch": 0.8723470586824624, "grad_norm": 0.9215185640307311, "learning_rate": 8.419382817438859e-07, "loss": 0.767, "step": 4547 }, { "epoch": 0.8725389098059906, "grad_norm": 0.989780876640017, "learning_rate": 8.394440502564915e-07, "loss": 0.7212, "step": 4548 }, { "epoch": 0.8727307609295187, "grad_norm": 0.9807171215217254, "learning_rate": 8.369533569435551e-07, "loss": 0.7427, "step": 4549 }, { "epoch": 0.8729226120530469, "grad_norm": 0.8740266032314246, "learning_rate": 8.344662027670847e-07, "loss": 0.7535, "step": 4550 }, { "epoch": 0.8731144631765749, "grad_norm": 1.1327079414662466, "learning_rate": 8.319825886877176e-07, "loss": 0.7422, "step": 4551 }, { "epoch": 0.8733063143001031, "grad_norm": 0.9122713089917354, "learning_rate": 8.29502515664723e-07, "loss": 0.7157, "step": 4552 }, { "epoch": 0.8734981654236312, "grad_norm": 0.9825521379143864, "learning_rate": 8.270259846560114e-07, "loss": 0.7573, "step": 4553 }, { "epoch": 0.8736900165471594, "grad_norm": 0.9047041163277897, "learning_rate": 8.245529966181121e-07, "loss": 0.8241, "step": 4554 }, { "epoch": 0.8738818676706875, "grad_norm": 0.9555908146683991, "learning_rate": 8.220835525061954e-07, "loss": 0.6886, "step": 4555 }, { "epoch": 0.8740737187942157, "grad_norm": 0.9629162547801708, "learning_rate": 8.196176532740585e-07, "loss": 0.7286, "step": 4556 }, { "epoch": 0.8742655699177438, "grad_norm": 1.0101810685370238, "learning_rate": 8.171552998741295e-07, "loss": 0.7559, "step": 4557 }, { "epoch": 0.874457421041272, "grad_norm": 0.9107074108856309, "learning_rate": 8.146964932574719e-07, "loss": 0.7499, "step": 4558 }, { "epoch": 0.8746492721648002, "grad_norm": 0.947587557214617, "learning_rate": 8.122412343737729e-07, "loss": 0.7175, "step": 4559 }, { "epoch": 0.8748411232883283, "grad_norm": 0.9420698476343865, "learning_rate": 8.097895241713527e-07, "loss": 0.8068, "step": 4560 }, { "epoch": 0.8750329744118563, "grad_norm": 0.8203164132593601, "learning_rate": 8.073413635971606e-07, "loss": 0.7373, "step": 4561 }, { "epoch": 0.8752248255353845, "grad_norm": 0.9489589592483354, "learning_rate": 8.048967535967722e-07, "loss": 0.7987, "step": 4562 }, { "epoch": 0.8754166766589127, "grad_norm": 0.8197440649449819, "learning_rate": 8.024556951143969e-07, "loss": 0.7405, "step": 4563 }, { "epoch": 0.8756085277824408, "grad_norm": 0.8988820896116033, "learning_rate": 8.000181890928683e-07, "loss": 0.805, "step": 4564 }, { "epoch": 0.875800378905969, "grad_norm": 0.9554793414338361, "learning_rate": 7.975842364736497e-07, "loss": 0.7725, "step": 4565 }, { "epoch": 0.8759922300294971, "grad_norm": 0.9524213902047028, "learning_rate": 7.951538381968304e-07, "loss": 0.7988, "step": 4566 }, { "epoch": 0.8761840811530253, "grad_norm": 1.0404437334490504, "learning_rate": 7.927269952011285e-07, "loss": 0.7455, "step": 4567 }, { "epoch": 0.8763759322765534, "grad_norm": 0.9362222935673528, "learning_rate": 7.903037084238873e-07, "loss": 0.735, "step": 4568 }, { "epoch": 0.8765677834000816, "grad_norm": 0.9659079725997624, "learning_rate": 7.878839788010784e-07, "loss": 0.6916, "step": 4569 }, { "epoch": 0.8767596345236097, "grad_norm": 1.044293638102686, "learning_rate": 7.854678072672972e-07, "loss": 0.7858, "step": 4570 }, { "epoch": 0.8769514856471379, "grad_norm": 0.964279139685954, "learning_rate": 7.830551947557663e-07, "loss": 0.737, "step": 4571 }, { "epoch": 0.8771433367706659, "grad_norm": 0.9538784978221343, "learning_rate": 7.806461421983358e-07, "loss": 0.7674, "step": 4572 }, { "epoch": 0.8773351878941941, "grad_norm": 0.6629440954143151, "learning_rate": 7.782406505254769e-07, "loss": 0.3282, "step": 4573 }, { "epoch": 0.8775270390177222, "grad_norm": 0.8707156061280021, "learning_rate": 7.758387206662865e-07, "loss": 0.7422, "step": 4574 }, { "epoch": 0.8777188901412504, "grad_norm": 1.0663518291644283, "learning_rate": 7.73440353548488e-07, "loss": 0.7824, "step": 4575 }, { "epoch": 0.8779107412647785, "grad_norm": 0.8638596545152747, "learning_rate": 7.710455500984237e-07, "loss": 0.7414, "step": 4576 }, { "epoch": 0.8781025923883067, "grad_norm": 1.1303376878805238, "learning_rate": 7.686543112410672e-07, "loss": 0.7708, "step": 4577 }, { "epoch": 0.8782944435118348, "grad_norm": 1.1640235311142089, "learning_rate": 7.662666379000084e-07, "loss": 0.7382, "step": 4578 }, { "epoch": 0.878486294635363, "grad_norm": 0.9818295915034202, "learning_rate": 7.638825309974629e-07, "loss": 0.8037, "step": 4579 }, { "epoch": 0.8786781457588911, "grad_norm": 0.9904215816794483, "learning_rate": 7.615019914542699e-07, "loss": 0.7746, "step": 4580 }, { "epoch": 0.8788699968824193, "grad_norm": 1.0646002227825324, "learning_rate": 7.59125020189887e-07, "loss": 0.752, "step": 4581 }, { "epoch": 0.8790618480059473, "grad_norm": 1.1127576809599544, "learning_rate": 7.567516181223966e-07, "loss": 0.7723, "step": 4582 }, { "epoch": 0.8792536991294755, "grad_norm": 0.9632478357327557, "learning_rate": 7.543817861685033e-07, "loss": 0.7315, "step": 4583 }, { "epoch": 0.8794455502530036, "grad_norm": 0.9060776761174434, "learning_rate": 7.520155252435302e-07, "loss": 0.7833, "step": 4584 }, { "epoch": 0.8796374013765318, "grad_norm": 0.607179692344775, "learning_rate": 7.496528362614219e-07, "loss": 0.3083, "step": 4585 }, { "epoch": 0.8798292525000599, "grad_norm": 0.9067737753039397, "learning_rate": 7.472937201347429e-07, "loss": 0.7248, "step": 4586 }, { "epoch": 0.8800211036235881, "grad_norm": 0.9579033780182813, "learning_rate": 7.449381777746811e-07, "loss": 0.7811, "step": 4587 }, { "epoch": 0.8802129547471162, "grad_norm": 1.127189468675678, "learning_rate": 7.425862100910408e-07, "loss": 0.77, "step": 4588 }, { "epoch": 0.8804048058706444, "grad_norm": 0.9155971865569684, "learning_rate": 7.402378179922465e-07, "loss": 0.7616, "step": 4589 }, { "epoch": 0.8805966569941726, "grad_norm": 0.8354032364134342, "learning_rate": 7.378930023853392e-07, "loss": 0.7225, "step": 4590 }, { "epoch": 0.8807885081177007, "grad_norm": 0.8863573727763525, "learning_rate": 7.355517641759824e-07, "loss": 0.7975, "step": 4591 }, { "epoch": 0.8809803592412289, "grad_norm": 0.8988459677035203, "learning_rate": 7.332141042684571e-07, "loss": 0.8087, "step": 4592 }, { "epoch": 0.8811722103647569, "grad_norm": 0.9845091228186302, "learning_rate": 7.30880023565661e-07, "loss": 0.731, "step": 4593 }, { "epoch": 0.881364061488285, "grad_norm": 0.9713317629021645, "learning_rate": 7.285495229691097e-07, "loss": 0.7768, "step": 4594 }, { "epoch": 0.8815559126118132, "grad_norm": 0.8807962425554843, "learning_rate": 7.262226033789355e-07, "loss": 0.7034, "step": 4595 }, { "epoch": 0.8817477637353414, "grad_norm": 0.9196626562323335, "learning_rate": 7.238992656938882e-07, "loss": 0.7428, "step": 4596 }, { "epoch": 0.8819396148588695, "grad_norm": 0.8773254573980503, "learning_rate": 7.215795108113343e-07, "loss": 0.7225, "step": 4597 }, { "epoch": 0.8821314659823977, "grad_norm": 0.9548877963444146, "learning_rate": 7.192633396272563e-07, "loss": 0.733, "step": 4598 }, { "epoch": 0.8823233171059258, "grad_norm": 0.9578544801664496, "learning_rate": 7.16950753036253e-07, "loss": 0.6883, "step": 4599 }, { "epoch": 0.882515168229454, "grad_norm": 1.233103488466857, "learning_rate": 7.146417519315373e-07, "loss": 0.7583, "step": 4600 }, { "epoch": 0.8827070193529821, "grad_norm": 0.967792283278396, "learning_rate": 7.123363372049374e-07, "loss": 0.7703, "step": 4601 }, { "epoch": 0.8828988704765103, "grad_norm": 0.9688985585690956, "learning_rate": 7.100345097468997e-07, "loss": 0.7704, "step": 4602 }, { "epoch": 0.8830907216000383, "grad_norm": 0.9095851227316164, "learning_rate": 7.077362704464808e-07, "loss": 0.7295, "step": 4603 }, { "epoch": 0.8832825727235665, "grad_norm": 0.9978283717531867, "learning_rate": 7.054416201913527e-07, "loss": 0.7533, "step": 4604 }, { "epoch": 0.8834744238470946, "grad_norm": 1.0166371394623488, "learning_rate": 7.031505598678013e-07, "loss": 0.7528, "step": 4605 }, { "epoch": 0.8836662749706228, "grad_norm": 0.8656543776717492, "learning_rate": 7.008630903607261e-07, "loss": 0.7696, "step": 4606 }, { "epoch": 0.8838581260941509, "grad_norm": 0.9781282556452232, "learning_rate": 6.985792125536406e-07, "loss": 0.809, "step": 4607 }, { "epoch": 0.8840499772176791, "grad_norm": 0.9251478013623083, "learning_rate": 6.962989273286702e-07, "loss": 0.8237, "step": 4608 }, { "epoch": 0.8842418283412072, "grad_norm": 0.9470125708626623, "learning_rate": 6.940222355665516e-07, "loss": 0.7778, "step": 4609 }, { "epoch": 0.8844336794647354, "grad_norm": 0.9984560020581685, "learning_rate": 6.917491381466346e-07, "loss": 0.6915, "step": 4610 }, { "epoch": 0.8846255305882635, "grad_norm": 1.0105268919136718, "learning_rate": 6.894796359468825e-07, "loss": 0.6642, "step": 4611 }, { "epoch": 0.8848173817117917, "grad_norm": 0.9614468334541437, "learning_rate": 6.872137298438653e-07, "loss": 0.733, "step": 4612 }, { "epoch": 0.8850092328353197, "grad_norm": 0.9306401856797347, "learning_rate": 6.849514207127683e-07, "loss": 0.7737, "step": 4613 }, { "epoch": 0.8852010839588479, "grad_norm": 0.9734941859779266, "learning_rate": 6.826927094273861e-07, "loss": 0.7742, "step": 4614 }, { "epoch": 0.885392935082376, "grad_norm": 0.7943242489708909, "learning_rate": 6.804375968601229e-07, "loss": 0.7176, "step": 4615 }, { "epoch": 0.8855847862059042, "grad_norm": 0.9097830551801449, "learning_rate": 6.781860838819953e-07, "loss": 0.7987, "step": 4616 }, { "epoch": 0.8857766373294323, "grad_norm": 0.8379463964427163, "learning_rate": 6.759381713626267e-07, "loss": 0.7428, "step": 4617 }, { "epoch": 0.8859684884529605, "grad_norm": 0.9333276550528544, "learning_rate": 6.736938601702525e-07, "loss": 0.7341, "step": 4618 }, { "epoch": 0.8861603395764887, "grad_norm": 0.9056516891715265, "learning_rate": 6.714531511717137e-07, "loss": 0.7377, "step": 4619 }, { "epoch": 0.8863521907000168, "grad_norm": 0.9108204384009128, "learning_rate": 6.692160452324625e-07, "loss": 0.7461, "step": 4620 }, { "epoch": 0.886544041823545, "grad_norm": 0.9732026573191209, "learning_rate": 6.669825432165622e-07, "loss": 0.7352, "step": 4621 }, { "epoch": 0.8867358929470731, "grad_norm": 0.7976759839167974, "learning_rate": 6.647526459866771e-07, "loss": 0.7772, "step": 4622 }, { "epoch": 0.8869277440706013, "grad_norm": 0.9324745876468638, "learning_rate": 6.62526354404085e-07, "loss": 0.7922, "step": 4623 }, { "epoch": 0.8871195951941293, "grad_norm": 0.8885862606487628, "learning_rate": 6.60303669328668e-07, "loss": 0.7682, "step": 4624 }, { "epoch": 0.8873114463176575, "grad_norm": 0.8808327377738684, "learning_rate": 6.580845916189183e-07, "loss": 0.7592, "step": 4625 }, { "epoch": 0.8875032974411856, "grad_norm": 0.9870856365276107, "learning_rate": 6.558691221319302e-07, "loss": 0.7503, "step": 4626 }, { "epoch": 0.8876951485647138, "grad_norm": 1.064599505081429, "learning_rate": 6.536572617234082e-07, "loss": 0.7029, "step": 4627 }, { "epoch": 0.8878869996882419, "grad_norm": 0.9167057301079724, "learning_rate": 6.514490112476612e-07, "loss": 0.7324, "step": 4628 }, { "epoch": 0.8880788508117701, "grad_norm": 0.8844701268742412, "learning_rate": 6.492443715576046e-07, "loss": 0.7594, "step": 4629 }, { "epoch": 0.8882707019352982, "grad_norm": 1.0608432461512447, "learning_rate": 6.470433435047574e-07, "loss": 0.8124, "step": 4630 }, { "epoch": 0.8884625530588264, "grad_norm": 0.9372526434215296, "learning_rate": 6.448459279392472e-07, "loss": 0.752, "step": 4631 }, { "epoch": 0.8886544041823545, "grad_norm": 0.9779619736710751, "learning_rate": 6.42652125709804e-07, "loss": 0.7349, "step": 4632 }, { "epoch": 0.8888462553058827, "grad_norm": 1.0383966857005615, "learning_rate": 6.404619376637599e-07, "loss": 0.7328, "step": 4633 }, { "epoch": 0.8890381064294107, "grad_norm": 1.0955111048639556, "learning_rate": 6.382753646470563e-07, "loss": 0.7012, "step": 4634 }, { "epoch": 0.8892299575529389, "grad_norm": 0.8115664353572308, "learning_rate": 6.360924075042319e-07, "loss": 0.7135, "step": 4635 }, { "epoch": 0.889421808676467, "grad_norm": 0.931833371460469, "learning_rate": 6.339130670784366e-07, "loss": 0.7049, "step": 4636 }, { "epoch": 0.8896136597999952, "grad_norm": 0.9109632701304036, "learning_rate": 6.317373442114172e-07, "loss": 0.7056, "step": 4637 }, { "epoch": 0.8898055109235233, "grad_norm": 0.9056346612137572, "learning_rate": 6.295652397435248e-07, "loss": 0.7641, "step": 4638 }, { "epoch": 0.8899973620470515, "grad_norm": 0.8824110148010835, "learning_rate": 6.273967545137138e-07, "loss": 0.8113, "step": 4639 }, { "epoch": 0.8901892131705796, "grad_norm": 0.9747761252330316, "learning_rate": 6.2523188935954e-07, "loss": 0.6841, "step": 4640 }, { "epoch": 0.8903810642941078, "grad_norm": 0.6163406416524079, "learning_rate": 6.23070645117162e-07, "loss": 0.3167, "step": 4641 }, { "epoch": 0.890572915417636, "grad_norm": 1.2882344989094034, "learning_rate": 6.209130226213378e-07, "loss": 0.6753, "step": 4642 }, { "epoch": 0.8907647665411641, "grad_norm": 0.9834388404887126, "learning_rate": 6.187590227054296e-07, "loss": 0.7815, "step": 4643 }, { "epoch": 0.8909566176646923, "grad_norm": 1.0790296573409297, "learning_rate": 6.166086462013965e-07, "loss": 0.7356, "step": 4644 }, { "epoch": 0.8911484687882203, "grad_norm": 0.9084640680734688, "learning_rate": 6.144618939398006e-07, "loss": 0.717, "step": 4645 }, { "epoch": 0.8913403199117484, "grad_norm": 0.8724664534270665, "learning_rate": 6.123187667498054e-07, "loss": 0.7986, "step": 4646 }, { "epoch": 0.8915321710352766, "grad_norm": 0.9120515308621108, "learning_rate": 6.101792654591721e-07, "loss": 0.7373, "step": 4647 }, { "epoch": 0.8917240221588048, "grad_norm": 0.8938932003774677, "learning_rate": 6.080433908942618e-07, "loss": 0.7955, "step": 4648 }, { "epoch": 0.8919158732823329, "grad_norm": 1.0603888603027647, "learning_rate": 6.059111438800325e-07, "loss": 0.7841, "step": 4649 }, { "epoch": 0.8921077244058611, "grad_norm": 1.0051160114056126, "learning_rate": 6.037825252400487e-07, "loss": 0.7414, "step": 4650 }, { "epoch": 0.8922995755293892, "grad_norm": 1.021252839871871, "learning_rate": 6.016575357964649e-07, "loss": 0.7508, "step": 4651 }, { "epoch": 0.8924914266529174, "grad_norm": 0.9234269658020258, "learning_rate": 5.995361763700381e-07, "loss": 0.7454, "step": 4652 }, { "epoch": 0.8926832777764455, "grad_norm": 1.0441653432392535, "learning_rate": 5.974184477801215e-07, "loss": 0.6874, "step": 4653 }, { "epoch": 0.8928751288999737, "grad_norm": 0.919105673462512, "learning_rate": 5.953043508446687e-07, "loss": 0.7402, "step": 4654 }, { "epoch": 0.8930669800235017, "grad_norm": 0.9683575709290377, "learning_rate": 5.931938863802289e-07, "loss": 0.7213, "step": 4655 }, { "epoch": 0.8932588311470299, "grad_norm": 1.008184025028861, "learning_rate": 5.910870552019455e-07, "loss": 0.6864, "step": 4656 }, { "epoch": 0.893450682270558, "grad_norm": 0.8393775212245048, "learning_rate": 5.889838581235641e-07, "loss": 0.7198, "step": 4657 }, { "epoch": 0.8936425333940862, "grad_norm": 0.8742736326924418, "learning_rate": 5.868842959574228e-07, "loss": 0.6948, "step": 4658 }, { "epoch": 0.8938343845176143, "grad_norm": 1.0109547283958238, "learning_rate": 5.847883695144551e-07, "loss": 0.7522, "step": 4659 }, { "epoch": 0.8940262356411425, "grad_norm": 0.8867735334619169, "learning_rate": 5.826960796041948e-07, "loss": 0.7684, "step": 4660 }, { "epoch": 0.8942180867646706, "grad_norm": 1.0270648098880477, "learning_rate": 5.806074270347673e-07, "loss": 0.7009, "step": 4661 }, { "epoch": 0.8944099378881988, "grad_norm": 0.9515536355437705, "learning_rate": 5.78522412612893e-07, "loss": 0.7661, "step": 4662 }, { "epoch": 0.8946017890117269, "grad_norm": 0.8887016908412309, "learning_rate": 5.7644103714389e-07, "loss": 0.7731, "step": 4663 }, { "epoch": 0.8947936401352551, "grad_norm": 0.8679985561014002, "learning_rate": 5.743633014316663e-07, "loss": 0.7487, "step": 4664 }, { "epoch": 0.8949854912587832, "grad_norm": 0.8609106369358643, "learning_rate": 5.7228920627873e-07, "loss": 0.6919, "step": 4665 }, { "epoch": 0.8951773423823113, "grad_norm": 0.9280966418087297, "learning_rate": 5.702187524861791e-07, "loss": 0.7436, "step": 4666 }, { "epoch": 0.8953691935058394, "grad_norm": 0.8807550398837906, "learning_rate": 5.681519408537039e-07, "loss": 0.7121, "step": 4667 }, { "epoch": 0.8955610446293676, "grad_norm": 1.020158798504358, "learning_rate": 5.660887721795927e-07, "loss": 0.794, "step": 4668 }, { "epoch": 0.8957528957528957, "grad_norm": 1.0294925102599632, "learning_rate": 5.640292472607223e-07, "loss": 0.7846, "step": 4669 }, { "epoch": 0.8959447468764239, "grad_norm": 0.9461545384327694, "learning_rate": 5.619733668925653e-07, "loss": 0.7456, "step": 4670 }, { "epoch": 0.896136597999952, "grad_norm": 0.9603323242041439, "learning_rate": 5.599211318691833e-07, "loss": 0.7873, "step": 4671 }, { "epoch": 0.8963284491234802, "grad_norm": 0.9709495348514336, "learning_rate": 5.578725429832344e-07, "loss": 0.7371, "step": 4672 }, { "epoch": 0.8965203002470084, "grad_norm": 1.0294346883615333, "learning_rate": 5.558276010259634e-07, "loss": 0.7055, "step": 4673 }, { "epoch": 0.8967121513705365, "grad_norm": 0.9129879808827314, "learning_rate": 5.537863067872085e-07, "loss": 0.7578, "step": 4674 }, { "epoch": 0.8969040024940647, "grad_norm": 1.055780054701892, "learning_rate": 5.517486610554024e-07, "loss": 0.6865, "step": 4675 }, { "epoch": 0.8970958536175927, "grad_norm": 0.9807244949154481, "learning_rate": 5.497146646175644e-07, "loss": 0.7537, "step": 4676 }, { "epoch": 0.8972877047411209, "grad_norm": 0.9166779511170401, "learning_rate": 5.476843182593062e-07, "loss": 0.7741, "step": 4677 }, { "epoch": 0.897479555864649, "grad_norm": 0.9042993243179467, "learning_rate": 5.45657622764828e-07, "loss": 0.7583, "step": 4678 }, { "epoch": 0.8976714069881772, "grad_norm": 0.9835261567785301, "learning_rate": 5.436345789169196e-07, "loss": 0.749, "step": 4679 }, { "epoch": 0.8978632581117053, "grad_norm": 0.9966618894681296, "learning_rate": 5.416151874969644e-07, "loss": 0.7497, "step": 4680 }, { "epoch": 0.8980551092352335, "grad_norm": 0.8281833879788774, "learning_rate": 5.395994492849321e-07, "loss": 0.7224, "step": 4681 }, { "epoch": 0.8982469603587616, "grad_norm": 0.9623064213654604, "learning_rate": 5.375873650593822e-07, "loss": 0.7573, "step": 4682 }, { "epoch": 0.8984388114822898, "grad_norm": 0.8649669216375139, "learning_rate": 5.355789355974605e-07, "loss": 0.7064, "step": 4683 }, { "epoch": 0.8986306626058179, "grad_norm": 0.8782515145346934, "learning_rate": 5.335741616749013e-07, "loss": 0.7358, "step": 4684 }, { "epoch": 0.8988225137293461, "grad_norm": 0.8332308474509599, "learning_rate": 5.315730440660338e-07, "loss": 0.7996, "step": 4685 }, { "epoch": 0.8990143648528741, "grad_norm": 0.9737116679980754, "learning_rate": 5.295755835437666e-07, "loss": 0.7036, "step": 4686 }, { "epoch": 0.8992062159764023, "grad_norm": 0.9733472575995588, "learning_rate": 5.275817808796013e-07, "loss": 0.8009, "step": 4687 }, { "epoch": 0.8993980670999304, "grad_norm": 0.9272571178921959, "learning_rate": 5.255916368436209e-07, "loss": 0.7603, "step": 4688 }, { "epoch": 0.8995899182234586, "grad_norm": 0.9697450499505401, "learning_rate": 5.236051522045027e-07, "loss": 0.6978, "step": 4689 }, { "epoch": 0.8997817693469867, "grad_norm": 1.1185318404857176, "learning_rate": 5.216223277295063e-07, "loss": 0.7507, "step": 4690 }, { "epoch": 0.8999736204705149, "grad_norm": 0.613460904092967, "learning_rate": 5.196431641844768e-07, "loss": 0.3097, "step": 4691 }, { "epoch": 0.900165471594043, "grad_norm": 0.9504333708706504, "learning_rate": 5.176676623338461e-07, "loss": 0.7946, "step": 4692 }, { "epoch": 0.9003573227175712, "grad_norm": 0.8681331719732248, "learning_rate": 5.156958229406328e-07, "loss": 0.7853, "step": 4693 }, { "epoch": 0.9005491738410993, "grad_norm": 0.9047175226731026, "learning_rate": 5.137276467664421e-07, "loss": 0.7805, "step": 4694 }, { "epoch": 0.9007410249646275, "grad_norm": 0.8876979002871619, "learning_rate": 5.117631345714635e-07, "loss": 0.7437, "step": 4695 }, { "epoch": 0.9009328760881556, "grad_norm": 1.1698432213244787, "learning_rate": 5.098022871144659e-07, "loss": 0.7338, "step": 4696 }, { "epoch": 0.9011247272116837, "grad_norm": 0.9844763623695645, "learning_rate": 5.078451051528099e-07, "loss": 0.7459, "step": 4697 }, { "epoch": 0.9013165783352118, "grad_norm": 0.9037101277663118, "learning_rate": 5.058915894424377e-07, "loss": 0.7082, "step": 4698 }, { "epoch": 0.90150842945874, "grad_norm": 0.8936001454931506, "learning_rate": 5.039417407378755e-07, "loss": 0.7391, "step": 4699 }, { "epoch": 0.9017002805822681, "grad_norm": 1.1100941869582843, "learning_rate": 5.019955597922332e-07, "loss": 0.7269, "step": 4700 }, { "epoch": 0.9018921317057963, "grad_norm": 0.8451802292836127, "learning_rate": 5.000530473572041e-07, "loss": 0.7174, "step": 4701 }, { "epoch": 0.9020839828293244, "grad_norm": 0.9645813520471532, "learning_rate": 4.981142041830645e-07, "loss": 0.7705, "step": 4702 }, { "epoch": 0.9022758339528526, "grad_norm": 0.9749623520486426, "learning_rate": 4.961790310186721e-07, "loss": 0.8089, "step": 4703 }, { "epoch": 0.9024676850763808, "grad_norm": 0.9219170058160452, "learning_rate": 4.942475286114712e-07, "loss": 0.7866, "step": 4704 }, { "epoch": 0.9026595361999089, "grad_norm": 0.9117614375068478, "learning_rate": 4.923196977074829e-07, "loss": 0.7523, "step": 4705 }, { "epoch": 0.9028513873234371, "grad_norm": 0.9272099021775976, "learning_rate": 4.903955390513149e-07, "loss": 0.7934, "step": 4706 }, { "epoch": 0.9030432384469651, "grad_norm": 0.9613581748349587, "learning_rate": 4.884750533861537e-07, "loss": 0.7809, "step": 4707 }, { "epoch": 0.9032350895704933, "grad_norm": 0.9028688804007189, "learning_rate": 4.865582414537662e-07, "loss": 0.7734, "step": 4708 }, { "epoch": 0.9034269406940214, "grad_norm": 0.9766615417602952, "learning_rate": 4.846451039945066e-07, "loss": 0.7619, "step": 4709 }, { "epoch": 0.9036187918175496, "grad_norm": 1.0006785303913368, "learning_rate": 4.827356417473006e-07, "loss": 0.7447, "step": 4710 }, { "epoch": 0.9038106429410777, "grad_norm": 0.604657729265449, "learning_rate": 4.808298554496627e-07, "loss": 0.3019, "step": 4711 }, { "epoch": 0.9040024940646059, "grad_norm": 1.0077178905648052, "learning_rate": 4.789277458376807e-07, "loss": 0.7541, "step": 4712 }, { "epoch": 0.904194345188134, "grad_norm": 1.0151647030097506, "learning_rate": 4.770293136460269e-07, "loss": 0.7954, "step": 4713 }, { "epoch": 0.9043861963116622, "grad_norm": 0.9597308979619199, "learning_rate": 4.7513455960795484e-07, "loss": 0.7737, "step": 4714 }, { "epoch": 0.9045780474351903, "grad_norm": 1.074750600378515, "learning_rate": 4.7324348445529133e-07, "loss": 0.7436, "step": 4715 }, { "epoch": 0.9047698985587185, "grad_norm": 0.9651357285949463, "learning_rate": 4.713560889184476e-07, "loss": 0.8117, "step": 4716 }, { "epoch": 0.9049617496822466, "grad_norm": 1.0376955905836636, "learning_rate": 4.6947237372640954e-07, "loss": 0.7168, "step": 4717 }, { "epoch": 0.9051536008057747, "grad_norm": 0.9392587130143706, "learning_rate": 4.6759233960674387e-07, "loss": 0.789, "step": 4718 }, { "epoch": 0.9053454519293028, "grad_norm": 1.0614027390482503, "learning_rate": 4.6571598728559655e-07, "loss": 0.7107, "step": 4719 }, { "epoch": 0.905537303052831, "grad_norm": 0.931595232359118, "learning_rate": 4.6384331748769104e-07, "loss": 0.7133, "step": 4720 }, { "epoch": 0.9057291541763591, "grad_norm": 0.9057085879425704, "learning_rate": 4.619743309363245e-07, "loss": 0.8172, "step": 4721 }, { "epoch": 0.9059210052998873, "grad_norm": 1.140400833195682, "learning_rate": 4.601090283533771e-07, "loss": 0.7445, "step": 4722 }, { "epoch": 0.9061128564234154, "grad_norm": 0.8793918096795441, "learning_rate": 4.5824741045930265e-07, "loss": 0.7164, "step": 4723 }, { "epoch": 0.9063047075469436, "grad_norm": 0.8854110174111329, "learning_rate": 4.563894779731337e-07, "loss": 0.7806, "step": 4724 }, { "epoch": 0.9064965586704717, "grad_norm": 0.980817025575441, "learning_rate": 4.5453523161247624e-07, "loss": 0.76, "step": 4725 }, { "epoch": 0.9066884097939999, "grad_norm": 0.9319044901987823, "learning_rate": 4.526846720935163e-07, "loss": 0.8058, "step": 4726 }, { "epoch": 0.906880260917528, "grad_norm": 0.9382957960619545, "learning_rate": 4.5083780013101206e-07, "loss": 0.7101, "step": 4727 }, { "epoch": 0.9070721120410561, "grad_norm": 0.8731930332395966, "learning_rate": 4.48994616438303e-07, "loss": 0.8009, "step": 4728 }, { "epoch": 0.9072639631645842, "grad_norm": 0.9475227707815179, "learning_rate": 4.4715512172729846e-07, "loss": 0.7878, "step": 4729 }, { "epoch": 0.9074558142881124, "grad_norm": 1.0183480552198507, "learning_rate": 4.453193167084868e-07, "loss": 0.7228, "step": 4730 }, { "epoch": 0.9076476654116405, "grad_norm": 0.9406717488571423, "learning_rate": 4.4348720209092753e-07, "loss": 0.7301, "step": 4731 }, { "epoch": 0.9078395165351687, "grad_norm": 0.9649934689860803, "learning_rate": 4.416587785822568e-07, "loss": 0.7787, "step": 4732 }, { "epoch": 0.9080313676586969, "grad_norm": 0.9837402350960502, "learning_rate": 4.398340468886875e-07, "loss": 0.7749, "step": 4733 }, { "epoch": 0.908223218782225, "grad_norm": 0.9694379250269303, "learning_rate": 4.3801300771500353e-07, "loss": 0.721, "step": 4734 }, { "epoch": 0.9084150699057532, "grad_norm": 0.9777049428717582, "learning_rate": 4.361956617645624e-07, "loss": 0.7481, "step": 4735 }, { "epoch": 0.9086069210292813, "grad_norm": 0.9245647986325718, "learning_rate": 4.34382009739297e-07, "loss": 0.7311, "step": 4736 }, { "epoch": 0.9087987721528095, "grad_norm": 1.0291535635679774, "learning_rate": 4.3257205233971145e-07, "loss": 0.8146, "step": 4737 }, { "epoch": 0.9089906232763376, "grad_norm": 0.9420295631299436, "learning_rate": 4.307657902648843e-07, "loss": 0.7793, "step": 4738 }, { "epoch": 0.9091824743998657, "grad_norm": 1.0417065947073414, "learning_rate": 4.289632242124664e-07, "loss": 0.7954, "step": 4739 }, { "epoch": 0.9093743255233938, "grad_norm": 1.1465802754676848, "learning_rate": 4.271643548786819e-07, "loss": 0.6994, "step": 4740 }, { "epoch": 0.909566176646922, "grad_norm": 0.662868077271047, "learning_rate": 4.253691829583262e-07, "loss": 0.3141, "step": 4741 }, { "epoch": 0.9097580277704501, "grad_norm": 0.9471631262284959, "learning_rate": 4.235777091447646e-07, "loss": 0.8286, "step": 4742 }, { "epoch": 0.9099498788939783, "grad_norm": 0.9137034148369256, "learning_rate": 4.217899341299403e-07, "loss": 0.8319, "step": 4743 }, { "epoch": 0.9101417300175064, "grad_norm": 0.9193649292371971, "learning_rate": 4.2000585860435983e-07, "loss": 0.7289, "step": 4744 }, { "epoch": 0.9103335811410346, "grad_norm": 0.8664419863239474, "learning_rate": 4.1822548325710757e-07, "loss": 0.7245, "step": 4745 }, { "epoch": 0.9105254322645627, "grad_norm": 0.8790216944905359, "learning_rate": 4.164488087758345e-07, "loss": 0.7466, "step": 4746 }, { "epoch": 0.9107172833880909, "grad_norm": 0.9017430680044219, "learning_rate": 4.1467583584676395e-07, "loss": 0.7623, "step": 4747 }, { "epoch": 0.910909134511619, "grad_norm": 0.9415240626232474, "learning_rate": 4.129065651546893e-07, "loss": 0.7898, "step": 4748 }, { "epoch": 0.9111009856351471, "grad_norm": 0.8594408750918743, "learning_rate": 4.1114099738297385e-07, "loss": 0.7782, "step": 4749 }, { "epoch": 0.9112928367586752, "grad_norm": 0.8147010025648087, "learning_rate": 4.0937913321355104e-07, "loss": 0.6672, "step": 4750 }, { "epoch": 0.9114846878822034, "grad_norm": 1.113850403476127, "learning_rate": 4.076209733269232e-07, "loss": 0.8043, "step": 4751 }, { "epoch": 0.9116765390057315, "grad_norm": 0.9064179269027995, "learning_rate": 4.058665184021626e-07, "loss": 0.7654, "step": 4752 }, { "epoch": 0.9118683901292597, "grad_norm": 0.8798803559904521, "learning_rate": 4.0411576911690955e-07, "loss": 0.8103, "step": 4753 }, { "epoch": 0.9120602412527878, "grad_norm": 0.9423714428565734, "learning_rate": 4.023687261473741e-07, "loss": 0.7394, "step": 4754 }, { "epoch": 0.912252092376316, "grad_norm": 0.8481009318727788, "learning_rate": 4.0062539016833437e-07, "loss": 0.7467, "step": 4755 }, { "epoch": 0.9124439434998441, "grad_norm": 1.0228952048485453, "learning_rate": 3.988857618531372e-07, "loss": 0.7411, "step": 4756 }, { "epoch": 0.9126357946233723, "grad_norm": 1.0143480746608595, "learning_rate": 3.971498418736952e-07, "loss": 0.6978, "step": 4757 }, { "epoch": 0.9128276457469005, "grad_norm": 0.9816504901700938, "learning_rate": 3.95417630900492e-07, "loss": 0.7884, "step": 4758 }, { "epoch": 0.9130194968704285, "grad_norm": 0.9169396590021553, "learning_rate": 3.936891296025769e-07, "loss": 0.7225, "step": 4759 }, { "epoch": 0.9132113479939566, "grad_norm": 0.8699872933976907, "learning_rate": 3.919643386475658e-07, "loss": 0.7219, "step": 4760 }, { "epoch": 0.9134031991174848, "grad_norm": 1.0616037105269938, "learning_rate": 3.902432587016436e-07, "loss": 0.7672, "step": 4761 }, { "epoch": 0.913595050241013, "grad_norm": 1.0394553078744957, "learning_rate": 3.885258904295575e-07, "loss": 0.7367, "step": 4762 }, { "epoch": 0.9137869013645411, "grad_norm": 1.0378325696427135, "learning_rate": 3.868122344946268e-07, "loss": 0.7583, "step": 4763 }, { "epoch": 0.9139787524880693, "grad_norm": 1.0981869399508144, "learning_rate": 3.8510229155873435e-07, "loss": 0.8158, "step": 4764 }, { "epoch": 0.9141706036115974, "grad_norm": 0.8988510931935265, "learning_rate": 3.833960622823263e-07, "loss": 0.797, "step": 4765 }, { "epoch": 0.9143624547351256, "grad_norm": 0.9405617355169807, "learning_rate": 3.816935473244199e-07, "loss": 0.7315, "step": 4766 }, { "epoch": 0.9145543058586537, "grad_norm": 0.8954614322803036, "learning_rate": 3.7999474734259245e-07, "loss": 0.7643, "step": 4767 }, { "epoch": 0.9147461569821819, "grad_norm": 0.9325702058744996, "learning_rate": 3.7829966299299026e-07, "loss": 0.7734, "step": 4768 }, { "epoch": 0.91493800810571, "grad_norm": 1.0845228916670722, "learning_rate": 3.766082949303218e-07, "loss": 0.7476, "step": 4769 }, { "epoch": 0.9151298592292381, "grad_norm": 0.9839934551010763, "learning_rate": 3.749206438078612e-07, "loss": 0.7398, "step": 4770 }, { "epoch": 0.9153217103527662, "grad_norm": 1.0366338471066825, "learning_rate": 3.73236710277447e-07, "loss": 0.7738, "step": 4771 }, { "epoch": 0.9155135614762944, "grad_norm": 1.0100897507775015, "learning_rate": 3.715564949894834e-07, "loss": 0.7679, "step": 4772 }, { "epoch": 0.9157054125998225, "grad_norm": 1.0726218771297091, "learning_rate": 3.698799985929369e-07, "loss": 0.8214, "step": 4773 }, { "epoch": 0.9158972637233507, "grad_norm": 0.9187024948754988, "learning_rate": 3.6820722173533605e-07, "loss": 0.7166, "step": 4774 }, { "epoch": 0.9160891148468788, "grad_norm": 0.5935212423283632, "learning_rate": 3.665381650627764e-07, "loss": 0.2932, "step": 4775 }, { "epoch": 0.916280965970407, "grad_norm": 0.9559271832973348, "learning_rate": 3.648728292199111e-07, "loss": 0.7542, "step": 4776 }, { "epoch": 0.9164728170939351, "grad_norm": 1.0269729475477267, "learning_rate": 3.6321121484996447e-07, "loss": 0.7404, "step": 4777 }, { "epoch": 0.9166646682174633, "grad_norm": 1.031722556345574, "learning_rate": 3.615533225947154e-07, "loss": 0.6711, "step": 4778 }, { "epoch": 0.9168565193409914, "grad_norm": 0.8778028426268042, "learning_rate": 3.598991530945106e-07, "loss": 0.7245, "step": 4779 }, { "epoch": 0.9170483704645195, "grad_norm": 0.9834308105739354, "learning_rate": 3.5824870698825455e-07, "loss": 0.6763, "step": 4780 }, { "epoch": 0.9172402215880476, "grad_norm": 1.034915938162101, "learning_rate": 3.5660198491341725e-07, "loss": 0.7604, "step": 4781 }, { "epoch": 0.9174320727115758, "grad_norm": 0.8188794008343833, "learning_rate": 3.5495898750602773e-07, "loss": 0.7309, "step": 4782 }, { "epoch": 0.9176239238351039, "grad_norm": 1.0020524410998028, "learning_rate": 3.5331971540067953e-07, "loss": 0.681, "step": 4783 }, { "epoch": 0.9178157749586321, "grad_norm": 0.9014557747091305, "learning_rate": 3.5168416923052284e-07, "loss": 0.7685, "step": 4784 }, { "epoch": 0.9180076260821602, "grad_norm": 0.9366547061156589, "learning_rate": 3.500523496272734e-07, "loss": 0.7056, "step": 4785 }, { "epoch": 0.9181994772056884, "grad_norm": 0.9443082446900251, "learning_rate": 3.4842425722120263e-07, "loss": 0.7148, "step": 4786 }, { "epoch": 0.9183913283292165, "grad_norm": 0.929566677154309, "learning_rate": 3.4679989264114866e-07, "loss": 0.7095, "step": 4787 }, { "epoch": 0.9185831794527447, "grad_norm": 0.9290067614383395, "learning_rate": 3.4517925651450515e-07, "loss": 0.7581, "step": 4788 }, { "epoch": 0.9187750305762729, "grad_norm": 0.990117896145595, "learning_rate": 3.435623494672258e-07, "loss": 0.7549, "step": 4789 }, { "epoch": 0.918966881699801, "grad_norm": 0.9450509470385187, "learning_rate": 3.419491721238255e-07, "loss": 0.7227, "step": 4790 }, { "epoch": 0.919158732823329, "grad_norm": 0.9794184818624614, "learning_rate": 3.40339725107377e-07, "loss": 0.7543, "step": 4791 }, { "epoch": 0.9193505839468572, "grad_norm": 0.9495419675716157, "learning_rate": 3.3873400903951636e-07, "loss": 0.734, "step": 4792 }, { "epoch": 0.9195424350703854, "grad_norm": 0.9804499947835079, "learning_rate": 3.3713202454043305e-07, "loss": 0.7365, "step": 4793 }, { "epoch": 0.9197342861939135, "grad_norm": 0.9244176949310292, "learning_rate": 3.3553377222887874e-07, "loss": 0.7587, "step": 4794 }, { "epoch": 0.9199261373174417, "grad_norm": 0.9942814204876628, "learning_rate": 3.33939252722163e-07, "loss": 0.7138, "step": 4795 }, { "epoch": 0.9201179884409698, "grad_norm": 1.003082406347499, "learning_rate": 3.3234846663615205e-07, "loss": 0.8118, "step": 4796 }, { "epoch": 0.920309839564498, "grad_norm": 0.9689857995585462, "learning_rate": 3.307614145852722e-07, "loss": 0.8046, "step": 4797 }, { "epoch": 0.9205016906880261, "grad_norm": 0.9507946239412726, "learning_rate": 3.291780971825065e-07, "loss": 0.7411, "step": 4798 }, { "epoch": 0.9206935418115543, "grad_norm": 0.8744601514639845, "learning_rate": 3.2759851503939567e-07, "loss": 0.743, "step": 4799 }, { "epoch": 0.9208853929350824, "grad_norm": 0.8891890353057219, "learning_rate": 3.2602266876603727e-07, "loss": 0.6965, "step": 4800 }, { "epoch": 0.9210772440586105, "grad_norm": 1.0014562854493698, "learning_rate": 3.244505589710867e-07, "loss": 0.7257, "step": 4801 }, { "epoch": 0.9212690951821386, "grad_norm": 0.6836955110681159, "learning_rate": 3.2288218626175704e-07, "loss": 0.2785, "step": 4802 }, { "epoch": 0.9214609463056668, "grad_norm": 0.9613953336587677, "learning_rate": 3.213175512438149e-07, "loss": 0.7253, "step": 4803 }, { "epoch": 0.9216527974291949, "grad_norm": 0.9437407371760229, "learning_rate": 3.197566545215869e-07, "loss": 0.7702, "step": 4804 }, { "epoch": 0.9218446485527231, "grad_norm": 0.8394670158314109, "learning_rate": 3.181994966979518e-07, "loss": 0.7212, "step": 4805 }, { "epoch": 0.9220364996762512, "grad_norm": 1.0189653196465172, "learning_rate": 3.166460783743486e-07, "loss": 0.7099, "step": 4806 }, { "epoch": 0.9222283507997794, "grad_norm": 1.0222384527164394, "learning_rate": 3.1509640015076946e-07, "loss": 0.7945, "step": 4807 }, { "epoch": 0.9224202019233075, "grad_norm": 0.9915691636198649, "learning_rate": 3.135504626257613e-07, "loss": 0.7762, "step": 4808 }, { "epoch": 0.9226120530468357, "grad_norm": 0.8909322012952189, "learning_rate": 3.1200826639642747e-07, "loss": 0.7323, "step": 4809 }, { "epoch": 0.9228039041703638, "grad_norm": 0.9514215131733044, "learning_rate": 3.1046981205842707e-07, "loss": 0.7549, "step": 4810 }, { "epoch": 0.922995755293892, "grad_norm": 0.9438983300469136, "learning_rate": 3.089351002059726e-07, "loss": 0.7342, "step": 4811 }, { "epoch": 0.92318760641742, "grad_norm": 0.9896155141093781, "learning_rate": 3.07404131431831e-07, "loss": 0.7797, "step": 4812 }, { "epoch": 0.9233794575409482, "grad_norm": 0.9313511028015814, "learning_rate": 3.058769063273237e-07, "loss": 0.7466, "step": 4813 }, { "epoch": 0.9235713086644763, "grad_norm": 1.091969271145992, "learning_rate": 3.043534254823266e-07, "loss": 0.7678, "step": 4814 }, { "epoch": 0.9237631597880045, "grad_norm": 0.8579184982605089, "learning_rate": 3.0283368948526794e-07, "loss": 0.7667, "step": 4815 }, { "epoch": 0.9239550109115326, "grad_norm": 0.9631659383899929, "learning_rate": 3.013176989231337e-07, "loss": 0.7736, "step": 4816 }, { "epoch": 0.9241468620350608, "grad_norm": 0.9995528646012796, "learning_rate": 2.998054543814577e-07, "loss": 0.7474, "step": 4817 }, { "epoch": 0.924338713158589, "grad_norm": 0.8887241912021133, "learning_rate": 2.9829695644433055e-07, "loss": 0.7866, "step": 4818 }, { "epoch": 0.9245305642821171, "grad_norm": 0.9014557431059705, "learning_rate": 2.96792205694395e-07, "loss": 0.6902, "step": 4819 }, { "epoch": 0.9247224154056453, "grad_norm": 0.9273969945909705, "learning_rate": 2.9529120271284515e-07, "loss": 0.7813, "step": 4820 }, { "epoch": 0.9249142665291734, "grad_norm": 1.0375933618898918, "learning_rate": 2.937939480794294e-07, "loss": 0.7704, "step": 4821 }, { "epoch": 0.9251061176527015, "grad_norm": 0.9647866625098073, "learning_rate": 2.923004423724474e-07, "loss": 0.771, "step": 4822 }, { "epoch": 0.9252979687762296, "grad_norm": 0.8964142960417346, "learning_rate": 2.90810686168751e-07, "loss": 0.707, "step": 4823 }, { "epoch": 0.9254898198997578, "grad_norm": 0.9464040650255455, "learning_rate": 2.893246800437455e-07, "loss": 0.7917, "step": 4824 }, { "epoch": 0.9256816710232859, "grad_norm": 0.9352569307716334, "learning_rate": 2.8784242457138176e-07, "loss": 0.7428, "step": 4825 }, { "epoch": 0.9258735221468141, "grad_norm": 0.9494914284758597, "learning_rate": 2.863639203241697e-07, "loss": 0.7803, "step": 4826 }, { "epoch": 0.9260653732703422, "grad_norm": 1.0714221290906696, "learning_rate": 2.848891678731669e-07, "loss": 0.7303, "step": 4827 }, { "epoch": 0.9262572243938704, "grad_norm": 0.9733771974234607, "learning_rate": 2.8341816778798215e-07, "loss": 0.7258, "step": 4828 }, { "epoch": 0.9264490755173985, "grad_norm": 1.0119761371332094, "learning_rate": 2.8195092063677323e-07, "loss": 0.7344, "step": 4829 }, { "epoch": 0.9266409266409267, "grad_norm": 1.122542694752021, "learning_rate": 2.804874269862501e-07, "loss": 0.6814, "step": 4830 }, { "epoch": 0.9268327777644548, "grad_norm": 1.076719465092514, "learning_rate": 2.79027687401674e-07, "loss": 0.8135, "step": 4831 }, { "epoch": 0.9270246288879829, "grad_norm": 1.0080800013357476, "learning_rate": 2.775717024468538e-07, "loss": 0.7056, "step": 4832 }, { "epoch": 0.927216480011511, "grad_norm": 1.0195356482707345, "learning_rate": 2.7611947268415095e-07, "loss": 0.8191, "step": 4833 }, { "epoch": 0.9274083311350392, "grad_norm": 0.9018181867511355, "learning_rate": 2.746709986744733e-07, "loss": 0.7689, "step": 4834 }, { "epoch": 0.9276001822585673, "grad_norm": 0.8687772183996367, "learning_rate": 2.73226280977279e-07, "loss": 0.7521, "step": 4835 }, { "epoch": 0.9277920333820955, "grad_norm": 1.1056897057689479, "learning_rate": 2.7178532015057826e-07, "loss": 0.7943, "step": 4836 }, { "epoch": 0.9279838845056236, "grad_norm": 0.9109548982605385, "learning_rate": 2.703481167509281e-07, "loss": 0.7573, "step": 4837 }, { "epoch": 0.9281757356291518, "grad_norm": 0.9188171901732087, "learning_rate": 2.689146713334312e-07, "loss": 0.7701, "step": 4838 }, { "epoch": 0.9283675867526799, "grad_norm": 0.9232674062511551, "learning_rate": 2.674849844517446e-07, "loss": 0.7747, "step": 4839 }, { "epoch": 0.9285594378762081, "grad_norm": 0.9135672515517173, "learning_rate": 2.6605905665806784e-07, "loss": 0.7674, "step": 4840 }, { "epoch": 0.9287512889997362, "grad_norm": 1.052598280867297, "learning_rate": 2.6463688850315473e-07, "loss": 0.7285, "step": 4841 }, { "epoch": 0.9289431401232644, "grad_norm": 1.0376979681969918, "learning_rate": 2.632184805363025e-07, "loss": 0.7813, "step": 4842 }, { "epoch": 0.9291349912467924, "grad_norm": 0.8474156056795152, "learning_rate": 2.618038333053563e-07, "loss": 0.7818, "step": 4843 }, { "epoch": 0.9293268423703206, "grad_norm": 0.998574898173265, "learning_rate": 2.6039294735671015e-07, "loss": 0.7662, "step": 4844 }, { "epoch": 0.9295186934938487, "grad_norm": 1.1261911225939425, "learning_rate": 2.5898582323530597e-07, "loss": 0.6874, "step": 4845 }, { "epoch": 0.9297105446173769, "grad_norm": 0.638232845771464, "learning_rate": 2.5758246148463005e-07, "loss": 0.3127, "step": 4846 }, { "epoch": 0.929902395740905, "grad_norm": 0.880546320243267, "learning_rate": 2.561828626467189e-07, "loss": 0.7717, "step": 4847 }, { "epoch": 0.9300942468644332, "grad_norm": 0.8659249556051017, "learning_rate": 2.547870272621511e-07, "loss": 0.7983, "step": 4848 }, { "epoch": 0.9302860979879614, "grad_norm": 0.9554062417456805, "learning_rate": 2.5339495587005657e-07, "loss": 0.7246, "step": 4849 }, { "epoch": 0.9304779491114895, "grad_norm": 0.9538858792985729, "learning_rate": 2.5200664900810744e-07, "loss": 0.7261, "step": 4850 }, { "epoch": 0.9306698002350177, "grad_norm": 0.9777417473384, "learning_rate": 2.50622107212527e-07, "loss": 0.739, "step": 4851 }, { "epoch": 0.9308616513585458, "grad_norm": 1.0074490566931593, "learning_rate": 2.4924133101807636e-07, "loss": 0.7249, "step": 4852 }, { "epoch": 0.9310535024820739, "grad_norm": 1.0479796724142276, "learning_rate": 2.4786432095807e-07, "loss": 0.8222, "step": 4853 }, { "epoch": 0.931245353605602, "grad_norm": 0.8776035327277425, "learning_rate": 2.464910775643614e-07, "loss": 0.7414, "step": 4854 }, { "epoch": 0.9314372047291302, "grad_norm": 0.9578061444315863, "learning_rate": 2.4512160136735517e-07, "loss": 0.7802, "step": 4855 }, { "epoch": 0.9316290558526583, "grad_norm": 1.0363233775650686, "learning_rate": 2.437558928959971e-07, "loss": 0.774, "step": 4856 }, { "epoch": 0.9318209069761865, "grad_norm": 0.8036118781777869, "learning_rate": 2.423939526777774e-07, "loss": 0.7638, "step": 4857 }, { "epoch": 0.9320127580997146, "grad_norm": 1.0569005600638293, "learning_rate": 2.4103578123873427e-07, "loss": 0.8454, "step": 4858 }, { "epoch": 0.9322046092232428, "grad_norm": 0.9235142804026353, "learning_rate": 2.396813791034458e-07, "loss": 0.7494, "step": 4859 }, { "epoch": 0.9323964603467709, "grad_norm": 0.9197791681201388, "learning_rate": 2.383307467950391e-07, "loss": 0.7619, "step": 4860 }, { "epoch": 0.9325883114702991, "grad_norm": 0.6157333804425438, "learning_rate": 2.369838848351802e-07, "loss": 0.2818, "step": 4861 }, { "epoch": 0.9327801625938272, "grad_norm": 1.0588661486635547, "learning_rate": 2.356407937440819e-07, "loss": 0.7471, "step": 4862 }, { "epoch": 0.9329720137173554, "grad_norm": 0.9809413788744666, "learning_rate": 2.343014740405003e-07, "loss": 0.7373, "step": 4863 }, { "epoch": 0.9331638648408834, "grad_norm": 0.9109178241366014, "learning_rate": 2.3296592624173496e-07, "loss": 0.6748, "step": 4864 }, { "epoch": 0.9333557159644116, "grad_norm": 0.8251355613289516, "learning_rate": 2.3163415086362662e-07, "loss": 0.7482, "step": 4865 }, { "epoch": 0.9335475670879397, "grad_norm": 0.8494914761100774, "learning_rate": 2.3030614842056153e-07, "loss": 0.7301, "step": 4866 }, { "epoch": 0.9337394182114679, "grad_norm": 0.9753177795183641, "learning_rate": 2.289819194254661e-07, "loss": 0.7584, "step": 4867 }, { "epoch": 0.933931269334996, "grad_norm": 0.9620116432441305, "learning_rate": 2.276614643898134e-07, "loss": 0.7258, "step": 4868 }, { "epoch": 0.9341231204585242, "grad_norm": 0.9308896373701816, "learning_rate": 2.2634478382361213e-07, "loss": 0.7731, "step": 4869 }, { "epoch": 0.9343149715820523, "grad_norm": 0.9283037436638325, "learning_rate": 2.2503187823542106e-07, "loss": 0.7273, "step": 4870 }, { "epoch": 0.9345068227055805, "grad_norm": 1.1551854739656027, "learning_rate": 2.2372274813233562e-07, "loss": 0.728, "step": 4871 }, { "epoch": 0.9346986738291087, "grad_norm": 0.8789791016941431, "learning_rate": 2.2241739401999474e-07, "loss": 0.7514, "step": 4872 }, { "epoch": 0.9348905249526368, "grad_norm": 0.8884980761715535, "learning_rate": 2.211158164025784e-07, "loss": 0.7111, "step": 4873 }, { "epoch": 0.9350823760761648, "grad_norm": 0.92458190445487, "learning_rate": 2.198180157828056e-07, "loss": 0.7904, "step": 4874 }, { "epoch": 0.935274227199693, "grad_norm": 0.9353591360002758, "learning_rate": 2.1852399266194312e-07, "loss": 0.6787, "step": 4875 }, { "epoch": 0.9354660783232212, "grad_norm": 1.301843193439189, "learning_rate": 2.1723374753979343e-07, "loss": 0.738, "step": 4876 }, { "epoch": 0.9356579294467493, "grad_norm": 0.9499779017371281, "learning_rate": 2.1594728091470008e-07, "loss": 0.7587, "step": 4877 }, { "epoch": 0.9358497805702775, "grad_norm": 0.8969502905280864, "learning_rate": 2.1466459328354783e-07, "loss": 0.749, "step": 4878 }, { "epoch": 0.9360416316938056, "grad_norm": 0.9798032936019591, "learning_rate": 2.1338568514176372e-07, "loss": 0.714, "step": 4879 }, { "epoch": 0.9362334828173338, "grad_norm": 0.9397468330463936, "learning_rate": 2.121105569833115e-07, "loss": 0.7898, "step": 4880 }, { "epoch": 0.9364253339408619, "grad_norm": 1.098912920149511, "learning_rate": 2.1083920930069835e-07, "loss": 0.7561, "step": 4881 }, { "epoch": 0.9366171850643901, "grad_norm": 0.9619443952207284, "learning_rate": 2.0957164258497031e-07, "loss": 0.7396, "step": 4882 }, { "epoch": 0.9368090361879182, "grad_norm": 0.857167277689833, "learning_rate": 2.0830785732570913e-07, "loss": 0.7242, "step": 4883 }, { "epoch": 0.9370008873114463, "grad_norm": 1.0400777506218508, "learning_rate": 2.070478540110432e-07, "loss": 0.6775, "step": 4884 }, { "epoch": 0.9371927384349744, "grad_norm": 0.9619954001238877, "learning_rate": 2.0579163312763552e-07, "loss": 0.7406, "step": 4885 }, { "epoch": 0.9373845895585026, "grad_norm": 0.9303423904562859, "learning_rate": 2.0453919516068786e-07, "loss": 0.7524, "step": 4886 }, { "epoch": 0.9375764406820307, "grad_norm": 0.8876127059269947, "learning_rate": 2.0329054059394227e-07, "loss": 0.7273, "step": 4887 }, { "epoch": 0.9377682918055589, "grad_norm": 0.9570754988229447, "learning_rate": 2.0204566990967956e-07, "loss": 0.7777, "step": 4888 }, { "epoch": 0.937960142929087, "grad_norm": 1.0267938698618724, "learning_rate": 2.0080458358871958e-07, "loss": 0.7685, "step": 4889 }, { "epoch": 0.9381519940526152, "grad_norm": 0.8830614039419944, "learning_rate": 1.9956728211041886e-07, "loss": 0.7425, "step": 4890 }, { "epoch": 0.9383438451761433, "grad_norm": 1.0589826063115155, "learning_rate": 1.9833376595267294e-07, "loss": 0.7661, "step": 4891 }, { "epoch": 0.9385356962996715, "grad_norm": 0.8753071465166481, "learning_rate": 1.9710403559191516e-07, "loss": 0.841, "step": 4892 }, { "epoch": 0.9387275474231996, "grad_norm": 0.9336613181073264, "learning_rate": 1.9587809150311665e-07, "loss": 0.7399, "step": 4893 }, { "epoch": 0.9389193985467278, "grad_norm": 0.8487291598895925, "learning_rate": 1.9465593415978757e-07, "loss": 0.727, "step": 4894 }, { "epoch": 0.9391112496702558, "grad_norm": 0.8381071334509748, "learning_rate": 1.934375640339725e-07, "loss": 0.7195, "step": 4895 }, { "epoch": 0.939303100793784, "grad_norm": 0.9018526091601159, "learning_rate": 1.9222298159625508e-07, "loss": 0.7481, "step": 4896 }, { "epoch": 0.9394949519173121, "grad_norm": 1.006784007591141, "learning_rate": 1.9101218731575777e-07, "loss": 0.7826, "step": 4897 }, { "epoch": 0.9396868030408403, "grad_norm": 0.6729583014240527, "learning_rate": 1.8980518166013427e-07, "loss": 0.3036, "step": 4898 }, { "epoch": 0.9398786541643684, "grad_norm": 0.840691625490536, "learning_rate": 1.886019650955817e-07, "loss": 0.8032, "step": 4899 }, { "epoch": 0.9400705052878966, "grad_norm": 0.8738485986717818, "learning_rate": 1.874025380868294e-07, "loss": 0.7258, "step": 4900 }, { "epoch": 0.9402623564114247, "grad_norm": 0.8924481280717408, "learning_rate": 1.8620690109714569e-07, "loss": 0.7459, "step": 4901 }, { "epoch": 0.9404542075349529, "grad_norm": 1.1663635424467536, "learning_rate": 1.8501505458833114e-07, "loss": 0.6937, "step": 4902 }, { "epoch": 0.940646058658481, "grad_norm": 0.8398575460165045, "learning_rate": 1.8382699902072533e-07, "loss": 0.7513, "step": 4903 }, { "epoch": 0.9408379097820092, "grad_norm": 1.1776489637683725, "learning_rate": 1.826427348532045e-07, "loss": 0.736, "step": 4904 }, { "epoch": 0.9410297609055372, "grad_norm": 0.9633585772875094, "learning_rate": 1.8146226254317834e-07, "loss": 0.729, "step": 4905 }, { "epoch": 0.9412216120290654, "grad_norm": 0.8863992535234082, "learning_rate": 1.8028558254659213e-07, "loss": 0.7291, "step": 4906 }, { "epoch": 0.9414134631525936, "grad_norm": 1.0939073341509975, "learning_rate": 1.7911269531792786e-07, "loss": 0.8087, "step": 4907 }, { "epoch": 0.9416053142761217, "grad_norm": 0.8709936548423393, "learning_rate": 1.7794360131020094e-07, "loss": 0.7313, "step": 4908 }, { "epoch": 0.9417971653996499, "grad_norm": 0.7960193480994477, "learning_rate": 1.7677830097496129e-07, "loss": 0.6704, "step": 4909 }, { "epoch": 0.941989016523178, "grad_norm": 0.9259660859139183, "learning_rate": 1.7561679476229664e-07, "loss": 0.8207, "step": 4910 }, { "epoch": 0.9421808676467062, "grad_norm": 0.635812955785958, "learning_rate": 1.7445908312082705e-07, "loss": 0.3029, "step": 4911 }, { "epoch": 0.9423727187702343, "grad_norm": 1.2345502230222742, "learning_rate": 1.73305166497707e-07, "loss": 0.796, "step": 4912 }, { "epoch": 0.9425645698937625, "grad_norm": 1.047341743405619, "learning_rate": 1.7215504533862336e-07, "loss": 0.7873, "step": 4913 }, { "epoch": 0.9427564210172906, "grad_norm": 0.9603607783988095, "learning_rate": 1.7100872008780188e-07, "loss": 0.7449, "step": 4914 }, { "epoch": 0.9429482721408188, "grad_norm": 1.0698087956789462, "learning_rate": 1.6986619118799952e-07, "loss": 0.6913, "step": 4915 }, { "epoch": 0.9431401232643468, "grad_norm": 0.8975011014829992, "learning_rate": 1.6872745908050436e-07, "loss": 0.7755, "step": 4916 }, { "epoch": 0.943331974387875, "grad_norm": 0.8386380126679913, "learning_rate": 1.6759252420514238e-07, "loss": 0.7524, "step": 4917 }, { "epoch": 0.9435238255114031, "grad_norm": 0.8080964156686337, "learning_rate": 1.664613870002696e-07, "loss": 0.7584, "step": 4918 }, { "epoch": 0.9437156766349313, "grad_norm": 0.8831417565703376, "learning_rate": 1.6533404790277762e-07, "loss": 0.7868, "step": 4919 }, { "epoch": 0.9439075277584594, "grad_norm": 0.8693419171948336, "learning_rate": 1.642105073480904e-07, "loss": 0.6915, "step": 4920 }, { "epoch": 0.9440993788819876, "grad_norm": 1.1256759119418087, "learning_rate": 1.6309076577016413e-07, "loss": 0.7665, "step": 4921 }, { "epoch": 0.9442912300055157, "grad_norm": 1.175347818140891, "learning_rate": 1.6197482360148732e-07, "loss": 0.7812, "step": 4922 }, { "epoch": 0.9444830811290439, "grad_norm": 0.8938959983037751, "learning_rate": 1.608626812730829e-07, "loss": 0.7724, "step": 4923 }, { "epoch": 0.944674932252572, "grad_norm": 0.7978628027505266, "learning_rate": 1.597543392145029e-07, "loss": 0.6803, "step": 4924 }, { "epoch": 0.9448667833761002, "grad_norm": 0.8432092520195527, "learning_rate": 1.5864979785383482e-07, "loss": 0.7507, "step": 4925 }, { "epoch": 0.9450586344996282, "grad_norm": 0.8950233971068453, "learning_rate": 1.5754905761769634e-07, "loss": 0.6973, "step": 4926 }, { "epoch": 0.9452504856231564, "grad_norm": 0.892990580757345, "learning_rate": 1.5645211893123846e-07, "loss": 0.7545, "step": 4927 }, { "epoch": 0.9454423367466845, "grad_norm": 0.9299823101059989, "learning_rate": 1.5535898221814227e-07, "loss": 0.7126, "step": 4928 }, { "epoch": 0.9456341878702127, "grad_norm": 0.9872662924588074, "learning_rate": 1.5426964790062004e-07, "loss": 0.769, "step": 4929 }, { "epoch": 0.9458260389937408, "grad_norm": 0.94321534231748, "learning_rate": 1.5318411639941855e-07, "loss": 0.7771, "step": 4930 }, { "epoch": 0.946017890117269, "grad_norm": 1.0594497452929355, "learning_rate": 1.521023881338135e-07, "loss": 0.7902, "step": 4931 }, { "epoch": 0.9462097412407972, "grad_norm": 1.004626147806401, "learning_rate": 1.5102446352160848e-07, "loss": 0.7718, "step": 4932 }, { "epoch": 0.9464015923643253, "grad_norm": 1.210026928913996, "learning_rate": 1.499503429791449e-07, "loss": 0.7192, "step": 4933 }, { "epoch": 0.9465934434878535, "grad_norm": 0.9747544751348058, "learning_rate": 1.4888002692129088e-07, "loss": 0.7302, "step": 4934 }, { "epoch": 0.9467852946113816, "grad_norm": 0.8703639837282766, "learning_rate": 1.4781351576144354e-07, "loss": 0.7586, "step": 4935 }, { "epoch": 0.9469771457349098, "grad_norm": 0.8673592664064427, "learning_rate": 1.4675080991153335e-07, "loss": 0.7436, "step": 4936 }, { "epoch": 0.9471689968584378, "grad_norm": 0.8964590893179626, "learning_rate": 1.4569190978201975e-07, "loss": 0.7615, "step": 4937 }, { "epoch": 0.947360847981966, "grad_norm": 0.8366693535643779, "learning_rate": 1.4463681578189338e-07, "loss": 0.7251, "step": 4938 }, { "epoch": 0.9475526991054941, "grad_norm": 0.8748515007321649, "learning_rate": 1.4358552831867268e-07, "loss": 0.7534, "step": 4939 }, { "epoch": 0.9477445502290223, "grad_norm": 0.9976671104146675, "learning_rate": 1.425380477984073e-07, "loss": 0.7675, "step": 4940 }, { "epoch": 0.9479364013525504, "grad_norm": 0.6401944950368484, "learning_rate": 1.4149437462567694e-07, "loss": 0.3439, "step": 4941 }, { "epoch": 0.9481282524760786, "grad_norm": 1.2050280799793989, "learning_rate": 1.4045450920358917e-07, "loss": 0.7445, "step": 4942 }, { "epoch": 0.9483201035996067, "grad_norm": 1.0084260535803753, "learning_rate": 1.3941845193378268e-07, "loss": 0.7558, "step": 4943 }, { "epoch": 0.9485119547231349, "grad_norm": 0.9032925481402448, "learning_rate": 1.3838620321642404e-07, "loss": 0.776, "step": 4944 }, { "epoch": 0.948703805846663, "grad_norm": 0.9265810168380408, "learning_rate": 1.373577634502088e-07, "loss": 0.7652, "step": 4945 }, { "epoch": 0.9488956569701912, "grad_norm": 0.9537310671313902, "learning_rate": 1.363331330323625e-07, "loss": 0.7331, "step": 4946 }, { "epoch": 0.9490875080937192, "grad_norm": 1.240291204577373, "learning_rate": 1.3531231235863752e-07, "loss": 0.8311, "step": 4947 }, { "epoch": 0.9492793592172474, "grad_norm": 0.9389763822492966, "learning_rate": 1.342953018233162e-07, "loss": 0.7508, "step": 4948 }, { "epoch": 0.9494712103407755, "grad_norm": 1.00198926640029, "learning_rate": 1.3328210181920875e-07, "loss": 0.7527, "step": 4949 }, { "epoch": 0.9496630614643037, "grad_norm": 0.9381417473150534, "learning_rate": 1.3227271273765441e-07, "loss": 0.7656, "step": 4950 }, { "epoch": 0.9498549125878318, "grad_norm": 0.98434079508949, "learning_rate": 1.3126713496851906e-07, "loss": 0.7433, "step": 4951 }, { "epoch": 0.95004676371136, "grad_norm": 0.9497632371698812, "learning_rate": 1.302653689001965e-07, "loss": 0.8176, "step": 4952 }, { "epoch": 0.9502386148348881, "grad_norm": 0.8908085430165789, "learning_rate": 1.2926741491961047e-07, "loss": 0.6921, "step": 4953 }, { "epoch": 0.9504304659584163, "grad_norm": 1.054186450699899, "learning_rate": 1.2827327341221053e-07, "loss": 0.7802, "step": 4954 }, { "epoch": 0.9506223170819444, "grad_norm": 0.919387109265763, "learning_rate": 1.272829447619739e-07, "loss": 0.7634, "step": 4955 }, { "epoch": 0.9508141682054726, "grad_norm": 0.9557152699920408, "learning_rate": 1.2629642935140462e-07, "loss": 0.7693, "step": 4956 }, { "epoch": 0.9510060193290006, "grad_norm": 0.9161903106044434, "learning_rate": 1.2531372756153458e-07, "loss": 0.8139, "step": 4957 }, { "epoch": 0.9511978704525288, "grad_norm": 0.9311948172309072, "learning_rate": 1.243348397719235e-07, "loss": 0.714, "step": 4958 }, { "epoch": 0.951389721576057, "grad_norm": 0.8180725458685545, "learning_rate": 1.2335976636065672e-07, "loss": 0.7326, "step": 4959 }, { "epoch": 0.9515815726995851, "grad_norm": 0.9552812449445384, "learning_rate": 1.2238850770434629e-07, "loss": 0.8054, "step": 4960 }, { "epoch": 0.9517734238231133, "grad_norm": 0.89175641796169, "learning_rate": 1.2142106417813215e-07, "loss": 0.7613, "step": 4961 }, { "epoch": 0.9519652749466414, "grad_norm": 0.9267111898272237, "learning_rate": 1.2045743615567984e-07, "loss": 0.7535, "step": 4962 }, { "epoch": 0.9521571260701696, "grad_norm": 0.8343981137329688, "learning_rate": 1.1949762400918053e-07, "loss": 0.7496, "step": 4963 }, { "epoch": 0.9523489771936977, "grad_norm": 0.6214972572793863, "learning_rate": 1.185416281093521e-07, "loss": 0.2934, "step": 4964 }, { "epoch": 0.9525408283172259, "grad_norm": 0.9822410202589884, "learning_rate": 1.1758944882543921e-07, "loss": 0.7875, "step": 4965 }, { "epoch": 0.952732679440754, "grad_norm": 0.9285074791743081, "learning_rate": 1.1664108652521211e-07, "loss": 0.7357, "step": 4966 }, { "epoch": 0.9529245305642822, "grad_norm": 0.9569658829467563, "learning_rate": 1.1569654157496447e-07, "loss": 0.8072, "step": 4967 }, { "epoch": 0.9531163816878102, "grad_norm": 1.041299948842081, "learning_rate": 1.1475581433951999e-07, "loss": 0.6687, "step": 4968 }, { "epoch": 0.9533082328113384, "grad_norm": 0.9416244032755602, "learning_rate": 1.1381890518222361e-07, "loss": 0.7746, "step": 4969 }, { "epoch": 0.9535000839348665, "grad_norm": 0.9892181761405028, "learning_rate": 1.1288581446494696e-07, "loss": 0.7601, "step": 4970 }, { "epoch": 0.9536919350583947, "grad_norm": 0.9917515331709132, "learning_rate": 1.119565425480873e-07, "loss": 0.6873, "step": 4971 }, { "epoch": 0.9538837861819228, "grad_norm": 0.9033580228223587, "learning_rate": 1.1103108979056865e-07, "loss": 0.7737, "step": 4972 }, { "epoch": 0.954075637305451, "grad_norm": 0.8608854637288388, "learning_rate": 1.1010945654983617e-07, "loss": 0.77, "step": 4973 }, { "epoch": 0.9542674884289791, "grad_norm": 1.0276595447722217, "learning_rate": 1.0919164318186182e-07, "loss": 0.7324, "step": 4974 }, { "epoch": 0.9544593395525073, "grad_norm": 1.0046741284734837, "learning_rate": 1.082776500411431e-07, "loss": 0.7537, "step": 4975 }, { "epoch": 0.9546511906760354, "grad_norm": 1.0438563961472274, "learning_rate": 1.0736747748069876e-07, "loss": 0.7639, "step": 4976 }, { "epoch": 0.9548430417995636, "grad_norm": 0.8038473501896108, "learning_rate": 1.0646112585207535e-07, "loss": 0.7692, "step": 4977 }, { "epoch": 0.9550348929230916, "grad_norm": 0.9617975671299981, "learning_rate": 1.0555859550534286e-07, "loss": 0.7681, "step": 4978 }, { "epoch": 0.9552267440466198, "grad_norm": 0.9436382993797313, "learning_rate": 1.0465988678909467e-07, "loss": 0.7043, "step": 4979 }, { "epoch": 0.9554185951701479, "grad_norm": 1.1038406872293653, "learning_rate": 1.0376500005044643e-07, "loss": 0.6863, "step": 4980 }, { "epoch": 0.9556104462936761, "grad_norm": 0.8536151229172256, "learning_rate": 1.0287393563503945e-07, "loss": 0.7118, "step": 4981 }, { "epoch": 0.9558022974172042, "grad_norm": 0.8600350272019677, "learning_rate": 1.0198669388704064e-07, "loss": 0.7828, "step": 4982 }, { "epoch": 0.9559941485407324, "grad_norm": 1.0181075355639693, "learning_rate": 1.0110327514913809e-07, "loss": 0.7777, "step": 4983 }, { "epoch": 0.9561859996642605, "grad_norm": 0.9585872914115684, "learning_rate": 1.002236797625411e-07, "loss": 0.7854, "step": 4984 }, { "epoch": 0.9563778507877887, "grad_norm": 1.0369054065481955, "learning_rate": 9.934790806698791e-08, "loss": 0.8344, "step": 4985 }, { "epoch": 0.9565697019113168, "grad_norm": 0.9570024177277405, "learning_rate": 9.847596040073349e-08, "loss": 0.7608, "step": 4986 }, { "epoch": 0.956761553034845, "grad_norm": 0.8646932484197275, "learning_rate": 9.760783710056176e-08, "loss": 0.7218, "step": 4987 }, { "epoch": 0.9569534041583732, "grad_norm": 0.9423902265421303, "learning_rate": 9.674353850177565e-08, "loss": 0.7935, "step": 4988 }, { "epoch": 0.9571452552819012, "grad_norm": 0.9662678273859541, "learning_rate": 9.588306493820254e-08, "loss": 0.6784, "step": 4989 }, { "epoch": 0.9573371064054293, "grad_norm": 1.068141768787294, "learning_rate": 9.502641674219104e-08, "loss": 0.7185, "step": 4990 }, { "epoch": 0.9575289575289575, "grad_norm": 0.9132115640563255, "learning_rate": 9.417359424461203e-08, "loss": 0.7592, "step": 4991 }, { "epoch": 0.9577208086524857, "grad_norm": 0.9541839534526473, "learning_rate": 9.332459777486314e-08, "loss": 0.7094, "step": 4992 }, { "epoch": 0.9579126597760138, "grad_norm": 0.6014668315058719, "learning_rate": 9.247942766085871e-08, "loss": 0.2973, "step": 4993 }, { "epoch": 0.958104510899542, "grad_norm": 0.9296825142283708, "learning_rate": 9.163808422903653e-08, "loss": 0.8174, "step": 4994 }, { "epoch": 0.9582963620230701, "grad_norm": 0.854903771914744, "learning_rate": 9.080056780435887e-08, "loss": 0.7502, "step": 4995 }, { "epoch": 0.9584882131465983, "grad_norm": 0.9926723269785728, "learning_rate": 8.996687871030696e-08, "loss": 0.745, "step": 4996 }, { "epoch": 0.9586800642701264, "grad_norm": 0.9866188735279726, "learning_rate": 8.913701726888657e-08, "loss": 0.7504, "step": 4997 }, { "epoch": 0.9588719153936546, "grad_norm": 1.103620866861942, "learning_rate": 8.831098380062242e-08, "loss": 0.7954, "step": 4998 }, { "epoch": 0.9590637665171826, "grad_norm": 0.8603125232545279, "learning_rate": 8.748877862456152e-08, "loss": 0.763, "step": 4999 }, { "epoch": 0.9592556176407108, "grad_norm": 0.945573516817738, "learning_rate": 8.667040205827315e-08, "loss": 0.7525, "step": 5000 }, { "epoch": 0.9594474687642389, "grad_norm": 0.9042493152115316, "learning_rate": 8.585585441784672e-08, "loss": 0.7476, "step": 5001 }, { "epoch": 0.9596393198877671, "grad_norm": 0.9862210655543561, "learning_rate": 8.504513601789388e-08, "loss": 0.7625, "step": 5002 }, { "epoch": 0.9598311710112952, "grad_norm": 1.0324278947795915, "learning_rate": 8.423824717154638e-08, "loss": 0.6601, "step": 5003 }, { "epoch": 0.9600230221348234, "grad_norm": 0.9539964454571971, "learning_rate": 8.343518819045714e-08, "loss": 0.8157, "step": 5004 }, { "epoch": 0.9602148732583515, "grad_norm": 1.0080603704200672, "learning_rate": 8.263595938479806e-08, "loss": 0.7458, "step": 5005 }, { "epoch": 0.9604067243818797, "grad_norm": 1.088613906660262, "learning_rate": 8.184056106326666e-08, "loss": 0.7342, "step": 5006 }, { "epoch": 0.9605985755054078, "grad_norm": 0.9212360898170455, "learning_rate": 8.104899353307604e-08, "loss": 0.7804, "step": 5007 }, { "epoch": 0.960790426628936, "grad_norm": 1.1814361789774097, "learning_rate": 8.026125709996058e-08, "loss": 0.764, "step": 5008 }, { "epoch": 0.9609822777524641, "grad_norm": 1.0258785085415372, "learning_rate": 7.947735206817575e-08, "loss": 0.6782, "step": 5009 }, { "epoch": 0.9611741288759922, "grad_norm": 1.0052030971298926, "learning_rate": 7.869727874049826e-08, "loss": 0.8119, "step": 5010 }, { "epoch": 0.9613659799995203, "grad_norm": 0.9510611140354365, "learning_rate": 7.792103741822376e-08, "loss": 0.7446, "step": 5011 }, { "epoch": 0.9615578311230485, "grad_norm": 0.8701239592858644, "learning_rate": 7.714862840116689e-08, "loss": 0.7138, "step": 5012 }, { "epoch": 0.9617496822465766, "grad_norm": 0.9365568787110109, "learning_rate": 7.63800519876634e-08, "loss": 0.7374, "step": 5013 }, { "epoch": 0.9619415333701048, "grad_norm": 0.8970319746664738, "learning_rate": 7.561530847456921e-08, "loss": 0.7694, "step": 5014 }, { "epoch": 0.962133384493633, "grad_norm": 0.6242936055913335, "learning_rate": 7.485439815725693e-08, "loss": 0.3092, "step": 5015 }, { "epoch": 0.9623252356171611, "grad_norm": 0.9239365960242756, "learning_rate": 7.409732132962255e-08, "loss": 0.7853, "step": 5016 }, { "epoch": 0.9625170867406893, "grad_norm": 0.9659720555625848, "learning_rate": 7.334407828407885e-08, "loss": 0.6658, "step": 5017 }, { "epoch": 0.9627089378642174, "grad_norm": 0.832831980523722, "learning_rate": 7.259466931155868e-08, "loss": 0.6962, "step": 5018 }, { "epoch": 0.9629007889877456, "grad_norm": 0.8878982929324196, "learning_rate": 7.18490947015138e-08, "loss": 0.7221, "step": 5019 }, { "epoch": 0.9630926401112736, "grad_norm": 0.9419422489711899, "learning_rate": 7.1107354741915e-08, "loss": 0.7224, "step": 5020 }, { "epoch": 0.9632844912348018, "grad_norm": 1.1430419091911506, "learning_rate": 7.036944971925197e-08, "loss": 0.6847, "step": 5021 }, { "epoch": 0.9634763423583299, "grad_norm": 0.931387743624851, "learning_rate": 6.963537991853342e-08, "loss": 0.7737, "step": 5022 }, { "epoch": 0.9636681934818581, "grad_norm": 0.9982169019607844, "learning_rate": 6.890514562328699e-08, "loss": 0.6813, "step": 5023 }, { "epoch": 0.9638600446053862, "grad_norm": 0.6300435849287147, "learning_rate": 6.817874711555705e-08, "loss": 0.3182, "step": 5024 }, { "epoch": 0.9640518957289144, "grad_norm": 0.9935665731262189, "learning_rate": 6.74561846759092e-08, "loss": 0.779, "step": 5025 }, { "epoch": 0.9642437468524425, "grad_norm": 0.9780105280595277, "learning_rate": 6.673745858342572e-08, "loss": 0.7262, "step": 5026 }, { "epoch": 0.9644355979759707, "grad_norm": 1.0467770238492358, "learning_rate": 6.602256911570792e-08, "loss": 0.7929, "step": 5027 }, { "epoch": 0.9646274490994988, "grad_norm": 1.0928062098732065, "learning_rate": 6.53115165488738e-08, "loss": 0.6756, "step": 5028 }, { "epoch": 0.964819300223027, "grad_norm": 0.93855771086697, "learning_rate": 6.460430115756034e-08, "loss": 0.8036, "step": 5029 }, { "epoch": 0.965011151346555, "grad_norm": 0.8473304704725216, "learning_rate": 6.390092321492348e-08, "loss": 0.7596, "step": 5030 }, { "epoch": 0.9652030024700832, "grad_norm": 0.6101796987417812, "learning_rate": 6.320138299263589e-08, "loss": 0.3362, "step": 5031 }, { "epoch": 0.9653948535936113, "grad_norm": 1.0277751301598776, "learning_rate": 6.250568076088814e-08, "loss": 0.7548, "step": 5032 }, { "epoch": 0.9655867047171395, "grad_norm": 0.9328871959314691, "learning_rate": 6.181381678838749e-08, "loss": 0.7912, "step": 5033 }, { "epoch": 0.9657785558406676, "grad_norm": 1.1060831613527076, "learning_rate": 6.112579134236018e-08, "loss": 0.7095, "step": 5034 }, { "epoch": 0.9659704069641958, "grad_norm": 0.8975861363739339, "learning_rate": 6.044160468855032e-08, "loss": 0.7618, "step": 5035 }, { "epoch": 0.9661622580877239, "grad_norm": 1.0864138006629604, "learning_rate": 5.97612570912165e-08, "loss": 0.7161, "step": 5036 }, { "epoch": 0.9663541092112521, "grad_norm": 0.9735986276084805, "learning_rate": 5.9084748813136305e-08, "loss": 0.7339, "step": 5037 }, { "epoch": 0.9665459603347802, "grad_norm": 0.882848951400633, "learning_rate": 5.8412080115605174e-08, "loss": 0.7107, "step": 5038 }, { "epoch": 0.9667378114583084, "grad_norm": 1.0763181596949973, "learning_rate": 5.774325125843527e-08, "loss": 0.8067, "step": 5039 }, { "epoch": 0.9669296625818365, "grad_norm": 0.6377105587397659, "learning_rate": 5.7078262499953297e-08, "loss": 0.3203, "step": 5040 }, { "epoch": 0.9671215137053646, "grad_norm": 1.0341890341586253, "learning_rate": 5.641711409700712e-08, "loss": 0.7487, "step": 5041 }, { "epoch": 0.9673133648288927, "grad_norm": 0.8679641360056821, "learning_rate": 5.575980630495692e-08, "loss": 0.8139, "step": 5042 }, { "epoch": 0.9675052159524209, "grad_norm": 1.0688559197520984, "learning_rate": 5.5106339377681836e-08, "loss": 0.8198, "step": 5043 }, { "epoch": 0.967697067075949, "grad_norm": 0.9460094244570538, "learning_rate": 5.445671356757776e-08, "loss": 0.7511, "step": 5044 }, { "epoch": 0.9678889181994772, "grad_norm": 0.8730612453830444, "learning_rate": 5.381092912555508e-08, "loss": 0.6842, "step": 5045 }, { "epoch": 0.9680807693230054, "grad_norm": 0.9634798439239664, "learning_rate": 5.316898630104206e-08, "loss": 0.7164, "step": 5046 }, { "epoch": 0.9682726204465335, "grad_norm": 1.0350619731895523, "learning_rate": 5.2530885341982586e-08, "loss": 0.6996, "step": 5047 }, { "epoch": 0.9684644715700617, "grad_norm": 1.0245706634074996, "learning_rate": 5.18966264948384e-08, "loss": 0.7866, "step": 5048 }, { "epoch": 0.9686563226935898, "grad_norm": 1.0254669688191185, "learning_rate": 5.126621000458354e-08, "loss": 0.7405, "step": 5049 }, { "epoch": 0.968848173817118, "grad_norm": 0.8354969027090319, "learning_rate": 5.063963611471212e-08, "loss": 0.7593, "step": 5050 }, { "epoch": 0.969040024940646, "grad_norm": 0.9157172935984395, "learning_rate": 5.001690506723056e-08, "loss": 0.7528, "step": 5051 }, { "epoch": 0.9692318760641742, "grad_norm": 1.0564342754280742, "learning_rate": 4.939801710266312e-08, "loss": 0.752, "step": 5052 }, { "epoch": 0.9694237271877023, "grad_norm": 0.6364695180254987, "learning_rate": 4.878297246004859e-08, "loss": 0.3288, "step": 5053 }, { "epoch": 0.9696155783112305, "grad_norm": 0.9944644280535284, "learning_rate": 4.8171771376943624e-08, "loss": 0.7757, "step": 5054 }, { "epoch": 0.9698074294347586, "grad_norm": 0.8988410661499459, "learning_rate": 4.756441408941715e-08, "loss": 0.7573, "step": 5055 }, { "epoch": 0.9699992805582868, "grad_norm": 0.6331569039937135, "learning_rate": 4.6960900832054844e-08, "loss": 0.3107, "step": 5056 }, { "epoch": 0.9701911316818149, "grad_norm": 0.9107233241199391, "learning_rate": 4.636123183795915e-08, "loss": 0.8028, "step": 5057 }, { "epoch": 0.9703829828053431, "grad_norm": 1.1615288395839212, "learning_rate": 4.5765407338745907e-08, "loss": 0.7619, "step": 5058 }, { "epoch": 0.9705748339288712, "grad_norm": 0.8762257452559253, "learning_rate": 4.5173427564544347e-08, "loss": 0.6944, "step": 5059 }, { "epoch": 0.9707666850523994, "grad_norm": 0.950965149448045, "learning_rate": 4.45852927440038e-08, "loss": 0.726, "step": 5060 }, { "epoch": 0.9709585361759275, "grad_norm": 1.0271553445050448, "learning_rate": 4.400100310428368e-08, "loss": 0.7645, "step": 5061 }, { "epoch": 0.9711503872994556, "grad_norm": 1.049843314545689, "learning_rate": 4.3420558871060116e-08, "loss": 0.6746, "step": 5062 }, { "epoch": 0.9713422384229837, "grad_norm": 0.9023772235787549, "learning_rate": 4.2843960268524886e-08, "loss": 0.7939, "step": 5063 }, { "epoch": 0.9715340895465119, "grad_norm": 0.9564187301853907, "learning_rate": 4.2271207519383184e-08, "loss": 0.6753, "step": 5064 }, { "epoch": 0.97172594067004, "grad_norm": 0.8913124571277358, "learning_rate": 4.170230084485582e-08, "loss": 0.731, "step": 5065 }, { "epoch": 0.9719177917935682, "grad_norm": 0.9958671450651237, "learning_rate": 4.113724046467593e-08, "loss": 0.7702, "step": 5066 }, { "epoch": 0.9721096429170963, "grad_norm": 1.000566335463333, "learning_rate": 4.057602659709225e-08, "loss": 0.81, "step": 5067 }, { "epoch": 0.9723014940406245, "grad_norm": 0.90812492430275, "learning_rate": 4.001865945887029e-08, "loss": 0.7487, "step": 5068 }, { "epoch": 0.9724933451641526, "grad_norm": 1.0650790652197057, "learning_rate": 3.9465139265285615e-08, "loss": 0.7225, "step": 5069 }, { "epoch": 0.9726851962876808, "grad_norm": 0.8921258613836603, "learning_rate": 3.891546623013054e-08, "loss": 0.7809, "step": 5070 }, { "epoch": 0.972877047411209, "grad_norm": 0.8817304053483987, "learning_rate": 3.836964056571191e-08, "loss": 0.7559, "step": 5071 }, { "epoch": 0.973068898534737, "grad_norm": 0.9581428202636746, "learning_rate": 3.782766248284775e-08, "loss": 0.8129, "step": 5072 }, { "epoch": 0.9732607496582651, "grad_norm": 1.0362227666843395, "learning_rate": 3.7289532190873946e-08, "loss": 0.7907, "step": 5073 }, { "epoch": 0.9734526007817933, "grad_norm": 0.919887354589195, "learning_rate": 3.675524989763535e-08, "loss": 0.7501, "step": 5074 }, { "epoch": 0.9736444519053215, "grad_norm": 0.9868559663449292, "learning_rate": 3.622481580949466e-08, "loss": 0.7766, "step": 5075 }, { "epoch": 0.9738363030288496, "grad_norm": 0.9168959121720324, "learning_rate": 3.5698230131326893e-08, "loss": 0.7392, "step": 5076 }, { "epoch": 0.9740281541523778, "grad_norm": 0.9705589113445989, "learning_rate": 3.517549306652157e-08, "loss": 0.7667, "step": 5077 }, { "epoch": 0.9742200052759059, "grad_norm": 0.9068442386290337, "learning_rate": 3.46566048169783e-08, "loss": 0.6758, "step": 5078 }, { "epoch": 0.9744118563994341, "grad_norm": 0.8658310641943852, "learning_rate": 3.4141565583114547e-08, "loss": 0.7784, "step": 5079 }, { "epoch": 0.9746037075229622, "grad_norm": 0.9552454682342967, "learning_rate": 3.3630375563857843e-08, "loss": 0.7656, "step": 5080 }, { "epoch": 0.9747955586464904, "grad_norm": 0.8179538864384919, "learning_rate": 3.3123034956650256e-08, "loss": 0.7228, "step": 5081 }, { "epoch": 0.9749874097700185, "grad_norm": 0.8648652865740444, "learning_rate": 3.261954395744948e-08, "loss": 0.7744, "step": 5082 }, { "epoch": 0.9751792608935466, "grad_norm": 0.8998874397438315, "learning_rate": 3.2119902760719967e-08, "loss": 0.7392, "step": 5083 }, { "epoch": 0.9753711120170747, "grad_norm": 0.9773169263319926, "learning_rate": 3.162411155944733e-08, "loss": 0.7581, "step": 5084 }, { "epoch": 0.9755629631406029, "grad_norm": 0.9228514321918375, "learning_rate": 3.1132170545122855e-08, "loss": 0.6974, "step": 5085 }, { "epoch": 0.975754814264131, "grad_norm": 1.1913223242853173, "learning_rate": 3.0644079907756754e-08, "loss": 0.7637, "step": 5086 }, { "epoch": 0.9759466653876592, "grad_norm": 0.9654631467402065, "learning_rate": 3.0159839835866014e-08, "loss": 0.7011, "step": 5087 }, { "epoch": 0.9761385165111873, "grad_norm": 0.8687941879367147, "learning_rate": 2.9679450516485465e-08, "loss": 0.7243, "step": 5088 }, { "epoch": 0.9763303676347155, "grad_norm": 0.9941875493341079, "learning_rate": 2.9202912135161132e-08, "loss": 0.7644, "step": 5089 }, { "epoch": 0.9765222187582436, "grad_norm": 0.7969547254657827, "learning_rate": 2.873022487595134e-08, "loss": 0.7596, "step": 5090 }, { "epoch": 0.9767140698817718, "grad_norm": 0.887213475127967, "learning_rate": 2.826138892142449e-08, "loss": 0.76, "step": 5091 }, { "epoch": 0.9769059210052999, "grad_norm": 0.6107664376871738, "learning_rate": 2.7796404452666847e-08, "loss": 0.3173, "step": 5092 }, { "epoch": 0.977097772128828, "grad_norm": 0.8837670588577455, "learning_rate": 2.7335271649272522e-08, "loss": 0.7371, "step": 5093 }, { "epoch": 0.9772896232523561, "grad_norm": 0.9095560284241865, "learning_rate": 2.6877990689350152e-08, "loss": 0.8239, "step": 5094 }, { "epoch": 0.9774814743758843, "grad_norm": 1.0620516370744904, "learning_rate": 2.6424561749518464e-08, "loss": 0.7631, "step": 5095 }, { "epoch": 0.9776733254994124, "grad_norm": 0.9157823092681671, "learning_rate": 2.5974985004911803e-08, "loss": 0.7182, "step": 5096 }, { "epoch": 0.9778651766229406, "grad_norm": 0.9298120368091539, "learning_rate": 2.55292606291746e-08, "loss": 0.8391, "step": 5097 }, { "epoch": 0.9780570277464687, "grad_norm": 1.0115147397639332, "learning_rate": 2.5087388794461377e-08, "loss": 0.7262, "step": 5098 }, { "epoch": 0.9782488788699969, "grad_norm": 1.0199129606533401, "learning_rate": 2.464936967144338e-08, "loss": 0.6625, "step": 5099 }, { "epoch": 0.978440729993525, "grad_norm": 0.9452060358135717, "learning_rate": 2.4215203429299727e-08, "loss": 0.7764, "step": 5100 }, { "epoch": 0.9786325811170532, "grad_norm": 0.9265939662866387, "learning_rate": 2.3784890235724057e-08, "loss": 0.6632, "step": 5101 }, { "epoch": 0.9788244322405814, "grad_norm": 0.8738107660213543, "learning_rate": 2.3358430256918974e-08, "loss": 0.7412, "step": 5102 }, { "epoch": 0.9790162833641094, "grad_norm": 0.9119615629208895, "learning_rate": 2.2935823657601606e-08, "loss": 0.7269, "step": 5103 }, { "epoch": 0.9792081344876375, "grad_norm": 0.9247439482617181, "learning_rate": 2.251707060099917e-08, "loss": 0.7253, "step": 5104 }, { "epoch": 0.9793999856111657, "grad_norm": 0.9045901172552959, "learning_rate": 2.2102171248851166e-08, "loss": 0.7566, "step": 5105 }, { "epoch": 0.9795918367346939, "grad_norm": 0.861269602898132, "learning_rate": 2.1691125761408305e-08, "loss": 0.7554, "step": 5106 }, { "epoch": 0.979783687858222, "grad_norm": 0.9143603910775665, "learning_rate": 2.1283934297432472e-08, "loss": 0.7607, "step": 5107 }, { "epoch": 0.9799755389817502, "grad_norm": 0.8895204976304845, "learning_rate": 2.0880597014197868e-08, "loss": 0.7103, "step": 5108 }, { "epoch": 0.9801673901052783, "grad_norm": 0.8946799405365556, "learning_rate": 2.048111406748876e-08, "loss": 0.7475, "step": 5109 }, { "epoch": 0.9803592412288065, "grad_norm": 0.8832523686661224, "learning_rate": 2.008548561160284e-08, "loss": 0.7356, "step": 5110 }, { "epoch": 0.9805510923523346, "grad_norm": 0.965284851623327, "learning_rate": 1.969371179934676e-08, "loss": 0.7366, "step": 5111 }, { "epoch": 0.9807429434758628, "grad_norm": 0.9605492521086987, "learning_rate": 1.9305792782039478e-08, "loss": 0.782, "step": 5112 }, { "epoch": 0.9809347945993909, "grad_norm": 1.0079694881381478, "learning_rate": 1.8921728709510033e-08, "loss": 0.7537, "step": 5113 }, { "epoch": 0.981126645722919, "grad_norm": 0.9538743261172926, "learning_rate": 1.8541519730100876e-08, "loss": 0.7501, "step": 5114 }, { "epoch": 0.9813184968464471, "grad_norm": 0.9570287814199591, "learning_rate": 1.8165165990663425e-08, "loss": 0.7613, "step": 5115 }, { "epoch": 0.9815103479699753, "grad_norm": 1.156783052995573, "learning_rate": 1.779266763656029e-08, "loss": 0.7545, "step": 5116 }, { "epoch": 0.9817021990935034, "grad_norm": 1.0188368499318332, "learning_rate": 1.7424024811665274e-08, "loss": 0.7419, "step": 5117 }, { "epoch": 0.9818940502170316, "grad_norm": 0.8174358263175583, "learning_rate": 1.705923765836337e-08, "loss": 0.7018, "step": 5118 }, { "epoch": 0.9820859013405597, "grad_norm": 0.8702884345379216, "learning_rate": 1.669830631754965e-08, "loss": 0.7617, "step": 5119 }, { "epoch": 0.9822777524640879, "grad_norm": 0.9296219050844853, "learning_rate": 1.634123092863149e-08, "loss": 0.7926, "step": 5120 }, { "epoch": 0.982469603587616, "grad_norm": 1.0478262857230705, "learning_rate": 1.5988011629524125e-08, "loss": 0.7813, "step": 5121 }, { "epoch": 0.9826614547111442, "grad_norm": 0.9522511900713212, "learning_rate": 1.5638648556656198e-08, "loss": 0.7418, "step": 5122 }, { "epoch": 0.9828533058346723, "grad_norm": 1.0170498620161428, "learning_rate": 1.529314184496422e-08, "loss": 0.838, "step": 5123 }, { "epoch": 0.9830451569582004, "grad_norm": 0.9598354720396661, "learning_rate": 1.4951491627899218e-08, "loss": 0.8115, "step": 5124 }, { "epoch": 0.9832370080817285, "grad_norm": 0.8688541669601042, "learning_rate": 1.4613698037417857e-08, "loss": 0.7722, "step": 5125 }, { "epoch": 0.9834288592052567, "grad_norm": 0.9574782577430787, "learning_rate": 1.4279761203990216e-08, "loss": 0.7504, "step": 5126 }, { "epoch": 0.9836207103287848, "grad_norm": 0.9468641018566081, "learning_rate": 1.3949681256597569e-08, "loss": 0.8025, "step": 5127 }, { "epoch": 0.983812561452313, "grad_norm": 0.829114493750586, "learning_rate": 1.3623458322727933e-08, "loss": 0.7667, "step": 5128 }, { "epoch": 0.9840044125758411, "grad_norm": 0.9543178140566986, "learning_rate": 1.3301092528382741e-08, "loss": 0.7821, "step": 5129 }, { "epoch": 0.9841962636993693, "grad_norm": 1.0088716967718465, "learning_rate": 1.2982583998072395e-08, "loss": 0.6872, "step": 5130 }, { "epoch": 0.9843881148228975, "grad_norm": 0.9306053763436613, "learning_rate": 1.2667932854818488e-08, "loss": 0.8098, "step": 5131 }, { "epoch": 0.9845799659464256, "grad_norm": 0.928458720728488, "learning_rate": 1.2357139220150471e-08, "loss": 0.723, "step": 5132 }, { "epoch": 0.9847718170699538, "grad_norm": 0.85836808718982, "learning_rate": 1.205020321411121e-08, "loss": 0.6684, "step": 5133 }, { "epoch": 0.9849636681934819, "grad_norm": 0.8721896403370119, "learning_rate": 1.1747124955249212e-08, "loss": 0.7354, "step": 5134 }, { "epoch": 0.98515551931701, "grad_norm": 0.9783029187479191, "learning_rate": 1.1447904560627499e-08, "loss": 0.7133, "step": 5135 }, { "epoch": 0.9853473704405381, "grad_norm": 0.8550754982463011, "learning_rate": 1.1152542145816959e-08, "loss": 0.7432, "step": 5136 }, { "epoch": 0.9855392215640663, "grad_norm": 0.6386275366741111, "learning_rate": 1.0861037824896337e-08, "loss": 0.2991, "step": 5137 }, { "epoch": 0.9857310726875944, "grad_norm": 0.923530621084618, "learning_rate": 1.0573391710458902e-08, "loss": 0.7696, "step": 5138 }, { "epoch": 0.9859229238111226, "grad_norm": 0.843944212375577, "learning_rate": 1.0289603913603563e-08, "loss": 0.7506, "step": 5139 }, { "epoch": 0.9861147749346507, "grad_norm": 1.0919076282086986, "learning_rate": 1.0009674543941527e-08, "loss": 0.7206, "step": 5140 }, { "epoch": 0.9863066260581789, "grad_norm": 0.9202241394057024, "learning_rate": 9.733603709591866e-09, "loss": 0.7582, "step": 5141 }, { "epoch": 0.986498477181707, "grad_norm": 0.9248190717072972, "learning_rate": 9.461391517183726e-09, "loss": 0.7415, "step": 5142 }, { "epoch": 0.9866903283052352, "grad_norm": 0.8457205128780346, "learning_rate": 9.193038071858562e-09, "loss": 0.7141, "step": 5143 }, { "epoch": 0.9868821794287633, "grad_norm": 0.9548786374692431, "learning_rate": 8.928543477263462e-09, "loss": 0.7559, "step": 5144 }, { "epoch": 0.9870740305522914, "grad_norm": 0.6678137557059638, "learning_rate": 8.66790783555671e-09, "loss": 0.322, "step": 5145 }, { "epoch": 0.9872658816758195, "grad_norm": 0.8318838578971253, "learning_rate": 8.411131247407778e-09, "loss": 0.6701, "step": 5146 }, { "epoch": 0.9874577327993477, "grad_norm": 0.6359719356111682, "learning_rate": 8.158213811991778e-09, "loss": 0.3182, "step": 5147 }, { "epoch": 0.9876495839228758, "grad_norm": 0.9877805165725566, "learning_rate": 7.909155626998345e-09, "loss": 0.7339, "step": 5148 }, { "epoch": 0.987841435046404, "grad_norm": 1.0024616498862065, "learning_rate": 7.66395678862164e-09, "loss": 0.6888, "step": 5149 }, { "epoch": 0.9880332861699321, "grad_norm": 0.9934665835190029, "learning_rate": 7.422617391569242e-09, "loss": 0.747, "step": 5150 }, { "epoch": 0.9882251372934603, "grad_norm": 0.9939072704788143, "learning_rate": 7.185137529053254e-09, "loss": 0.6966, "step": 5151 }, { "epoch": 0.9884169884169884, "grad_norm": 0.9322321546688217, "learning_rate": 6.951517292800303e-09, "loss": 0.7795, "step": 5152 }, { "epoch": 0.9886088395405166, "grad_norm": 0.9405825825636809, "learning_rate": 6.721756773043764e-09, "loss": 0.7793, "step": 5153 }, { "epoch": 0.9888006906640447, "grad_norm": 0.8516612851743015, "learning_rate": 6.495856058524874e-09, "loss": 0.7579, "step": 5154 }, { "epoch": 0.9889925417875729, "grad_norm": 1.1395078333223392, "learning_rate": 6.273815236496062e-09, "loss": 0.8272, "step": 5155 }, { "epoch": 0.9891843929111009, "grad_norm": 0.8258333300944677, "learning_rate": 6.0556343927198336e-09, "loss": 0.7279, "step": 5156 }, { "epoch": 0.9893762440346291, "grad_norm": 0.8489054386652506, "learning_rate": 5.841313611465449e-09, "loss": 0.7175, "step": 5157 }, { "epoch": 0.9895680951581572, "grad_norm": 0.8882036652494529, "learning_rate": 5.630852975511136e-09, "loss": 0.7774, "step": 5158 }, { "epoch": 0.9897599462816854, "grad_norm": 1.0159049734797188, "learning_rate": 5.4242525661474256e-09, "loss": 0.6871, "step": 5159 }, { "epoch": 0.9899517974052136, "grad_norm": 1.6112923339477616, "learning_rate": 5.221512463169376e-09, "loss": 0.779, "step": 5160 }, { "epoch": 0.9901436485287417, "grad_norm": 0.9263857773633355, "learning_rate": 5.022632744885458e-09, "loss": 0.7803, "step": 5161 }, { "epoch": 0.9903354996522699, "grad_norm": 0.951182335965817, "learning_rate": 4.827613488109784e-09, "loss": 0.7047, "step": 5162 }, { "epoch": 0.990527350775798, "grad_norm": 0.8707575226658536, "learning_rate": 4.636454768166543e-09, "loss": 0.7123, "step": 5163 }, { "epoch": 0.9907192018993262, "grad_norm": 0.9773881849051165, "learning_rate": 4.449156658888898e-09, "loss": 0.7539, "step": 5164 }, { "epoch": 0.9909110530228543, "grad_norm": 0.9677777253690908, "learning_rate": 4.265719232620091e-09, "loss": 0.8022, "step": 5165 }, { "epoch": 0.9911029041463824, "grad_norm": 1.1361653738054198, "learning_rate": 4.086142560209005e-09, "loss": 0.7541, "step": 5166 }, { "epoch": 0.9912947552699105, "grad_norm": 1.0589368238902108, "learning_rate": 3.9104267110168235e-09, "loss": 0.8218, "step": 5167 }, { "epoch": 0.9914866063934387, "grad_norm": 0.9381631059551055, "learning_rate": 3.73857175291259e-09, "loss": 0.6712, "step": 5168 }, { "epoch": 0.9916784575169668, "grad_norm": 0.5969818128356249, "learning_rate": 3.5705777522720976e-09, "loss": 0.3093, "step": 5169 }, { "epoch": 0.991870308640495, "grad_norm": 0.9749805096024323, "learning_rate": 3.4064447739823312e-09, "loss": 0.7113, "step": 5170 }, { "epoch": 0.9920621597640231, "grad_norm": 0.9660195283892082, "learning_rate": 3.2461728814370263e-09, "loss": 0.7926, "step": 5171 }, { "epoch": 0.9922540108875513, "grad_norm": 1.0113571540116626, "learning_rate": 3.0897621365411078e-09, "loss": 0.7177, "step": 5172 }, { "epoch": 0.9924458620110794, "grad_norm": 1.1335733551121112, "learning_rate": 2.9372125997051416e-09, "loss": 0.743, "step": 5173 }, { "epoch": 0.9926377131346076, "grad_norm": 0.9058276407902832, "learning_rate": 2.7885243298508836e-09, "loss": 0.7554, "step": 5174 }, { "epoch": 0.9928295642581357, "grad_norm": 0.970104332593938, "learning_rate": 2.6436973844079506e-09, "loss": 0.7487, "step": 5175 }, { "epoch": 0.9930214153816638, "grad_norm": 0.9497690329437419, "learning_rate": 2.5027318193138193e-09, "loss": 0.7792, "step": 5176 }, { "epoch": 0.9932132665051919, "grad_norm": 0.935382615756855, "learning_rate": 2.3656276890138275e-09, "loss": 0.7217, "step": 5177 }, { "epoch": 0.9934051176287201, "grad_norm": 0.8011156861163393, "learning_rate": 2.232385046465613e-09, "loss": 0.7077, "step": 5178 }, { "epoch": 0.9935969687522482, "grad_norm": 0.91596868116725, "learning_rate": 2.103003943131343e-09, "loss": 0.8222, "step": 5179 }, { "epoch": 0.9937888198757764, "grad_norm": 0.9117320910896631, "learning_rate": 1.9774844289832674e-09, "loss": 0.7688, "step": 5180 }, { "epoch": 0.9939806709993045, "grad_norm": 0.8885690811178895, "learning_rate": 1.855826552501494e-09, "loss": 0.8092, "step": 5181 }, { "epoch": 0.9941725221228327, "grad_norm": 0.938884342016526, "learning_rate": 1.738030360677323e-09, "loss": 0.7409, "step": 5182 }, { "epoch": 0.9943643732463608, "grad_norm": 0.9349569542209606, "learning_rate": 1.6240958990054735e-09, "loss": 0.8093, "step": 5183 }, { "epoch": 0.994556224369889, "grad_norm": 1.0477217562132781, "learning_rate": 1.5140232114940757e-09, "loss": 0.7086, "step": 5184 }, { "epoch": 0.9947480754934171, "grad_norm": 0.9377557877537065, "learning_rate": 1.4078123406569e-09, "loss": 0.736, "step": 5185 }, { "epoch": 0.9949399266169453, "grad_norm": 0.8826711458357044, "learning_rate": 1.3054633275177976e-09, "loss": 0.7042, "step": 5186 }, { "epoch": 0.9951317777404733, "grad_norm": 0.9008393249972686, "learning_rate": 1.2069762116062588e-09, "loss": 0.6755, "step": 5187 }, { "epoch": 0.9953236288640015, "grad_norm": 0.9621732662471135, "learning_rate": 1.1123510309629659e-09, "loss": 0.8038, "step": 5188 }, { "epoch": 0.9955154799875297, "grad_norm": 0.9725073176479364, "learning_rate": 1.0215878221364605e-09, "loss": 0.7411, "step": 5189 }, { "epoch": 0.9957073311110578, "grad_norm": 0.9411301779469801, "learning_rate": 9.346866201820348e-10, "loss": 0.7525, "step": 5190 }, { "epoch": 0.995899182234586, "grad_norm": 1.0519041296560832, "learning_rate": 8.516474586650614e-10, "loss": 0.7651, "step": 5191 }, { "epoch": 0.9960910333581141, "grad_norm": 0.8399595483148046, "learning_rate": 7.724703696587732e-10, "loss": 0.7526, "step": 5192 }, { "epoch": 0.9962828844816423, "grad_norm": 0.8384571699895955, "learning_rate": 6.971553837442635e-10, "loss": 0.7385, "step": 5193 }, { "epoch": 0.9964747356051704, "grad_norm": 0.8925202268692175, "learning_rate": 6.25702530011596e-10, "loss": 0.7689, "step": 5194 }, { "epoch": 0.9966665867286986, "grad_norm": 0.9268970665016741, "learning_rate": 5.581118360575844e-10, "loss": 0.7485, "step": 5195 }, { "epoch": 0.9968584378522267, "grad_norm": 0.8216556311135342, "learning_rate": 4.94383327989123e-10, "loss": 0.7296, "step": 5196 }, { "epoch": 0.9970502889757548, "grad_norm": 0.8057303706764305, "learning_rate": 4.3451703042207694e-10, "loss": 0.7919, "step": 5197 }, { "epoch": 0.9972421400992829, "grad_norm": 0.9301644281272664, "learning_rate": 3.7851296647684056e-10, "loss": 0.7793, "step": 5198 }, { "epoch": 0.9974339912228111, "grad_norm": 0.9728377195843223, "learning_rate": 3.263711577861095e-10, "loss": 0.7585, "step": 5199 }, { "epoch": 0.9976258423463392, "grad_norm": 0.986498534885138, "learning_rate": 2.7809162448821924e-10, "loss": 0.7519, "step": 5200 }, { "epoch": 0.9978176934698674, "grad_norm": 0.8674199821313563, "learning_rate": 2.3367438523158593e-10, "loss": 0.7885, "step": 5201 }, { "epoch": 0.9980095445933955, "grad_norm": 1.1717875565642886, "learning_rate": 1.931194571713757e-10, "loss": 0.7743, "step": 5202 }, { "epoch": 0.9982013957169237, "grad_norm": 0.8536807351357553, "learning_rate": 1.5642685597061502e-10, "loss": 0.7423, "step": 5203 }, { "epoch": 0.9983932468404518, "grad_norm": 0.823963198060883, "learning_rate": 1.2359659580352123e-10, "loss": 0.7293, "step": 5204 }, { "epoch": 0.99858509796398, "grad_norm": 0.937227101647225, "learning_rate": 9.462868934884128e-11, "loss": 0.7069, "step": 5205 }, { "epoch": 0.9987769490875081, "grad_norm": 0.9293741561525918, "learning_rate": 6.952314779540281e-11, "loss": 0.6783, "step": 5206 }, { "epoch": 0.9989688002110363, "grad_norm": 1.2224619426209486, "learning_rate": 4.827998084100394e-11, "loss": 0.7559, "step": 5207 }, { "epoch": 0.9991606513345643, "grad_norm": 0.9721997969390358, "learning_rate": 3.089919668908259e-11, "loss": 0.7108, "step": 5208 }, { "epoch": 0.9993525024580925, "grad_norm": 1.0545582113395322, "learning_rate": 1.73808020531574e-11, "loss": 0.7447, "step": 5209 }, { "epoch": 0.9995443535816206, "grad_norm": 0.9523123061249145, "learning_rate": 7.72480215571747e-12, "loss": 0.7709, "step": 5210 }, { "epoch": 0.9997362047051488, "grad_norm": 0.9208358431801195, "learning_rate": 1.9312007248917243e-12, "loss": 0.7468, "step": 5211 }, { "epoch": 0.9999280558286769, "grad_norm": 2.3350937483655314, "learning_rate": 0.0, "loss": 0.4419, "step": 5212 }, { "epoch": 0.9999280558286769, "step": 5212, "total_flos": 8015138862923776.0, "train_loss": 0.7756117214566685, "train_runtime": 39409.1729, "train_samples_per_second": 16.929, "train_steps_per_second": 0.132 } ], "logging_steps": 1.0, "max_steps": 5212, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8015138862923776.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }