{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9995664614584236,
  "eval_steps": 500,
  "global_step": 1441,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006936616665221538,
      "grad_norm": 43.466719125161504,
      "learning_rate": 0.0,
      "loss": 2.7095,
      "step": 1
    },
    {
      "epoch": 0.0013873233330443076,
      "grad_norm": 6.96466711942635,
      "learning_rate": 2.5595802480981545e-06,
      "loss": 2.0266,
      "step": 2
    },
    {
      "epoch": 0.0020809849995664614,
      "grad_norm": 5.728387691489433,
      "learning_rate": 4.056838710822129e-06,
      "loss": 2.0082,
      "step": 3
    },
    {
      "epoch": 0.0027746466660886152,
      "grad_norm": 4.686811256036109,
      "learning_rate": 5.119160496196309e-06,
      "loss": 1.9589,
      "step": 4
    },
    {
      "epoch": 0.003468308332610769,
      "grad_norm": 3.3529589095838874,
      "learning_rate": 5.943161289177871e-06,
      "loss": 1.9014,
      "step": 5
    },
    {
      "epoch": 0.004161969999132923,
      "grad_norm": 3.5493873116106522,
      "learning_rate": 6.616418958920285e-06,
      "loss": 1.8478,
      "step": 6
    },
    {
      "epoch": 0.004855631665655077,
      "grad_norm": 3.2248318636630753,
      "learning_rate": 7.185650207899778e-06,
      "loss": 1.8631,
      "step": 7
    },
    {
      "epoch": 0.0055492933321772304,
      "grad_norm": 2.5559675536274558,
      "learning_rate": 7.678740744294464e-06,
      "loss": 1.8771,
      "step": 8
    },
    {
      "epoch": 0.006242954998699385,
      "grad_norm": 2.852602324925445,
      "learning_rate": 8.113677421644258e-06,
      "loss": 1.8517,
      "step": 9
    },
    {
      "epoch": 0.006936616665221538,
      "grad_norm": 2.8475514523205687,
      "learning_rate": 8.502741537276027e-06,
      "loss": 1.8444,
      "step": 10
    },
    {
      "epoch": 0.007630278331743692,
      "grad_norm": 2.4445595331823116,
      "learning_rate": 8.854692840710254e-06,
      "loss": 1.8173,
      "step": 11
    },
    {
      "epoch": 0.008323939998265846,
      "grad_norm": 2.6236339242092606,
      "learning_rate": 9.175999207018439e-06,
      "loss": 1.797,
      "step": 12
    },
    {
      "epoch": 0.009017601664787999,
      "grad_norm": 2.7480536817929178,
      "learning_rate": 9.471572411831843e-06,
      "loss": 1.7525,
      "step": 13
    },
    {
      "epoch": 0.009711263331310154,
      "grad_norm": 2.8195069251274267,
      "learning_rate": 9.745230455997932e-06,
      "loss": 1.7421,
      "step": 14
    },
    {
      "epoch": 0.010404924997832308,
      "grad_norm": 2.574781912417627,
      "learning_rate": 9.999999999999999e-06,
      "loss": 1.7563,
      "step": 15
    },
    {
      "epoch": 0.011098586664354461,
      "grad_norm": 3.018787395539229,
      "learning_rate": 1e-05,
      "loss": 1.7235,
      "step": 16
    },
    {
      "epoch": 0.011792248330876614,
      "grad_norm": 2.7321515111278085,
      "learning_rate": 1e-05,
      "loss": 1.7372,
      "step": 17
    },
    {
      "epoch": 0.01248590999739877,
      "grad_norm": 2.535876566743008,
      "learning_rate": 1e-05,
      "loss": 1.7513,
      "step": 18
    },
    {
      "epoch": 0.013179571663920923,
      "grad_norm": 2.503169241059693,
      "learning_rate": 1e-05,
      "loss": 1.7598,
      "step": 19
    },
    {
      "epoch": 0.013873233330443076,
      "grad_norm": 2.5731073684349752,
      "learning_rate": 1e-05,
      "loss": 1.7947,
      "step": 20
    },
    {
      "epoch": 0.01456689499696523,
      "grad_norm": 2.5424918030748245,
      "learning_rate": 1e-05,
      "loss": 1.7304,
      "step": 21
    },
    {
      "epoch": 0.015260556663487385,
      "grad_norm": 2.280575429076331,
      "learning_rate": 1e-05,
      "loss": 1.6995,
      "step": 22
    },
    {
      "epoch": 0.015954218330009536,
      "grad_norm": 2.493960760059663,
      "learning_rate": 1e-05,
      "loss": 1.6744,
      "step": 23
    },
    {
      "epoch": 0.01664787999653169,
      "grad_norm": 2.238029250091602,
      "learning_rate": 1e-05,
      "loss": 1.7026,
      "step": 24
    },
    {
      "epoch": 0.017341541663053846,
      "grad_norm": 2.2414764681865362,
      "learning_rate": 1e-05,
      "loss": 1.6437,
      "step": 25
    },
    {
      "epoch": 0.018035203329575998,
      "grad_norm": 2.3762425127817135,
      "learning_rate": 1e-05,
      "loss": 1.7258,
      "step": 26
    },
    {
      "epoch": 0.018728864996098153,
      "grad_norm": 2.471220653637295,
      "learning_rate": 1e-05,
      "loss": 1.7413,
      "step": 27
    },
    {
      "epoch": 0.01942252666262031,
      "grad_norm": 2.236115513685021,
      "learning_rate": 1e-05,
      "loss": 1.7788,
      "step": 28
    },
    {
      "epoch": 0.02011618832914246,
      "grad_norm": 2.1158621654066128,
      "learning_rate": 1e-05,
      "loss": 1.6955,
      "step": 29
    },
    {
      "epoch": 0.020809849995664615,
      "grad_norm": 2.438210038803027,
      "learning_rate": 1e-05,
      "loss": 1.7435,
      "step": 30
    },
    {
      "epoch": 0.021503511662186767,
      "grad_norm": 2.2049092103165484,
      "learning_rate": 1e-05,
      "loss": 1.7103,
      "step": 31
    },
    {
      "epoch": 0.022197173328708922,
      "grad_norm": 1.992834120231878,
      "learning_rate": 1e-05,
      "loss": 1.6941,
      "step": 32
    },
    {
      "epoch": 0.022890834995231077,
      "grad_norm": 2.3021272374064767,
      "learning_rate": 1e-05,
      "loss": 1.667,
      "step": 33
    },
    {
      "epoch": 0.02358449666175323,
      "grad_norm": 2.1119253371859426,
      "learning_rate": 1e-05,
      "loss": 1.6849,
      "step": 34
    },
    {
      "epoch": 0.024278158328275384,
      "grad_norm": 2.3243314568193445,
      "learning_rate": 1e-05,
      "loss": 1.6466,
      "step": 35
    },
    {
      "epoch": 0.02497181999479754,
      "grad_norm": 2.2531985009302176,
      "learning_rate": 1e-05,
      "loss": 1.6459,
      "step": 36
    },
    {
      "epoch": 0.02566548166131969,
      "grad_norm": 2.1844804086893777,
      "learning_rate": 1e-05,
      "loss": 1.6794,
      "step": 37
    },
    {
      "epoch": 0.026359143327841845,
      "grad_norm": 2.2530966998426245,
      "learning_rate": 1e-05,
      "loss": 1.6601,
      "step": 38
    },
    {
      "epoch": 0.027052804994364,
      "grad_norm": 2.1421735569008686,
      "learning_rate": 1e-05,
      "loss": 1.6335,
      "step": 39
    },
    {
      "epoch": 0.027746466660886152,
      "grad_norm": 2.181725062044003,
      "learning_rate": 1e-05,
      "loss": 1.6319,
      "step": 40
    },
    {
      "epoch": 0.028440128327408307,
      "grad_norm": 2.193692520161033,
      "learning_rate": 1e-05,
      "loss": 1.6363,
      "step": 41
    },
    {
      "epoch": 0.02913378999393046,
      "grad_norm": 2.1086393378123818,
      "learning_rate": 1e-05,
      "loss": 1.6735,
      "step": 42
    },
    {
      "epoch": 0.029827451660452614,
      "grad_norm": 2.0131860874790175,
      "learning_rate": 1e-05,
      "loss": 1.5967,
      "step": 43
    },
    {
      "epoch": 0.03052111332697477,
      "grad_norm": 2.1758701969638,
      "learning_rate": 1e-05,
      "loss": 1.667,
      "step": 44
    },
    {
      "epoch": 0.03121477499349692,
      "grad_norm": 2.10881747915129,
      "learning_rate": 1e-05,
      "loss": 1.6002,
      "step": 45
    },
    {
      "epoch": 0.03190843666001907,
      "grad_norm": 2.1929235643249707,
      "learning_rate": 1e-05,
      "loss": 1.6109,
      "step": 46
    },
    {
      "epoch": 0.03260209832654123,
      "grad_norm": 2.2382019559100503,
      "learning_rate": 1e-05,
      "loss": 1.6309,
      "step": 47
    },
    {
      "epoch": 0.03329575999306338,
      "grad_norm": 2.204542258967164,
      "learning_rate": 1e-05,
      "loss": 1.6123,
      "step": 48
    },
    {
      "epoch": 0.03398942165958554,
      "grad_norm": 2.1983829138121194,
      "learning_rate": 1e-05,
      "loss": 1.6371,
      "step": 49
    },
    {
      "epoch": 0.03468308332610769,
      "grad_norm": 2.010616851796152,
      "learning_rate": 1e-05,
      "loss": 1.648,
      "step": 50
    },
    {
      "epoch": 0.03537674499262985,
      "grad_norm": 2.109337350234141,
      "learning_rate": 1e-05,
      "loss": 1.5939,
      "step": 51
    },
    {
      "epoch": 0.036070406659151996,
      "grad_norm": 2.5304099594684657,
      "learning_rate": 1e-05,
      "loss": 1.5806,
      "step": 52
    },
    {
      "epoch": 0.03676406832567415,
      "grad_norm": 2.159949455039585,
      "learning_rate": 1e-05,
      "loss": 1.6898,
      "step": 53
    },
    {
      "epoch": 0.037457729992196306,
      "grad_norm": 2.3028114991190227,
      "learning_rate": 1e-05,
      "loss": 1.6102,
      "step": 54
    },
    {
      "epoch": 0.03815139165871846,
      "grad_norm": 2.137637210948916,
      "learning_rate": 1e-05,
      "loss": 1.5976,
      "step": 55
    },
    {
      "epoch": 0.03884505332524062,
      "grad_norm": 2.283279122337232,
      "learning_rate": 1e-05,
      "loss": 1.6182,
      "step": 56
    },
    {
      "epoch": 0.039538714991762765,
      "grad_norm": 2.352555898736503,
      "learning_rate": 1e-05,
      "loss": 1.5764,
      "step": 57
    },
    {
      "epoch": 0.04023237665828492,
      "grad_norm": 2.2640721232017578,
      "learning_rate": 1e-05,
      "loss": 1.606,
      "step": 58
    },
    {
      "epoch": 0.040926038324807075,
      "grad_norm": 2.2046947158269052,
      "learning_rate": 1e-05,
      "loss": 1.6621,
      "step": 59
    },
    {
      "epoch": 0.04161969999132923,
      "grad_norm": 2.2356515653560955,
      "learning_rate": 1e-05,
      "loss": 1.5783,
      "step": 60
    },
    {
      "epoch": 0.042313361657851385,
      "grad_norm": 2.0914853090761656,
      "learning_rate": 1e-05,
      "loss": 1.6386,
      "step": 61
    },
    {
      "epoch": 0.04300702332437353,
      "grad_norm": 2.0219313193549335,
      "learning_rate": 1e-05,
      "loss": 1.5409,
      "step": 62
    },
    {
      "epoch": 0.04370068499089569,
      "grad_norm": 2.0579915512686675,
      "learning_rate": 1e-05,
      "loss": 1.6024,
      "step": 63
    },
    {
      "epoch": 0.044394346657417844,
      "grad_norm": 2.1350362922236563,
      "learning_rate": 1e-05,
      "loss": 1.5979,
      "step": 64
    },
    {
      "epoch": 0.04508800832394,
      "grad_norm": 2.1574547660781493,
      "learning_rate": 1e-05,
      "loss": 1.5968,
      "step": 65
    },
    {
      "epoch": 0.045781669990462154,
      "grad_norm": 2.032927708489895,
      "learning_rate": 1e-05,
      "loss": 1.623,
      "step": 66
    },
    {
      "epoch": 0.04647533165698431,
      "grad_norm": 2.310697906396082,
      "learning_rate": 1e-05,
      "loss": 1.58,
      "step": 67
    },
    {
      "epoch": 0.04716899332350646,
      "grad_norm": 2.082166726223734,
      "learning_rate": 1e-05,
      "loss": 1.5855,
      "step": 68
    },
    {
      "epoch": 0.04786265499002861,
      "grad_norm": 2.1805698151273205,
      "learning_rate": 1e-05,
      "loss": 1.5406,
      "step": 69
    },
    {
      "epoch": 0.04855631665655077,
      "grad_norm": 2.1517290677451366,
      "learning_rate": 1e-05,
      "loss": 1.5794,
      "step": 70
    },
    {
      "epoch": 0.04924997832307292,
      "grad_norm": 2.2324944455303286,
      "learning_rate": 1e-05,
      "loss": 1.5897,
      "step": 71
    },
    {
      "epoch": 0.04994363998959508,
      "grad_norm": 2.117363841822313,
      "learning_rate": 1e-05,
      "loss": 1.6344,
      "step": 72
    },
    {
      "epoch": 0.050637301656117226,
      "grad_norm": 2.0840589709094055,
      "learning_rate": 1e-05,
      "loss": 1.5593,
      "step": 73
    },
    {
      "epoch": 0.05133096332263938,
      "grad_norm": 2.139635042522081,
      "learning_rate": 1e-05,
      "loss": 1.5604,
      "step": 74
    },
    {
      "epoch": 0.052024624989161536,
      "grad_norm": 1.9773852269088779,
      "learning_rate": 1e-05,
      "loss": 1.5132,
      "step": 75
    },
    {
      "epoch": 0.05271828665568369,
      "grad_norm": 2.2497342140467227,
      "learning_rate": 1e-05,
      "loss": 1.5689,
      "step": 76
    },
    {
      "epoch": 0.053411948322205846,
      "grad_norm": 2.1828363324950515,
      "learning_rate": 1e-05,
      "loss": 1.5775,
      "step": 77
    },
    {
      "epoch": 0.054105609988728,
      "grad_norm": 2.397270079677964,
      "learning_rate": 1e-05,
      "loss": 1.5876,
      "step": 78
    },
    {
      "epoch": 0.05479927165525015,
      "grad_norm": 2.132037167524842,
      "learning_rate": 1e-05,
      "loss": 1.5388,
      "step": 79
    },
    {
      "epoch": 0.055492933321772304,
      "grad_norm": 2.105011522745257,
      "learning_rate": 1e-05,
      "loss": 1.5878,
      "step": 80
    },
    {
      "epoch": 0.05618659498829446,
      "grad_norm": 2.331199900780172,
      "learning_rate": 1e-05,
      "loss": 1.6095,
      "step": 81
    },
    {
      "epoch": 0.056880256654816615,
      "grad_norm": 2.0679915620822915,
      "learning_rate": 1e-05,
      "loss": 1.5489,
      "step": 82
    },
    {
      "epoch": 0.05757391832133877,
      "grad_norm": 2.1615822894325154,
      "learning_rate": 1e-05,
      "loss": 1.6244,
      "step": 83
    },
    {
      "epoch": 0.05826757998786092,
      "grad_norm": 1.979313705736586,
      "learning_rate": 1e-05,
      "loss": 1.5881,
      "step": 84
    },
    {
      "epoch": 0.05896124165438307,
      "grad_norm": 2.1629527862643063,
      "learning_rate": 1e-05,
      "loss": 1.5775,
      "step": 85
    },
    {
      "epoch": 0.05965490332090523,
      "grad_norm": 2.3681176161050956,
      "learning_rate": 1e-05,
      "loss": 1.5714,
      "step": 86
    },
    {
      "epoch": 0.06034856498742738,
      "grad_norm": 1.9456880551179383,
      "learning_rate": 1e-05,
      "loss": 1.5516,
      "step": 87
    },
    {
      "epoch": 0.06104222665394954,
      "grad_norm": 2.1286899349051343,
      "learning_rate": 1e-05,
      "loss": 1.5368,
      "step": 88
    },
    {
      "epoch": 0.06173588832047169,
      "grad_norm": 2.124774583311733,
      "learning_rate": 1e-05,
      "loss": 1.5244,
      "step": 89
    },
    {
      "epoch": 0.06242954998699384,
      "grad_norm": 2.255844207300473,
      "learning_rate": 1e-05,
      "loss": 1.6468,
      "step": 90
    },
    {
      "epoch": 0.063123211653516,
      "grad_norm": 2.253221543014536,
      "learning_rate": 1e-05,
      "loss": 1.5459,
      "step": 91
    },
    {
      "epoch": 0.06381687332003814,
      "grad_norm": 2.161784717901113,
      "learning_rate": 1e-05,
      "loss": 1.5295,
      "step": 92
    },
    {
      "epoch": 0.0645105349865603,
      "grad_norm": 2.100171711959203,
      "learning_rate": 1e-05,
      "loss": 1.5432,
      "step": 93
    },
    {
      "epoch": 0.06520419665308246,
      "grad_norm": 2.0472421305517776,
      "learning_rate": 1e-05,
      "loss": 1.5722,
      "step": 94
    },
    {
      "epoch": 0.06589785831960461,
      "grad_norm": 2.174127269142891,
      "learning_rate": 1e-05,
      "loss": 1.5301,
      "step": 95
    },
    {
      "epoch": 0.06659151998612677,
      "grad_norm": 2.302981491215993,
      "learning_rate": 1e-05,
      "loss": 1.5495,
      "step": 96
    },
    {
      "epoch": 0.06728518165264892,
      "grad_norm": 2.093167418727749,
      "learning_rate": 1e-05,
      "loss": 1.5604,
      "step": 97
    },
    {
      "epoch": 0.06797884331917108,
      "grad_norm": 2.0951541746401263,
      "learning_rate": 1e-05,
      "loss": 1.5769,
      "step": 98
    },
    {
      "epoch": 0.06867250498569323,
      "grad_norm": 1.9576369412503245,
      "learning_rate": 1e-05,
      "loss": 1.6003,
      "step": 99
    },
    {
      "epoch": 0.06936616665221539,
      "grad_norm": 2.3419839237311266,
      "learning_rate": 1e-05,
      "loss": 1.4925,
      "step": 100
    },
    {
      "epoch": 0.07005982831873754,
      "grad_norm": 2.0633427358605774,
      "learning_rate": 1e-05,
      "loss": 1.5698,
      "step": 101
    },
    {
      "epoch": 0.0707534899852597,
      "grad_norm": 2.1113080337620795,
      "learning_rate": 1e-05,
      "loss": 1.593,
      "step": 102
    },
    {
      "epoch": 0.07144715165178184,
      "grad_norm": 2.200709143893939,
      "learning_rate": 1e-05,
      "loss": 1.5487,
      "step": 103
    },
    {
      "epoch": 0.07214081331830399,
      "grad_norm": 2.115437173154835,
      "learning_rate": 1e-05,
      "loss": 1.5184,
      "step": 104
    },
    {
      "epoch": 0.07283447498482615,
      "grad_norm": 1.9592926067021565,
      "learning_rate": 1e-05,
      "loss": 1.5824,
      "step": 105
    },
    {
      "epoch": 0.0735281366513483,
      "grad_norm": 2.0541106318447366,
      "learning_rate": 1e-05,
      "loss": 1.5265,
      "step": 106
    },
    {
      "epoch": 0.07422179831787046,
      "grad_norm": 1.9381187865210794,
      "learning_rate": 1e-05,
      "loss": 1.5628,
      "step": 107
    },
    {
      "epoch": 0.07491545998439261,
      "grad_norm": 2.1035504116541084,
      "learning_rate": 1e-05,
      "loss": 1.6001,
      "step": 108
    },
    {
      "epoch": 0.07560912165091477,
      "grad_norm": 2.143235938125612,
      "learning_rate": 1e-05,
      "loss": 1.5153,
      "step": 109
    },
    {
      "epoch": 0.07630278331743692,
      "grad_norm": 2.0039392778273357,
      "learning_rate": 1e-05,
      "loss": 1.5603,
      "step": 110
    },
    {
      "epoch": 0.07699644498395908,
      "grad_norm": 2.0158397924903233,
      "learning_rate": 1e-05,
      "loss": 1.5216,
      "step": 111
    },
    {
      "epoch": 0.07769010665048123,
      "grad_norm": 1.894029714001099,
      "learning_rate": 1e-05,
      "loss": 1.5587,
      "step": 112
    },
    {
      "epoch": 0.07838376831700339,
      "grad_norm": 2.432706928463119,
      "learning_rate": 1e-05,
      "loss": 1.5707,
      "step": 113
    },
    {
      "epoch": 0.07907742998352553,
      "grad_norm": 2.036785190887291,
      "learning_rate": 1e-05,
      "loss": 1.4926,
      "step": 114
    },
    {
      "epoch": 0.07977109165004768,
      "grad_norm": 1.9400189433970951,
      "learning_rate": 1e-05,
      "loss": 1.6043,
      "step": 115
    },
    {
      "epoch": 0.08046475331656984,
      "grad_norm": 1.919202983721404,
      "learning_rate": 1e-05,
      "loss": 1.537,
      "step": 116
    },
    {
      "epoch": 0.081158414983092,
      "grad_norm": 2.0977172125022707,
      "learning_rate": 1e-05,
      "loss": 1.5141,
      "step": 117
    },
    {
      "epoch": 0.08185207664961415,
      "grad_norm": 2.06407983728045,
      "learning_rate": 1e-05,
      "loss": 1.5672,
      "step": 118
    },
    {
      "epoch": 0.0825457383161363,
      "grad_norm": 2.078141165316271,
      "learning_rate": 1e-05,
      "loss": 1.5411,
      "step": 119
    },
    {
      "epoch": 0.08323939998265846,
      "grad_norm": 2.128515674818184,
      "learning_rate": 1e-05,
      "loss": 1.574,
      "step": 120
    },
    {
      "epoch": 0.08393306164918062,
      "grad_norm": 2.204639065305811,
      "learning_rate": 1e-05,
      "loss": 1.5411,
      "step": 121
    },
    {
      "epoch": 0.08462672331570277,
      "grad_norm": 2.028431294158661,
      "learning_rate": 1e-05,
      "loss": 1.4471,
      "step": 122
    },
    {
      "epoch": 0.08532038498222493,
      "grad_norm": 1.9634386333120701,
      "learning_rate": 1e-05,
      "loss": 1.5498,
      "step": 123
    },
    {
      "epoch": 0.08601404664874707,
      "grad_norm": 2.029215017631285,
      "learning_rate": 1e-05,
      "loss": 1.5568,
      "step": 124
    },
    {
      "epoch": 0.08670770831526922,
      "grad_norm": 2.0663413738174397,
      "learning_rate": 1e-05,
      "loss": 1.5426,
      "step": 125
    },
    {
      "epoch": 0.08740136998179138,
      "grad_norm": 1.9619014687764207,
      "learning_rate": 1e-05,
      "loss": 1.5133,
      "step": 126
    },
    {
      "epoch": 0.08809503164831353,
      "grad_norm": 1.9162219051787666,
      "learning_rate": 1e-05,
      "loss": 1.5464,
      "step": 127
    },
    {
      "epoch": 0.08878869331483569,
      "grad_norm": 2.261542371230024,
      "learning_rate": 1e-05,
      "loss": 1.4646,
      "step": 128
    },
    {
      "epoch": 0.08948235498135784,
      "grad_norm": 1.9715291734543514,
      "learning_rate": 1e-05,
      "loss": 1.5132,
      "step": 129
    },
    {
      "epoch": 0.09017601664788,
      "grad_norm": 2.0203777348766834,
      "learning_rate": 1e-05,
      "loss": 1.5354,
      "step": 130
    },
    {
      "epoch": 0.09086967831440215,
      "grad_norm": 1.9138585711617677,
      "learning_rate": 1e-05,
      "loss": 1.5407,
      "step": 131
    },
    {
      "epoch": 0.09156333998092431,
      "grad_norm": 2.0173322994738596,
      "learning_rate": 1e-05,
      "loss": 1.5439,
      "step": 132
    },
    {
      "epoch": 0.09225700164744646,
      "grad_norm": 2.107269356193457,
      "learning_rate": 1e-05,
      "loss": 1.5155,
      "step": 133
    },
    {
      "epoch": 0.09295066331396862,
      "grad_norm": 1.8484907386064835,
      "learning_rate": 1e-05,
      "loss": 1.5765,
      "step": 134
    },
    {
      "epoch": 0.09364432498049076,
      "grad_norm": 1.9245702524265067,
      "learning_rate": 1e-05,
      "loss": 1.5584,
      "step": 135
    },
    {
      "epoch": 0.09433798664701291,
      "grad_norm": 2.01491446609071,
      "learning_rate": 1e-05,
      "loss": 1.5813,
      "step": 136
    },
    {
      "epoch": 0.09503164831353507,
      "grad_norm": 2.139603451563103,
      "learning_rate": 1e-05,
      "loss": 1.5156,
      "step": 137
    },
    {
      "epoch": 0.09572530998005722,
      "grad_norm": 2.02926570002898,
      "learning_rate": 1e-05,
      "loss": 1.5331,
      "step": 138
    },
    {
      "epoch": 0.09641897164657938,
      "grad_norm": 2.0788419828866314,
      "learning_rate": 1e-05,
      "loss": 1.5504,
      "step": 139
    },
    {
      "epoch": 0.09711263331310153,
      "grad_norm": 2.1919839551775016,
      "learning_rate": 1e-05,
      "loss": 1.519,
      "step": 140
    },
    {
      "epoch": 0.09780629497962369,
      "grad_norm": 1.905930415266768,
      "learning_rate": 1e-05,
      "loss": 1.5084,
      "step": 141
    },
    {
      "epoch": 0.09849995664614584,
      "grad_norm": 2.107652533544824,
      "learning_rate": 1e-05,
      "loss": 1.5004,
      "step": 142
    },
    {
      "epoch": 0.099193618312668,
      "grad_norm": 1.9142412206590709,
      "learning_rate": 1e-05,
      "loss": 1.5488,
      "step": 143
    },
    {
      "epoch": 0.09988727997919015,
      "grad_norm": 1.8910378379533608,
      "learning_rate": 1e-05,
      "loss": 1.4912,
      "step": 144
    },
    {
      "epoch": 0.10058094164571231,
      "grad_norm": 2.0531944494577385,
      "learning_rate": 1e-05,
      "loss": 1.5011,
      "step": 145
    },
    {
      "epoch": 0.10127460331223445,
      "grad_norm": 1.9561470527096887,
      "learning_rate": 1e-05,
      "loss": 1.483,
      "step": 146
    },
    {
      "epoch": 0.1019682649787566,
      "grad_norm": 2.0182745726837186,
      "learning_rate": 1e-05,
      "loss": 1.4969,
      "step": 147
    },
    {
      "epoch": 0.10266192664527876,
      "grad_norm": 2.1655704406766305,
      "learning_rate": 1e-05,
      "loss": 1.5151,
      "step": 148
    },
    {
      "epoch": 0.10335558831180092,
      "grad_norm": 2.067383515526746,
      "learning_rate": 1e-05,
      "loss": 1.4932,
      "step": 149
    },
    {
      "epoch": 0.10404924997832307,
      "grad_norm": 2.16565372834465,
      "learning_rate": 1e-05,
      "loss": 1.5211,
      "step": 150
    },
    {
      "epoch": 0.10474291164484523,
      "grad_norm": 2.067199972931285,
      "learning_rate": 1e-05,
      "loss": 1.5182,
      "step": 151
    },
    {
      "epoch": 0.10543657331136738,
      "grad_norm": 2.021560871683723,
      "learning_rate": 1e-05,
      "loss": 1.5216,
      "step": 152
    },
    {
      "epoch": 0.10613023497788954,
      "grad_norm": 2.0837036440044914,
      "learning_rate": 1e-05,
      "loss": 1.4789,
      "step": 153
    },
    {
      "epoch": 0.10682389664441169,
      "grad_norm": 2.105326579649833,
      "learning_rate": 1e-05,
      "loss": 1.5532,
      "step": 154
    },
    {
      "epoch": 0.10751755831093385,
      "grad_norm": 1.9531358624572481,
      "learning_rate": 1e-05,
      "loss": 1.5709,
      "step": 155
    },
    {
      "epoch": 0.108211219977456,
      "grad_norm": 2.064713519765923,
      "learning_rate": 1e-05,
      "loss": 1.5378,
      "step": 156
    },
    {
      "epoch": 0.10890488164397814,
      "grad_norm": 2.1618499671142226,
      "learning_rate": 1e-05,
      "loss": 1.5176,
      "step": 157
    },
    {
      "epoch": 0.1095985433105003,
      "grad_norm": 1.9086208868139072,
      "learning_rate": 1e-05,
      "loss": 1.5546,
      "step": 158
    },
    {
      "epoch": 0.11029220497702245,
      "grad_norm": 2.02664550739396,
      "learning_rate": 1e-05,
      "loss": 1.4997,
      "step": 159
    },
    {
      "epoch": 0.11098586664354461,
      "grad_norm": 1.999544656281972,
      "learning_rate": 1e-05,
      "loss": 1.5003,
      "step": 160
    },
    {
      "epoch": 0.11167952831006676,
      "grad_norm": 2.053756926485644,
      "learning_rate": 1e-05,
      "loss": 1.5292,
      "step": 161
    },
    {
      "epoch": 0.11237318997658892,
      "grad_norm": 2.094168834382519,
      "learning_rate": 1e-05,
      "loss": 1.4676,
      "step": 162
    },
    {
      "epoch": 0.11306685164311107,
      "grad_norm": 2.002754028656303,
      "learning_rate": 1e-05,
      "loss": 1.5105,
      "step": 163
    },
    {
      "epoch": 0.11376051330963323,
      "grad_norm": 1.9434708295091558,
      "learning_rate": 1e-05,
      "loss": 1.5053,
      "step": 164
    },
    {
      "epoch": 0.11445417497615538,
      "grad_norm": 2.0330157669650815,
      "learning_rate": 1e-05,
      "loss": 1.4888,
      "step": 165
    },
    {
      "epoch": 0.11514783664267754,
      "grad_norm": 1.9289370322291217,
      "learning_rate": 1e-05,
      "loss": 1.5277,
      "step": 166
    },
    {
      "epoch": 0.1158414983091997,
      "grad_norm": 1.959995924753835,
      "learning_rate": 1e-05,
      "loss": 1.4825,
      "step": 167
    },
    {
      "epoch": 0.11653515997572184,
      "grad_norm": 1.7991848893944669,
      "learning_rate": 1e-05,
      "loss": 1.5005,
      "step": 168
    },
    {
      "epoch": 0.11722882164224399,
      "grad_norm": 1.8734306548761093,
      "learning_rate": 1e-05,
      "loss": 1.515,
      "step": 169
    },
    {
      "epoch": 0.11792248330876615,
      "grad_norm": 2.0015044635043218,
      "learning_rate": 1e-05,
      "loss": 1.5252,
      "step": 170
    },
    {
      "epoch": 0.1186161449752883,
      "grad_norm": 2.2665936004262273,
      "learning_rate": 1e-05,
      "loss": 1.4838,
      "step": 171
    },
    {
      "epoch": 0.11930980664181046,
      "grad_norm": 1.9544216259159037,
      "learning_rate": 1e-05,
      "loss": 1.4925,
      "step": 172
    },
    {
      "epoch": 0.12000346830833261,
      "grad_norm": 2.1154532935532715,
      "learning_rate": 1e-05,
      "loss": 1.5146,
      "step": 173
    },
    {
      "epoch": 0.12069712997485477,
      "grad_norm": 2.1021548296617927,
      "learning_rate": 1e-05,
      "loss": 1.4915,
      "step": 174
    },
    {
      "epoch": 0.12139079164137692,
      "grad_norm": 2.0834121176679634,
      "learning_rate": 1e-05,
      "loss": 1.5075,
      "step": 175
    },
    {
      "epoch": 0.12208445330789908,
      "grad_norm": 2.0016972202996213,
      "learning_rate": 1e-05,
      "loss": 1.5255,
      "step": 176
    },
    {
      "epoch": 0.12277811497442123,
      "grad_norm": 2.0612598236678523,
      "learning_rate": 1e-05,
      "loss": 1.5149,
      "step": 177
    },
    {
      "epoch": 0.12347177664094337,
      "grad_norm": 2.0229183358200484,
      "learning_rate": 1e-05,
      "loss": 1.5249,
      "step": 178
    },
    {
      "epoch": 0.12416543830746553,
      "grad_norm": 2.263550407359551,
      "learning_rate": 1e-05,
      "loss": 1.5244,
      "step": 179
    },
    {
      "epoch": 0.12485909997398768,
      "grad_norm": 2.08456086134308,
      "learning_rate": 1e-05,
      "loss": 1.4981,
      "step": 180
    },
    {
      "epoch": 0.12555276164050985,
      "grad_norm": 2.0774621427372386,
      "learning_rate": 1e-05,
      "loss": 1.5143,
      "step": 181
    },
    {
      "epoch": 0.126246423307032,
      "grad_norm": 2.1010285933058626,
      "learning_rate": 1e-05,
      "loss": 1.4504,
      "step": 182
    },
    {
      "epoch": 0.12694008497355416,
      "grad_norm": 2.0102176748558405,
      "learning_rate": 1e-05,
      "loss": 1.4146,
      "step": 183
    },
    {
      "epoch": 0.1276337466400763,
      "grad_norm": 2.095717278113951,
      "learning_rate": 1e-05,
      "loss": 1.5395,
      "step": 184
    },
    {
      "epoch": 0.12832740830659844,
      "grad_norm": 2.193298827450061,
      "learning_rate": 1e-05,
      "loss": 1.4769,
      "step": 185
    },
    {
      "epoch": 0.1290210699731206,
      "grad_norm": 1.9388355574681662,
      "learning_rate": 1e-05,
      "loss": 1.5375,
      "step": 186
    },
    {
      "epoch": 0.12971473163964276,
      "grad_norm": 2.0877632629967913,
      "learning_rate": 1e-05,
      "loss": 1.5471,
      "step": 187
    },
    {
      "epoch": 0.1304083933061649,
      "grad_norm": 2.22367106492369,
      "learning_rate": 1e-05,
      "loss": 1.4583,
      "step": 188
    },
    {
      "epoch": 0.13110205497268707,
      "grad_norm": 1.9943605205254191,
      "learning_rate": 1e-05,
      "loss": 1.5008,
      "step": 189
    },
    {
      "epoch": 0.13179571663920922,
      "grad_norm": 2.096604510058919,
      "learning_rate": 1e-05,
      "loss": 1.5449,
      "step": 190
    },
    {
      "epoch": 0.13248937830573138,
      "grad_norm": 1.9264619546423505,
      "learning_rate": 1e-05,
      "loss": 1.5068,
      "step": 191
    },
    {
      "epoch": 0.13318303997225353,
      "grad_norm": 2.250824707812072,
      "learning_rate": 1e-05,
      "loss": 1.4505,
      "step": 192
    },
    {
      "epoch": 0.13387670163877569,
      "grad_norm": 2.166944357294215,
      "learning_rate": 1e-05,
      "loss": 1.5341,
      "step": 193
    },
    {
      "epoch": 0.13457036330529784,
      "grad_norm": 2.0250424027409673,
      "learning_rate": 1e-05,
      "loss": 1.4852,
      "step": 194
    },
    {
      "epoch": 0.13526402497182,
      "grad_norm": 2.122488575543949,
      "learning_rate": 1e-05,
      "loss": 1.4973,
      "step": 195
    },
    {
      "epoch": 0.13595768663834215,
      "grad_norm": 2.1098754788199647,
      "learning_rate": 1e-05,
      "loss": 1.5069,
      "step": 196
    },
    {
      "epoch": 0.1366513483048643,
      "grad_norm": 2.088689323771004,
      "learning_rate": 1e-05,
      "loss": 1.4936,
      "step": 197
    },
    {
      "epoch": 0.13734500997138646,
      "grad_norm": 1.88466378266683,
      "learning_rate": 1e-05,
      "loss": 1.5068,
      "step": 198
    },
    {
      "epoch": 0.13803867163790862,
      "grad_norm": 2.0160265524905845,
      "learning_rate": 1e-05,
      "loss": 1.5368,
      "step": 199
    },
    {
      "epoch": 0.13873233330443077,
      "grad_norm": 1.8858982582224784,
      "learning_rate": 1e-05,
      "loss": 1.4676,
      "step": 200
    },
    {
      "epoch": 0.13942599497095293,
      "grad_norm": 1.8632802765837246,
      "learning_rate": 1e-05,
      "loss": 1.5151,
      "step": 201
    },
    {
      "epoch": 0.14011965663747508,
      "grad_norm": 1.9258461198592782,
      "learning_rate": 1e-05,
      "loss": 1.501,
      "step": 202
    },
    {
      "epoch": 0.14081331830399724,
      "grad_norm": 1.9872609586669983,
      "learning_rate": 1e-05,
      "loss": 1.4688,
      "step": 203
    },
    {
      "epoch": 0.1415069799705194,
      "grad_norm": 1.8678557794614834,
      "learning_rate": 1e-05,
      "loss": 1.5344,
      "step": 204
    },
    {
      "epoch": 0.14220064163704152,
      "grad_norm": 1.8914203459451417,
      "learning_rate": 1e-05,
      "loss": 1.4892,
      "step": 205
    },
    {
      "epoch": 0.14289430330356367,
      "grad_norm": 1.9911065628954558,
      "learning_rate": 1e-05,
      "loss": 1.5092,
      "step": 206
    },
    {
      "epoch": 0.14358796497008583,
      "grad_norm": 2.0485687976317966,
      "learning_rate": 1e-05,
      "loss": 1.5167,
      "step": 207
    },
    {
      "epoch": 0.14428162663660798,
      "grad_norm": 1.9496266310348234,
      "learning_rate": 1e-05,
      "loss": 1.4602,
      "step": 208
    },
    {
      "epoch": 0.14497528830313014,
      "grad_norm": 2.2871818269739754,
      "learning_rate": 1e-05,
      "loss": 1.4685,
      "step": 209
    },
    {
      "epoch": 0.1456689499696523,
      "grad_norm": 1.984619214022057,
      "learning_rate": 1e-05,
      "loss": 1.4409,
      "step": 210
    },
    {
      "epoch": 0.14636261163617445,
      "grad_norm": 1.9696667123704634,
      "learning_rate": 1e-05,
      "loss": 1.5176,
      "step": 211
    },
    {
      "epoch": 0.1470562733026966,
      "grad_norm": 1.933399558736106,
      "learning_rate": 1e-05,
      "loss": 1.5327,
      "step": 212
    },
    {
      "epoch": 0.14774993496921876,
      "grad_norm": 2.1861585612655885,
      "learning_rate": 1e-05,
      "loss": 1.4685,
      "step": 213
    },
    {
      "epoch": 0.14844359663574092,
      "grad_norm": 2.0747570404469804,
      "learning_rate": 1e-05,
      "loss": 1.4925,
      "step": 214
    },
    {
      "epoch": 0.14913725830226307,
      "grad_norm": 2.0747837540555096,
      "learning_rate": 1e-05,
      "loss": 1.5019,
      "step": 215
    },
    {
      "epoch": 0.14983091996878523,
      "grad_norm": 2.0326660466120297,
      "learning_rate": 1e-05,
      "loss": 1.4597,
      "step": 216
    },
    {
      "epoch": 0.15052458163530738,
      "grad_norm": 1.7823779998232254,
      "learning_rate": 1e-05,
      "loss": 1.4539,
      "step": 217
    },
    {
      "epoch": 0.15121824330182954,
      "grad_norm": 2.157188291473536,
      "learning_rate": 1e-05,
      "loss": 1.5397,
      "step": 218
    },
    {
      "epoch": 0.1519119049683517,
      "grad_norm": 2.0183890562420905,
      "learning_rate": 1e-05,
      "loss": 1.4753,
      "step": 219
    },
    {
      "epoch": 0.15260556663487385,
      "grad_norm": 1.9531135223586058,
      "learning_rate": 1e-05,
      "loss": 1.4315,
      "step": 220
    },
    {
      "epoch": 0.153299228301396,
      "grad_norm": 2.038347402060851,
      "learning_rate": 1e-05,
      "loss": 1.4246,
      "step": 221
    },
    {
      "epoch": 0.15399288996791816,
      "grad_norm": 2.0488940765067967,
      "learning_rate": 1e-05,
      "loss": 1.4461,
      "step": 222
    },
    {
      "epoch": 0.1546865516344403,
      "grad_norm": 2.231661501840028,
      "learning_rate": 1e-05,
      "loss": 1.472,
      "step": 223
    },
    {
      "epoch": 0.15538021330096247,
      "grad_norm": 1.9308119943251083,
      "learning_rate": 1e-05,
      "loss": 1.5128,
      "step": 224
    },
    {
      "epoch": 0.15607387496748462,
      "grad_norm": 2.229149512340474,
      "learning_rate": 1e-05,
      "loss": 1.4479,
      "step": 225
    },
    {
      "epoch": 0.15676753663400678,
      "grad_norm": 2.0928578049018483,
      "learning_rate": 1e-05,
      "loss": 1.5009,
      "step": 226
    },
    {
      "epoch": 0.1574611983005289,
      "grad_norm": 1.9663752374718868,
      "learning_rate": 1e-05,
      "loss": 1.4818,
      "step": 227
    },
    {
      "epoch": 0.15815485996705106,
      "grad_norm": 2.1034138941034786,
      "learning_rate": 1e-05,
      "loss": 1.4859,
      "step": 228
    },
    {
      "epoch": 0.15884852163357321,
      "grad_norm": 2.3560738411841626,
      "learning_rate": 1e-05,
      "loss": 1.4779,
      "step": 229
    },
    {
      "epoch": 0.15954218330009537,
      "grad_norm": 2.4332643261654403,
      "learning_rate": 1e-05,
      "loss": 1.4809,
      "step": 230
    },
    {
      "epoch": 0.16023584496661752,
      "grad_norm": 2.0278596830202757,
      "learning_rate": 1e-05,
      "loss": 1.4559,
      "step": 231
    },
    {
      "epoch": 0.16092950663313968,
      "grad_norm": 2.3341820259604984,
      "learning_rate": 1e-05,
      "loss": 1.479,
      "step": 232
    },
    {
      "epoch": 0.16162316829966183,
      "grad_norm": 1.9872750396163408,
      "learning_rate": 1e-05,
      "loss": 1.4715,
      "step": 233
    },
    {
      "epoch": 0.162316829966184,
      "grad_norm": 1.9999402472599845,
      "learning_rate": 1e-05,
      "loss": 1.5459,
      "step": 234
    },
    {
      "epoch": 0.16301049163270614,
      "grad_norm": 2.128103039977492,
      "learning_rate": 1e-05,
      "loss": 1.5096,
      "step": 235
    },
    {
      "epoch": 0.1637041532992283,
      "grad_norm": 1.9768244155184505,
      "learning_rate": 1e-05,
      "loss": 1.5127,
      "step": 236
    },
    {
      "epoch": 0.16439781496575046,
      "grad_norm": 1.788660125975532,
      "learning_rate": 1e-05,
      "loss": 1.4742,
      "step": 237
    },
    {
      "epoch": 0.1650914766322726,
      "grad_norm": 2.091066870692497,
      "learning_rate": 1e-05,
      "loss": 1.4978,
      "step": 238
    },
    {
      "epoch": 0.16578513829879477,
      "grad_norm": 1.9576004965176508,
      "learning_rate": 1e-05,
      "loss": 1.45,
      "step": 239
    },
    {
      "epoch": 0.16647879996531692,
      "grad_norm": 2.0698620965680528,
      "learning_rate": 1e-05,
      "loss": 1.4502,
      "step": 240
    },
    {
      "epoch": 0.16717246163183908,
      "grad_norm": 2.07030748449516,
      "learning_rate": 1e-05,
      "loss": 1.5028,
      "step": 241
    },
    {
      "epoch": 0.16786612329836123,
      "grad_norm": 2.0470194343255455,
      "learning_rate": 1e-05,
      "loss": 1.4714,
      "step": 242
    },
    {
      "epoch": 0.16855978496488339,
      "grad_norm": 2.0084641094309794,
      "learning_rate": 1e-05,
      "loss": 1.4819,
      "step": 243
    },
    {
      "epoch": 0.16925344663140554,
      "grad_norm": 2.0146863278209106,
      "learning_rate": 1e-05,
      "loss": 1.4604,
      "step": 244
    },
    {
      "epoch": 0.1699471082979277,
      "grad_norm": 2.068474055525701,
      "learning_rate": 1e-05,
      "loss": 1.4494,
      "step": 245
    },
    {
      "epoch": 0.17064076996444985,
      "grad_norm": 1.9451803439502662,
      "learning_rate": 1e-05,
      "loss": 1.4424,
      "step": 246
    },
    {
      "epoch": 0.171334431630972,
      "grad_norm": 2.0197781186907835,
      "learning_rate": 1e-05,
      "loss": 1.4614,
      "step": 247
    },
    {
      "epoch": 0.17202809329749413,
      "grad_norm": 1.9032139101620693,
      "learning_rate": 1e-05,
      "loss": 1.4731,
      "step": 248
    },
    {
      "epoch": 0.1727217549640163,
      "grad_norm": 2.115774791694279,
      "learning_rate": 1e-05,
      "loss": 1.416,
      "step": 249
    },
    {
      "epoch": 0.17341541663053844,
      "grad_norm": 2.0399470329500047,
      "learning_rate": 1e-05,
      "loss": 1.4481,
      "step": 250
    },
    {
      "epoch": 0.1741090782970606,
      "grad_norm": 2.2047084098275027,
      "learning_rate": 1e-05,
      "loss": 1.4822,
      "step": 251
    },
    {
      "epoch": 0.17480273996358275,
      "grad_norm": 2.0604716275395374,
      "learning_rate": 1e-05,
      "loss": 1.4698,
      "step": 252
    },
    {
      "epoch": 0.1754964016301049,
      "grad_norm": 2.0292160669190107,
      "learning_rate": 1e-05,
      "loss": 1.4292,
      "step": 253
    },
    {
      "epoch": 0.17619006329662706,
      "grad_norm": 1.8814965372884114,
      "learning_rate": 1e-05,
      "loss": 1.448,
      "step": 254
    },
    {
      "epoch": 0.17688372496314922,
      "grad_norm": 2.1057756496393805,
      "learning_rate": 1e-05,
      "loss": 1.4335,
      "step": 255
    },
    {
      "epoch": 0.17757738662967137,
      "grad_norm": 2.3132846447554227,
      "learning_rate": 1e-05,
      "loss": 1.4856,
      "step": 256
    },
    {
      "epoch": 0.17827104829619353,
      "grad_norm": 2.0736038589358734,
      "learning_rate": 1e-05,
      "loss": 1.4483,
      "step": 257
    },
    {
      "epoch": 0.17896470996271568,
      "grad_norm": 2.1910317153749985,
      "learning_rate": 1e-05,
      "loss": 1.4896,
      "step": 258
    },
    {
      "epoch": 0.17965837162923784,
      "grad_norm": 2.054248882735396,
      "learning_rate": 1e-05,
      "loss": 1.4202,
      "step": 259
    },
    {
      "epoch": 0.18035203329576,
      "grad_norm": 1.9735586840820536,
      "learning_rate": 1e-05,
      "loss": 1.492,
      "step": 260
    },
    {
      "epoch": 0.18104569496228215,
      "grad_norm": 2.0486128891661357,
      "learning_rate": 1e-05,
      "loss": 1.4842,
      "step": 261
    },
    {
      "epoch": 0.1817393566288043,
      "grad_norm": 2.0487679336561535,
      "learning_rate": 1e-05,
      "loss": 1.3914,
      "step": 262
    },
    {
      "epoch": 0.18243301829532646,
      "grad_norm": 2.122214099406227,
      "learning_rate": 1e-05,
      "loss": 1.5043,
      "step": 263
    },
    {
      "epoch": 0.18312667996184862,
      "grad_norm": 1.9819401665450367,
      "learning_rate": 1e-05,
      "loss": 1.461,
      "step": 264
    },
    {
      "epoch": 0.18382034162837077,
      "grad_norm": 1.8894079827925012,
      "learning_rate": 1e-05,
      "loss": 1.5248,
      "step": 265
    },
    {
      "epoch": 0.18451400329489293,
      "grad_norm": 1.9721789820827966,
      "learning_rate": 1e-05,
      "loss": 1.4289,
      "step": 266
    },
    {
      "epoch": 0.18520766496141508,
      "grad_norm": 1.9579977527698131,
      "learning_rate": 1e-05,
      "loss": 1.4313,
      "step": 267
    },
    {
      "epoch": 0.18590132662793724,
      "grad_norm": 2.0444647256611885,
      "learning_rate": 1e-05,
      "loss": 1.4645,
      "step": 268
    },
    {
      "epoch": 0.1865949882944594,
      "grad_norm": 2.1214502305742275,
      "learning_rate": 1e-05,
      "loss": 1.4712,
      "step": 269
    },
    {
      "epoch": 0.18728864996098152,
      "grad_norm": 1.9384086787313417,
      "learning_rate": 1e-05,
      "loss": 1.4641,
      "step": 270
    },
    {
      "epoch": 0.18798231162750367,
      "grad_norm": 1.9903323950267982,
      "learning_rate": 1e-05,
      "loss": 1.4438,
      "step": 271
    },
    {
      "epoch": 0.18867597329402583,
      "grad_norm": 2.2552584575632264,
      "learning_rate": 1e-05,
      "loss": 1.4954,
      "step": 272
    },
    {
      "epoch": 0.18936963496054798,
      "grad_norm": 2.0400461455788075,
      "learning_rate": 1e-05,
      "loss": 1.4574,
      "step": 273
    },
    {
      "epoch": 0.19006329662707014,
      "grad_norm": 1.985049295968063,
      "learning_rate": 1e-05,
      "loss": 1.4714,
      "step": 274
    },
    {
      "epoch": 0.1907569582935923,
      "grad_norm": 2.0084753961869173,
      "learning_rate": 1e-05,
      "loss": 1.4557,
      "step": 275
    },
    {
      "epoch": 0.19145061996011445,
      "grad_norm": 2.0212614192536473,
      "learning_rate": 1e-05,
      "loss": 1.4841,
      "step": 276
    },
    {
      "epoch": 0.1921442816266366,
      "grad_norm": 2.1484850741149035,
      "learning_rate": 1e-05,
      "loss": 1.5103,
      "step": 277
    },
    {
      "epoch": 0.19283794329315876,
      "grad_norm": 1.9196012631959583,
      "learning_rate": 1e-05,
      "loss": 1.4313,
      "step": 278
    },
    {
      "epoch": 0.19353160495968091,
      "grad_norm": 1.77676382629001,
      "learning_rate": 1e-05,
      "loss": 1.4468,
      "step": 279
    },
    {
      "epoch": 0.19422526662620307,
      "grad_norm": 1.9938658500301698,
      "learning_rate": 1e-05,
      "loss": 1.4,
      "step": 280
    },
    {
      "epoch": 0.19491892829272522,
      "grad_norm": 2.00675288394433,
      "learning_rate": 1e-05,
      "loss": 1.393,
      "step": 281
    },
    {
      "epoch": 0.19561258995924738,
      "grad_norm": 1.9133179166179877,
      "learning_rate": 1e-05,
      "loss": 1.4856,
      "step": 282
    },
    {
      "epoch": 0.19630625162576953,
      "grad_norm": 2.059048555946398,
      "learning_rate": 1e-05,
      "loss": 1.4583,
      "step": 283
    },
    {
      "epoch": 0.1969999132922917,
      "grad_norm": 1.9633303202331707,
      "learning_rate": 1e-05,
      "loss": 1.4713,
      "step": 284
    },
    {
      "epoch": 0.19769357495881384,
      "grad_norm": 2.062438630313021,
      "learning_rate": 1e-05,
      "loss": 1.4695,
      "step": 285
    },
    {
      "epoch": 0.198387236625336,
      "grad_norm": 2.0881034716487017,
      "learning_rate": 1e-05,
      "loss": 1.4088,
      "step": 286
    },
    {
      "epoch": 0.19908089829185815,
      "grad_norm": 2.1703550326161416,
      "learning_rate": 1e-05,
      "loss": 1.5003,
      "step": 287
    },
    {
      "epoch": 0.1997745599583803,
      "grad_norm": 2.074897325155734,
      "learning_rate": 1e-05,
      "loss": 1.4233,
      "step": 288
    },
    {
      "epoch": 0.20046822162490247,
      "grad_norm": 2.1455316048865667,
      "learning_rate": 1e-05,
      "loss": 1.499,
      "step": 289
    },
    {
      "epoch": 0.20116188329142462,
      "grad_norm": 2.101826626794352,
      "learning_rate": 1e-05,
      "loss": 1.423,
      "step": 290
    },
    {
      "epoch": 0.20185554495794678,
      "grad_norm": 1.9433232261964963,
      "learning_rate": 1e-05,
      "loss": 1.4258,
      "step": 291
    },
    {
      "epoch": 0.2025492066244689,
      "grad_norm": 2.0443499477866878,
      "learning_rate": 1e-05,
      "loss": 1.4755,
      "step": 292
    },
    {
      "epoch": 0.20324286829099106,
      "grad_norm": 2.0522507133871795,
      "learning_rate": 1e-05,
      "loss": 1.4472,
      "step": 293
    },
    {
      "epoch": 0.2039365299575132,
      "grad_norm": 2.138230646253678,
      "learning_rate": 1e-05,
      "loss": 1.49,
      "step": 294
    },
    {
      "epoch": 0.20463019162403537,
      "grad_norm": 1.8226561062473423,
      "learning_rate": 1e-05,
      "loss": 1.4556,
      "step": 295
    },
    {
      "epoch": 0.20532385329055752,
      "grad_norm": 2.038783861493903,
      "learning_rate": 1e-05,
      "loss": 1.4549,
      "step": 296
    },
    {
      "epoch": 0.20601751495707968,
      "grad_norm": 2.0515757165248054,
      "learning_rate": 1e-05,
      "loss": 1.4662,
      "step": 297
    },
    {
      "epoch": 0.20671117662360183,
      "grad_norm": 2.0235486233300564,
      "learning_rate": 1e-05,
      "loss": 1.4422,
      "step": 298
    },
    {
      "epoch": 0.207404838290124,
      "grad_norm": 1.9481327456484323,
      "learning_rate": 1e-05,
      "loss": 1.4871,
      "step": 299
    },
    {
      "epoch": 0.20809849995664614,
      "grad_norm": 2.1490389171940962,
      "learning_rate": 1e-05,
      "loss": 1.4438,
      "step": 300
    },
    {
      "epoch": 0.2087921616231683,
      "grad_norm": 1.8636272783701093,
      "learning_rate": 1e-05,
      "loss": 1.3833,
      "step": 301
    },
    {
      "epoch": 0.20948582328969045,
      "grad_norm": 2.0132879418102525,
      "learning_rate": 1e-05,
      "loss": 1.5079,
      "step": 302
    },
    {
      "epoch": 0.2101794849562126,
      "grad_norm": 1.982488095685725,
      "learning_rate": 1e-05,
      "loss": 1.4497,
      "step": 303
    },
    {
      "epoch": 0.21087314662273476,
      "grad_norm": 1.8238922950218728,
      "learning_rate": 1e-05,
      "loss": 1.4535,
      "step": 304
    },
    {
      "epoch": 0.21156680828925692,
      "grad_norm": 1.93657599411593,
      "learning_rate": 1e-05,
      "loss": 1.413,
      "step": 305
    },
    {
      "epoch": 0.21226046995577907,
      "grad_norm": 1.8702247940122898,
      "learning_rate": 1e-05,
      "loss": 1.4419,
      "step": 306
    },
    {
      "epoch": 0.21295413162230123,
      "grad_norm": 1.888122691714043,
      "learning_rate": 1e-05,
      "loss": 1.4674,
      "step": 307
    },
    {
      "epoch": 0.21364779328882338,
      "grad_norm": 1.937294530390563,
      "learning_rate": 1e-05,
      "loss": 1.5099,
      "step": 308
    },
    {
      "epoch": 0.21434145495534554,
      "grad_norm": 1.7678460926752717,
      "learning_rate": 1e-05,
      "loss": 1.4653,
      "step": 309
    },
    {
      "epoch": 0.2150351166218677,
      "grad_norm": 1.9151083060534004,
      "learning_rate": 1e-05,
      "loss": 1.4605,
      "step": 310
    },
    {
      "epoch": 0.21572877828838985,
      "grad_norm": 1.7787361825304597,
      "learning_rate": 1e-05,
      "loss": 1.4693,
      "step": 311
    },
    {
      "epoch": 0.216422439954912,
      "grad_norm": 1.9120724944581113,
      "learning_rate": 1e-05,
      "loss": 1.4881,
      "step": 312
    },
    {
      "epoch": 0.21711610162143413,
      "grad_norm": 1.779761445996506,
      "learning_rate": 1e-05,
      "loss": 1.4091,
      "step": 313
    },
    {
      "epoch": 0.2178097632879563,
      "grad_norm": 2.0485121226808527,
      "learning_rate": 1e-05,
      "loss": 1.434,
      "step": 314
    },
    {
      "epoch": 0.21850342495447844,
      "grad_norm": 1.8510715617371953,
      "learning_rate": 1e-05,
      "loss": 1.4502,
      "step": 315
    },
    {
      "epoch": 0.2191970866210006,
      "grad_norm": 2.0639411090008717,
      "learning_rate": 1e-05,
      "loss": 1.4663,
      "step": 316
    },
    {
      "epoch": 0.21989074828752275,
      "grad_norm": 1.8671225612100188,
      "learning_rate": 1e-05,
      "loss": 1.4679,
      "step": 317
    },
    {
      "epoch": 0.2205844099540449,
      "grad_norm": 2.089367239670572,
      "learning_rate": 1e-05,
      "loss": 1.4195,
      "step": 318
    },
    {
      "epoch": 0.22127807162056706,
      "grad_norm": 1.989736808079142,
      "learning_rate": 1e-05,
      "loss": 1.4397,
      "step": 319
    },
    {
      "epoch": 0.22197173328708922,
      "grad_norm": 1.9101605077692294,
      "learning_rate": 1e-05,
      "loss": 1.3985,
      "step": 320
    },
    {
      "epoch": 0.22266539495361137,
      "grad_norm": 1.9384620738328688,
      "learning_rate": 1e-05,
      "loss": 1.4223,
      "step": 321
    },
    {
      "epoch": 0.22335905662013353,
      "grad_norm": 2.1584155527633198,
      "learning_rate": 1e-05,
      "loss": 1.4836,
      "step": 322
    },
    {
      "epoch": 0.22405271828665568,
      "grad_norm": 2.0224370906370694,
      "learning_rate": 1e-05,
      "loss": 1.4454,
      "step": 323
    },
    {
      "epoch": 0.22474637995317784,
      "grad_norm": 2.0788397833125765,
      "learning_rate": 1e-05,
      "loss": 1.5232,
      "step": 324
    },
    {
      "epoch": 0.2254400416197,
      "grad_norm": 2.0341494419793427,
      "learning_rate": 1e-05,
      "loss": 1.4617,
      "step": 325
    },
    {
      "epoch": 0.22613370328622215,
      "grad_norm": 2.008192362429119,
      "learning_rate": 1e-05,
      "loss": 1.4375,
      "step": 326
    },
    {
      "epoch": 0.2268273649527443,
      "grad_norm": 1.7720050916534584,
      "learning_rate": 1e-05,
      "loss": 1.4517,
      "step": 327
    },
    {
      "epoch": 0.22752102661926646,
      "grad_norm": 1.9703464005250477,
      "learning_rate": 1e-05,
      "loss": 1.4201,
      "step": 328
    },
    {
      "epoch": 0.2282146882857886,
      "grad_norm": 1.8046683048934846,
      "learning_rate": 1e-05,
      "loss": 1.4441,
      "step": 329
    },
    {
      "epoch": 0.22890834995231077,
      "grad_norm": 1.996448453239083,
      "learning_rate": 1e-05,
      "loss": 1.411,
      "step": 330
    },
    {
      "epoch": 0.22960201161883292,
      "grad_norm": 2.202209909202205,
      "learning_rate": 1e-05,
      "loss": 1.4443,
      "step": 331
    },
    {
      "epoch": 0.23029567328535508,
      "grad_norm": 1.9288611183096158,
      "learning_rate": 1e-05,
      "loss": 1.4261,
      "step": 332
    },
    {
      "epoch": 0.23098933495187723,
      "grad_norm": 1.9931802186149232,
      "learning_rate": 1e-05,
      "loss": 1.4228,
      "step": 333
    },
    {
      "epoch": 0.2316829966183994,
      "grad_norm": 1.9300657348149677,
      "learning_rate": 1e-05,
      "loss": 1.4791,
      "step": 334
    },
    {
      "epoch": 0.23237665828492152,
      "grad_norm": 1.952656400463476,
      "learning_rate": 1e-05,
      "loss": 1.4682,
      "step": 335
    },
    {
      "epoch": 0.23307031995144367,
      "grad_norm": 1.920902486845839,
      "learning_rate": 1e-05,
      "loss": 1.4286,
      "step": 336
    },
    {
      "epoch": 0.23376398161796583,
      "grad_norm": 1.9689037515231558,
      "learning_rate": 1e-05,
      "loss": 1.439,
      "step": 337
    },
    {
      "epoch": 0.23445764328448798,
      "grad_norm": 1.928911178735969,
      "learning_rate": 1e-05,
      "loss": 1.4464,
      "step": 338
    },
    {
      "epoch": 0.23515130495101014,
      "grad_norm": 2.1288190614130134,
      "learning_rate": 1e-05,
      "loss": 1.4386,
      "step": 339
    },
    {
      "epoch": 0.2358449666175323,
      "grad_norm": 2.076072187290826,
      "learning_rate": 1e-05,
      "loss": 1.4928,
      "step": 340
    },
    {
      "epoch": 0.23653862828405445,
      "grad_norm": 1.8705047682268778,
      "learning_rate": 1e-05,
      "loss": 1.4536,
      "step": 341
    },
    {
      "epoch": 0.2372322899505766,
      "grad_norm": 1.9565604386223752,
      "learning_rate": 1e-05,
      "loss": 1.4783,
      "step": 342
    },
    {
      "epoch": 0.23792595161709876,
      "grad_norm": 2.028892379215102,
      "learning_rate": 1e-05,
      "loss": 1.4272,
      "step": 343
    },
    {
      "epoch": 0.2386196132836209,
      "grad_norm": 1.8650321597118786,
      "learning_rate": 1e-05,
      "loss": 1.4742,
      "step": 344
    },
    {
      "epoch": 0.23931327495014307,
      "grad_norm": 2.1612306768116083,
      "learning_rate": 1e-05,
      "loss": 1.4405,
      "step": 345
    },
    {
      "epoch": 0.24000693661666522,
      "grad_norm": 2.0151472915975748,
      "learning_rate": 1e-05,
      "loss": 1.4246,
      "step": 346
    },
    {
      "epoch": 0.24070059828318738,
      "grad_norm": 2.1329263531073472,
      "learning_rate": 1e-05,
      "loss": 1.4483,
      "step": 347
    },
    {
      "epoch": 0.24139425994970953,
      "grad_norm": 1.8503952674647635,
      "learning_rate": 1e-05,
      "loss": 1.3852,
      "step": 348
    },
    {
      "epoch": 0.2420879216162317,
      "grad_norm": 1.9232118601702997,
      "learning_rate": 1e-05,
      "loss": 1.4704,
      "step": 349
    },
    {
      "epoch": 0.24278158328275384,
      "grad_norm": 1.8955902858682858,
      "learning_rate": 1e-05,
      "loss": 1.4472,
      "step": 350
    },
    {
      "epoch": 0.243475244949276,
      "grad_norm": 2.0209412893438503,
      "learning_rate": 1e-05,
      "loss": 1.453,
      "step": 351
    },
    {
      "epoch": 0.24416890661579815,
      "grad_norm": 2.0049406945887736,
      "learning_rate": 1e-05,
      "loss": 1.4429,
      "step": 352
    },
    {
      "epoch": 0.2448625682823203,
      "grad_norm": 1.8970813615917181,
      "learning_rate": 1e-05,
      "loss": 1.4145,
      "step": 353
    },
    {
      "epoch": 0.24555622994884246,
      "grad_norm": 1.998898929474657,
      "learning_rate": 1e-05,
      "loss": 1.3887,
      "step": 354
    },
    {
      "epoch": 0.24624989161536462,
      "grad_norm": 2.0390603685379944,
      "learning_rate": 1e-05,
      "loss": 1.477,
      "step": 355
    },
    {
      "epoch": 0.24694355328188675,
      "grad_norm": 1.9473625966599237,
      "learning_rate": 1e-05,
      "loss": 1.422,
      "step": 356
    },
    {
      "epoch": 0.2476372149484089,
      "grad_norm": 1.8621154509930413,
      "learning_rate": 1e-05,
      "loss": 1.4551,
      "step": 357
    },
    {
      "epoch": 0.24833087661493106,
      "grad_norm": 2.079525202308318,
      "learning_rate": 1e-05,
      "loss": 1.496,
      "step": 358
    },
    {
      "epoch": 0.2490245382814532,
      "grad_norm": 2.0606010137774162,
      "learning_rate": 1e-05,
      "loss": 1.3982,
      "step": 359
    },
    {
      "epoch": 0.24971819994797537,
      "grad_norm": 2.4565497491600015,
      "learning_rate": 1e-05,
      "loss": 1.4833,
      "step": 360
    },
    {
      "epoch": 0.2504118616144975,
      "grad_norm": 1.9825019010439706,
      "learning_rate": 1e-05,
      "loss": 1.4271,
      "step": 361
    },
    {
      "epoch": 0.2511055232810197,
      "grad_norm": 1.9086652448767583,
      "learning_rate": 1e-05,
      "loss": 1.4125,
      "step": 362
    },
    {
      "epoch": 0.25179918494754183,
      "grad_norm": 2.0999318583687625,
      "learning_rate": 1e-05,
      "loss": 1.4235,
      "step": 363
    },
    {
      "epoch": 0.252492846614064,
      "grad_norm": 1.985190014569069,
      "learning_rate": 1e-05,
      "loss": 1.4501,
      "step": 364
    },
    {
      "epoch": 0.25318650828058614,
      "grad_norm": 1.910068963847788,
      "learning_rate": 1e-05,
      "loss": 1.4658,
      "step": 365
    },
    {
      "epoch": 0.2538801699471083,
      "grad_norm": 2.1692090204353764,
      "learning_rate": 1e-05,
      "loss": 1.4418,
      "step": 366
    },
    {
      "epoch": 0.25457383161363045,
      "grad_norm": 1.9609194594229094,
      "learning_rate": 1e-05,
      "loss": 1.4975,
      "step": 367
    },
    {
      "epoch": 0.2552674932801526,
      "grad_norm": 1.9921583707202606,
      "learning_rate": 1e-05,
      "loss": 1.46,
      "step": 368
    },
    {
      "epoch": 0.25596115494667476,
      "grad_norm": 1.946762435726228,
      "learning_rate": 1e-05,
      "loss": 1.4346,
      "step": 369
    },
    {
      "epoch": 0.2566548166131969,
      "grad_norm": 1.9131156156498506,
      "learning_rate": 1e-05,
      "loss": 1.4145,
      "step": 370
    },
    {
      "epoch": 0.2573484782797191,
      "grad_norm": 2.0667230542895885,
      "learning_rate": 1e-05,
      "loss": 1.4428,
      "step": 371
    },
    {
      "epoch": 0.2580421399462412,
      "grad_norm": 1.8834877477820704,
      "learning_rate": 1e-05,
      "loss": 1.4198,
      "step": 372
    },
    {
      "epoch": 0.2587358016127634,
| "grad_norm": 2.192277081033706, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4732, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 0.2594294632792855, | |
| "grad_norm": 2.0923048016266406, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4748, | |
| "step": 374 | |
| }, | |
| { | |
| "epoch": 0.2601231249458077, | |
| "grad_norm": 2.086280677890939, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4405, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.2608167866123298, | |
| "grad_norm": 1.905489769808509, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4445, | |
| "step": 376 | |
| }, | |
| { | |
| "epoch": 0.261510448278852, | |
| "grad_norm": 2.0450091978264466, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4186, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 0.26220410994537413, | |
| "grad_norm": 2.133929617755548, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4572, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 0.2628977716118963, | |
| "grad_norm": 2.040274726964717, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4238, | |
| "step": 379 | |
| }, | |
| { | |
| "epoch": 0.26359143327841844, | |
| "grad_norm": 2.031802523503596, | |
| "learning_rate": 1e-05, | |
| "loss": 1.485, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.2642850949449406, | |
| "grad_norm": 1.8546387757797609, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4579, | |
| "step": 381 | |
| }, | |
| { | |
| "epoch": 0.26497875661146275, | |
| "grad_norm": 2.0419344295588893, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4663, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 0.26567241827798493, | |
| "grad_norm": 1.9135093773488887, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3924, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 0.26636607994450706, | |
| "grad_norm": 1.9016139415831943, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4308, | |
| "step": 384 | |
| }, | |
| { | |
| "epoch": 0.26705974161102924, | |
| "grad_norm": 1.9082693062876364, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3594, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.26775340327755137, | |
| "grad_norm": 1.9587042651997673, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4879, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 0.26844706494407355, | |
| "grad_norm": 1.8806230175431726, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4101, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 0.2691407266105957, | |
| "grad_norm": 1.7730356951796558, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4196, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 0.2698343882771178, | |
| "grad_norm": 2.0320726341653255, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4207, | |
| "step": 389 | |
| }, | |
| { | |
| "epoch": 0.27052804994364, | |
| "grad_norm": 1.9758337171264837, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3527, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.2712217116101621, | |
| "grad_norm": 1.8958218154268665, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4802, | |
| "step": 391 | |
| }, | |
| { | |
| "epoch": 0.2719153732766843, | |
| "grad_norm": 2.0207609598652, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4304, | |
| "step": 392 | |
| }, | |
| { | |
| "epoch": 0.27260903494320643, | |
| "grad_norm": 1.9475691858544313, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4018, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 0.2733026966097286, | |
| "grad_norm": 1.9389041388735107, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4343, | |
| "step": 394 | |
| }, | |
| { | |
| "epoch": 0.27399635827625074, | |
| "grad_norm": 1.905336185660673, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4283, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.2746900199427729, | |
| "grad_norm": 1.729440202579088, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4066, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 0.27538368160929505, | |
| "grad_norm": 1.7537140656073078, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3722, | |
| "step": 397 | |
| }, | |
| { | |
| "epoch": 0.27607734327581723, | |
| "grad_norm": 1.8075317974729948, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3875, | |
| "step": 398 | |
| }, | |
| { | |
| "epoch": 0.27677100494233936, | |
| "grad_norm": 1.8284022154266728, | |
| "learning_rate": 1e-05, | |
| "loss": 1.401, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 0.27746466660886154, | |
| "grad_norm": 2.0107348857071563, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4018, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.27815832827538367, | |
| "grad_norm": 2.117911173802508, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4517, | |
| "step": 401 | |
| }, | |
| { | |
| "epoch": 0.27885198994190585, | |
| "grad_norm": 1.8033673093925715, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3847, | |
| "step": 402 | |
| }, | |
| { | |
| "epoch": 0.279545651608428, | |
| "grad_norm": 1.7714377429685437, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4638, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 0.28023931327495016, | |
| "grad_norm": 1.8489883317533833, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4183, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 0.2809329749414723, | |
| "grad_norm": 2.0970756827183625, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4531, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.2816266366079945, | |
| "grad_norm": 2.0300925713263247, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4129, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 0.2823202982745166, | |
| "grad_norm": 2.0694538236815365, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4346, | |
| "step": 407 | |
| }, | |
| { | |
| "epoch": 0.2830139599410388, | |
| "grad_norm": 2.125152836002329, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4636, | |
| "step": 408 | |
| }, | |
| { | |
| "epoch": 0.2837076216075609, | |
| "grad_norm": 1.783820266730962, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4327, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 0.28440128327408304, | |
| "grad_norm": 2.0534367028415943, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4691, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.2850949449406052, | |
| "grad_norm": 1.876558701718411, | |
| "learning_rate": 1e-05, | |
| "loss": 1.383, | |
| "step": 411 | |
| }, | |
| { | |
| "epoch": 0.28578860660712735, | |
| "grad_norm": 2.0521158454990855, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4213, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 0.28648226827364953, | |
| "grad_norm": 2.0220971535707877, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4658, | |
| "step": 413 | |
| }, | |
| { | |
| "epoch": 0.28717592994017166, | |
| "grad_norm": 2.038148661267878, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4389, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 0.28786959160669384, | |
| "grad_norm": 1.9405516561969085, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4077, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.28856325327321597, | |
| "grad_norm": 1.7499414272985196, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4215, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 0.28925691493973815, | |
| "grad_norm": 1.8287074775541738, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4734, | |
| "step": 417 | |
| }, | |
| { | |
| "epoch": 0.2899505766062603, | |
| "grad_norm": 2.029489052381792, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3857, | |
| "step": 418 | |
| }, | |
| { | |
| "epoch": 0.29064423827278246, | |
| "grad_norm": 1.9688645010655113, | |
| "learning_rate": 1e-05, | |
| "loss": 1.377, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 0.2913378999393046, | |
| "grad_norm": 1.925094261068192, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4246, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.2920315616058268, | |
| "grad_norm": 2.117190679742464, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4595, | |
| "step": 421 | |
| }, | |
| { | |
| "epoch": 0.2927252232723489, | |
| "grad_norm": 2.076005784675454, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4242, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 0.2934188849388711, | |
| "grad_norm": 1.9173678536303644, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4209, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 0.2941125466053932, | |
| "grad_norm": 1.9453593461528418, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4067, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 0.2948062082719154, | |
| "grad_norm": 2.067561860466064, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4637, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.2954998699384375, | |
| "grad_norm": 1.899343422880033, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4355, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 0.2961935316049597, | |
| "grad_norm": 1.9847242096071747, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4292, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 0.29688719327148183, | |
| "grad_norm": 1.854907399156512, | |
| "learning_rate": 1e-05, | |
| "loss": 1.443, | |
| "step": 428 | |
| }, | |
| { | |
| "epoch": 0.297580854938004, | |
| "grad_norm": 1.955263267566865, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3928, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 0.29827451660452614, | |
| "grad_norm": 2.053105452037698, | |
| "learning_rate": 1e-05, | |
| "loss": 1.378, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.2989681782710483, | |
| "grad_norm": 1.9109805438113896, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3645, | |
| "step": 431 | |
| }, | |
| { | |
| "epoch": 0.29966183993757045, | |
| "grad_norm": 2.043933400997838, | |
| "learning_rate": 1e-05, | |
| "loss": 1.435, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 0.3003555016040926, | |
| "grad_norm": 2.1971134552732785, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4391, | |
| "step": 433 | |
| }, | |
| { | |
| "epoch": 0.30104916327061476, | |
| "grad_norm": 1.9910023447928422, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4084, | |
| "step": 434 | |
| }, | |
| { | |
| "epoch": 0.3017428249371369, | |
| "grad_norm": 2.0432125795374607, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4423, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.30243648660365907, | |
| "grad_norm": 1.9609905480132668, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4511, | |
| "step": 436 | |
| }, | |
| { | |
| "epoch": 0.3031301482701812, | |
| "grad_norm": 1.8755027815135972, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3937, | |
| "step": 437 | |
| }, | |
| { | |
| "epoch": 0.3038238099367034, | |
| "grad_norm": 1.9069584207191346, | |
| "learning_rate": 1e-05, | |
| "loss": 1.38, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 0.3045174716032255, | |
| "grad_norm": 2.174136818483425, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4535, | |
| "step": 439 | |
| }, | |
| { | |
| "epoch": 0.3052111332697477, | |
| "grad_norm": 1.8297620514780848, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4285, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.3059047949362698, | |
| "grad_norm": 1.8949261520180545, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4336, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 0.306598456602792, | |
| "grad_norm": 1.9549110409659416, | |
| "learning_rate": 1e-05, | |
| "loss": 1.373, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 0.30729211826931413, | |
| "grad_norm": 1.9504761362373537, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3915, | |
| "step": 443 | |
| }, | |
| { | |
| "epoch": 0.3079857799358363, | |
| "grad_norm": 1.8994399368824695, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3887, | |
| "step": 444 | |
| }, | |
| { | |
| "epoch": 0.30867944160235844, | |
| "grad_norm": 1.8796327717601744, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4075, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.3093731032688806, | |
| "grad_norm": 1.9681787692068275, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4476, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 0.31006676493540275, | |
| "grad_norm": 2.0373492235623285, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3909, | |
| "step": 447 | |
| }, | |
| { | |
| "epoch": 0.31076042660192493, | |
| "grad_norm": 1.8891756577788732, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4804, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 0.31145408826844706, | |
| "grad_norm": 1.833019021556368, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4207, | |
| "step": 449 | |
| }, | |
| { | |
| "epoch": 0.31214774993496924, | |
| "grad_norm": 1.836006077142845, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3933, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.31284141160149137, | |
| "grad_norm": 1.8756183495188887, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4081, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 0.31353507326801355, | |
| "grad_norm": 1.8832247399343314, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3978, | |
| "step": 452 | |
| }, | |
| { | |
| "epoch": 0.3142287349345357, | |
| "grad_norm": 1.9411565103780286, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4346, | |
| "step": 453 | |
| }, | |
| { | |
| "epoch": 0.3149223966010578, | |
| "grad_norm": 1.9226021145167342, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4192, | |
| "step": 454 | |
| }, | |
| { | |
| "epoch": 0.31561605826758, | |
| "grad_norm": 1.864849847975618, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4023, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.3163097199341021, | |
| "grad_norm": 1.841130556375779, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3843, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 0.3170033816006243, | |
| "grad_norm": 1.9910619133587744, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4196, | |
| "step": 457 | |
| }, | |
| { | |
| "epoch": 0.31769704326714643, | |
| "grad_norm": 1.871071007237103, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4398, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 0.3183907049336686, | |
| "grad_norm": 1.9525027065397538, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4221, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 0.31908436660019074, | |
| "grad_norm": 1.875959289372641, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4568, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.3197780282667129, | |
| "grad_norm": 1.896488018271282, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4443, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 0.32047168993323505, | |
| "grad_norm": 1.819017143897324, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3804, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 0.32116535159975723, | |
| "grad_norm": 1.904368280434257, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3803, | |
| "step": 463 | |
| }, | |
| { | |
| "epoch": 0.32185901326627936, | |
| "grad_norm": 1.9630080846645839, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4099, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 0.32255267493280154, | |
| "grad_norm": 1.8933783131596658, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4426, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.32324633659932367, | |
| "grad_norm": 1.9919230915473398, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4099, | |
| "step": 466 | |
| }, | |
| { | |
| "epoch": 0.32393999826584585, | |
| "grad_norm": 2.0057958567267606, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4409, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 0.324633659932368, | |
| "grad_norm": 2.0237082745853088, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4331, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 0.32532732159889016, | |
| "grad_norm": 1.9072494572387, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4298, | |
| "step": 469 | |
| }, | |
| { | |
| "epoch": 0.3260209832654123, | |
| "grad_norm": 2.1235561783712718, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4125, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.3267146449319345, | |
| "grad_norm": 1.844374731285956, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3672, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 0.3274083065984566, | |
| "grad_norm": 2.1204902498109326, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3909, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 0.3281019682649788, | |
| "grad_norm": 2.084912862868831, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4521, | |
| "step": 473 | |
| }, | |
| { | |
| "epoch": 0.3287956299315009, | |
| "grad_norm": 1.9363382366089963, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4166, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 0.32948929159802304, | |
| "grad_norm": 2.043516332073307, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4221, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.3301829532645452, | |
| "grad_norm": 2.125873099222709, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4894, | |
| "step": 476 | |
| }, | |
| { | |
| "epoch": 0.33087661493106735, | |
| "grad_norm": 2.042031475330511, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4025, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 0.33157027659758953, | |
| "grad_norm": 1.7849937086119454, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4132, | |
| "step": 478 | |
| }, | |
| { | |
| "epoch": 0.33226393826411166, | |
| "grad_norm": 1.7931574592397888, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4273, | |
| "step": 479 | |
| }, | |
| { | |
| "epoch": 0.33295759993063384, | |
| "grad_norm": 2.826745637249205, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3552, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.33365126159715597, | |
| "grad_norm": 1.9644473691572903, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3728, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 0.33434492326367815, | |
| "grad_norm": 1.8815785092027932, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3836, | |
| "step": 482 | |
| }, | |
| { | |
| "epoch": 0.3350385849302003, | |
| "grad_norm": 2.1707103250407265, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3872, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 0.33573224659672246, | |
| "grad_norm": 1.805124910060426, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4269, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 0.3364259082632446, | |
| "grad_norm": 2.0566289424785165, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4353, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.33711956992976677, | |
| "grad_norm": 1.9392776766197952, | |
| "learning_rate": 1e-05, | |
| "loss": 1.42, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 0.3378132315962889, | |
| "grad_norm": 1.7990589474342267, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4232, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 0.3385068932628111, | |
| "grad_norm": 2.042243634171937, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4094, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 0.3392005549293332, | |
| "grad_norm": 1.9990437442788238, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4349, | |
| "step": 489 | |
| }, | |
| { | |
| "epoch": 0.3398942165958554, | |
| "grad_norm": 2.1051212689157777, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3838, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.3405878782623775, | |
| "grad_norm": 2.1229600713115238, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3403, | |
| "step": 491 | |
| }, | |
| { | |
| "epoch": 0.3412815399288997, | |
| "grad_norm": 1.8779948313961718, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3938, | |
| "step": 492 | |
| }, | |
| { | |
| "epoch": 0.34197520159542183, | |
| "grad_norm": 1.9140202797636157, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3917, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 0.342668863261944, | |
| "grad_norm": 1.974739574014709, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4054, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 0.34336252492846614, | |
| "grad_norm": 1.9199035763474215, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3922, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.34405618659498827, | |
| "grad_norm": 1.9177941872465927, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4168, | |
| "step": 496 | |
| }, | |
| { | |
| "epoch": 0.34474984826151045, | |
| "grad_norm": 2.00337000954214, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3949, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 0.3454435099280326, | |
| "grad_norm": 1.998945535857092, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3929, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 0.34613717159455476, | |
| "grad_norm": 2.0182449229484636, | |
| "learning_rate": 1e-05, | |
| "loss": 1.422, | |
| "step": 499 | |
| }, | |
| { | |
| "epoch": 0.3468308332610769, | |
| "grad_norm": 1.9361126702422482, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3964, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.34752449492759907, | |
| "grad_norm": 2.0248490486946316, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4293, | |
| "step": 501 | |
| }, | |
| { | |
| "epoch": 0.3482181565941212, | |
| "grad_norm": 1.8982822396985153, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3778, | |
| "step": 502 | |
| }, | |
| { | |
| "epoch": 0.3489118182606434, | |
| "grad_norm": 2.059259100223746, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4509, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 0.3496054799271655, | |
| "grad_norm": 2.00329460831796, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3878, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 0.3502991415936877, | |
| "grad_norm": 1.9719718567850673, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3829, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.3509928032602098, | |
| "grad_norm": 1.9623427987164561, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4036, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 0.351686464926732, | |
| "grad_norm": 2.022806123443883, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4068, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 0.35238012659325413, | |
| "grad_norm": 2.00015503601285, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4248, | |
| "step": 508 | |
| }, | |
| { | |
| "epoch": 0.3530737882597763, | |
| "grad_norm": 1.904741684713311, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3519, | |
| "step": 509 | |
| }, | |
| { | |
| "epoch": 0.35376744992629844, | |
| "grad_norm": 1.9455166972699935, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4152, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.3544611115928206, | |
| "grad_norm": 1.9409989480916887, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3916, | |
| "step": 511 | |
| }, | |
| { | |
| "epoch": 0.35515477325934275, | |
| "grad_norm": 1.8707742492583141, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4362, | |
| "step": 512 | |
| }, | |
| { | |
| "epoch": 0.35584843492586493, | |
| "grad_norm": 1.8314588678913148, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4279, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 0.35654209659238706, | |
| "grad_norm": 1.913107053098294, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4686, | |
| "step": 514 | |
| }, | |
| { | |
| "epoch": 0.35723575825890924, | |
| "grad_norm": 1.8781008328794606, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4257, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.35792941992543137, | |
| "grad_norm": 1.858897618474902, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3299, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 0.35862308159195355, | |
| "grad_norm": 1.7902966802183116, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3806, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 0.3593167432584757, | |
| "grad_norm": 1.9406550842391148, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4069, | |
| "step": 518 | |
| }, | |
| { | |
| "epoch": 0.3600104049249978, | |
| "grad_norm": 2.032374189763469, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3893, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 0.36070406659152, | |
| "grad_norm": 2.0622944109637946, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4044, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.3613977282580421, | |
| "grad_norm": 1.7694621919787776, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3781, | |
| "step": 521 | |
| }, | |
| { | |
| "epoch": 0.3620913899245643, | |
| "grad_norm": 1.9876645276332312, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3658, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 0.3627850515910864, | |
| "grad_norm": 1.8286541300136883, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3954, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 0.3634787132576086, | |
| "grad_norm": 2.1598158167597785, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4233, | |
| "step": 524 | |
| }, | |
| { | |
| "epoch": 0.36417237492413074, | |
| "grad_norm": 2.0125775815004308, | |
| "learning_rate": 1e-05, | |
| "loss": 1.425, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.3648660365906529, | |
| "grad_norm": 1.8220999673991007, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4689, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 0.36555969825717505, | |
| "grad_norm": 1.9067444062678136, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4184, | |
| "step": 527 | |
| }, | |
| { | |
| "epoch": 0.36625335992369723, | |
| "grad_norm": 1.8244010981958079, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4307, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 0.36694702159021936, | |
| "grad_norm": 1.9491392457779768, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4046, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 0.36764068325674154, | |
| "grad_norm": 1.8922109612389026, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4312, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.36833434492326367, | |
| "grad_norm": 1.8416689512552131, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3785, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 0.36902800658978585, | |
| "grad_norm": 1.8783492413280818, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4109, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 0.369721668256308, | |
| "grad_norm": 1.849885383573315, | |
| "learning_rate": 1e-05, | |
| "loss": 1.378, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 0.37041532992283016, | |
| "grad_norm": 2.071142129836207, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3897, | |
| "step": 534 | |
| }, | |
| { | |
| "epoch": 0.3711089915893523, | |
| "grad_norm": 1.955558194146127, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3781, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.37180265325587447, | |
| "grad_norm": 1.8539973789480575, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3616, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 0.3724963149223966, | |
| "grad_norm": 1.9837890849438713, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3713, | |
| "step": 537 | |
| }, | |
| { | |
| "epoch": 0.3731899765889188, | |
| "grad_norm": 1.9070579057354713, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3479, | |
| "step": 538 | |
| }, | |
| { | |
| "epoch": 0.3738836382554409, | |
| "grad_norm": 1.9425946819156692, | |
| "learning_rate": 1e-05, | |
| "loss": 1.376, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 0.37457729992196304, | |
| "grad_norm": 1.8771696216581655, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3827, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.3752709615884852, | |
| "grad_norm": 1.9186911492371799, | |
| "learning_rate": 1e-05, | |
| "loss": 1.376, | |
| "step": 541 | |
| }, | |
| { | |
| "epoch": 0.37596462325500735, | |
| "grad_norm": 1.7294504716898789, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3706, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 0.37665828492152953, | |
| "grad_norm": 1.7712039048530457, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4438, | |
| "step": 543 | |
| }, | |
| { | |
| "epoch": 0.37735194658805166, | |
| "grad_norm": 1.83847300072347, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4161, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 0.37804560825457384, | |
| "grad_norm": 1.825146012889819, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4118, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.37873926992109597, | |
| "grad_norm": 1.7989485574696917, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3689, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 0.37943293158761815, | |
| "grad_norm": 2.1484169058996816, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3477, | |
| "step": 547 | |
| }, | |
| { | |
| "epoch": 0.3801265932541403, | |
| "grad_norm": 2.0781089621700777, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4372, | |
| "step": 548 | |
| }, | |
| { | |
| "epoch": 0.38082025492066246, | |
| "grad_norm": 1.8367820389064522, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4556, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 0.3815139165871846, | |
| "grad_norm": 1.8748672667110173, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4059, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.38220757825370677, | |
| "grad_norm": 1.8956025199638242, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4102, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 0.3829012399202289, | |
| "grad_norm": 1.9601967006624061, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3879, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 0.3835949015867511, | |
| "grad_norm": 2.0406526524881707, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3885, | |
| "step": 553 | |
| }, | |
| { | |
| "epoch": 0.3842885632532732, | |
| "grad_norm": 2.091444436822791, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3999, | |
| "step": 554 | |
| }, | |
| { | |
| "epoch": 0.3849822249197954, | |
| "grad_norm": 1.7714596927341815, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3452, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.3856758865863175, | |
| "grad_norm": 1.825942444423705, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4142, | |
| "step": 556 | |
| }, | |
| { | |
| "epoch": 0.3863695482528397, | |
| "grad_norm": 1.871025102921204, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3421, | |
| "step": 557 | |
| }, | |
| { | |
| "epoch": 0.38706320991936183, | |
| "grad_norm": 1.9540951936439066, | |
| "learning_rate": 1e-05, | |
| "loss": 1.384, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 0.387756871585884, | |
| "grad_norm": 1.8147562505586048, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3794, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 0.38845053325240614, | |
| "grad_norm": 1.8138847080198641, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4251, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.38914419491892827, | |
| "grad_norm": 1.8462886417351692, | |
| "learning_rate": 1e-05, | |
| "loss": 1.398, | |
| "step": 561 | |
| }, | |
| { | |
| "epoch": 0.38983785658545045, | |
| "grad_norm": 2.076297894089803, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3529, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 0.3905315182519726, | |
| "grad_norm": 1.8009871240759965, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3739, | |
| "step": 563 | |
| }, | |
| { | |
| "epoch": 0.39122517991849476, | |
| "grad_norm": 1.872935136959733, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3842, | |
| "step": 564 | |
| }, | |
| { | |
| "epoch": 0.3919188415850169, | |
| "grad_norm": 1.8957537468315788, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3633, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.39261250325153907, | |
| "grad_norm": 1.991998761167588, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3742, | |
| "step": 566 | |
| }, | |
| { | |
| "epoch": 0.3933061649180612, | |
| "grad_norm": 1.7016217238658489, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4253, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 0.3939998265845834, | |
| "grad_norm": 1.8800648961699629, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4451, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 0.3946934882511055, | |
| "grad_norm": 2.0019042611698774, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3997, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 0.3953871499176277, | |
| "grad_norm": 2.215323946324701, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3577, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.3960808115841498, | |
| "grad_norm": 2.0407346613187016, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3808, | |
| "step": 571 | |
| }, | |
| { | |
| "epoch": 0.396774473250672, | |
| "grad_norm": 1.874235605467884, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3883, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 0.3974681349171941, | |
| "grad_norm": 1.9076714486170196, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3785, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 0.3981617965837163, | |
| "grad_norm": 1.9438453292084767, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3888, | |
| "step": 574 | |
| }, | |
| { | |
| "epoch": 0.39885545825023844, | |
| "grad_norm": 1.9010804186629797, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4044, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.3995491199167606, | |
| "grad_norm": 2.1173409005527213, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4043, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 0.40024278158328275, | |
| "grad_norm": 1.924471913084561, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3636, | |
| "step": 577 | |
| }, | |
| { | |
| "epoch": 0.40093644324980493, | |
| "grad_norm": 2.129867365540973, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4308, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 0.40163010491632706, | |
| "grad_norm": 1.9302914340159718, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3937, | |
| "step": 579 | |
| }, | |
| { | |
| "epoch": 0.40232376658284924, | |
| "grad_norm": 1.978021194477141, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4076, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.40301742824937137, | |
| "grad_norm": 1.9801917793272694, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3756, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 0.40371108991589355, | |
| "grad_norm": 1.8510216673051632, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3643, | |
| "step": 582 | |
| }, | |
| { | |
| "epoch": 0.4044047515824157, | |
| "grad_norm": 2.183109714099149, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4537, | |
| "step": 583 | |
| }, | |
| { | |
| "epoch": 0.4050984132489378, | |
| "grad_norm": 2.349408814867446, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4192, | |
| "step": 584 | |
| }, | |
| { | |
| "epoch": 0.40579207491546, | |
| "grad_norm": 2.2006000600981106, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4079, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.4064857365819821, | |
| "grad_norm": 1.8981822287744043, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4069, | |
| "step": 586 | |
| }, | |
| { | |
| "epoch": 0.4071793982485043, | |
| "grad_norm": 1.760320500086242, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3738, | |
| "step": 587 | |
| }, | |
| { | |
| "epoch": 0.4078730599150264, | |
| "grad_norm": 1.8312063491211514, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3887, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 0.4085667215815486, | |
| "grad_norm": 1.760204313519952, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3332, | |
| "step": 589 | |
| }, | |
| { | |
| "epoch": 0.40926038324807074, | |
| "grad_norm": 1.7343834044934423, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3435, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.4099540449145929, | |
| "grad_norm": 2.2088224043427687, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4162, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 0.41064770658111505, | |
| "grad_norm": 1.8200743654853602, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3931, | |
| "step": 592 | |
| }, | |
| { | |
| "epoch": 0.41134136824763723, | |
| "grad_norm": 1.8435586030279356, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4093, | |
| "step": 593 | |
| }, | |
| { | |
| "epoch": 0.41203502991415936, | |
| "grad_norm": 1.916392749662975, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3894, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 0.41272869158068154, | |
| "grad_norm": 1.9161401196518564, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3726, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.41342235324720367, | |
| "grad_norm": 1.8756544580848145, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3697, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 0.41411601491372585, | |
| "grad_norm": 1.898787166818739, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3941, | |
| "step": 597 | |
| }, | |
| { | |
| "epoch": 0.414809676580248, | |
| "grad_norm": 1.8044701136277606, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3999, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 0.41550333824677016, | |
| "grad_norm": 1.8310418953275842, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3379, | |
| "step": 599 | |
| }, | |
| { | |
| "epoch": 0.4161969999132923, | |
| "grad_norm": 1.9227996432956476, | |
| "learning_rate": 1e-05, | |
| "loss": 1.362, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.41689066157981447, | |
| "grad_norm": 1.8826360241413953, | |
| "learning_rate": 1e-05, | |
| "loss": 1.413, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 0.4175843232463366, | |
| "grad_norm": 1.6984816355671049, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3703, | |
| "step": 602 | |
| }, | |
| { | |
| "epoch": 0.4182779849128588, | |
| "grad_norm": 1.8568350133313958, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3909, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 0.4189716465793809, | |
| "grad_norm": 1.917415585499782, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3698, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 0.41966530824590303, | |
| "grad_norm": 1.8859603577321011, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4034, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 0.4203589699124252, | |
| "grad_norm": 1.8889156912270977, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4125, | |
| "step": 606 | |
| }, | |
| { | |
| "epoch": 0.42105263157894735, | |
| "grad_norm": 1.911197022952282, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3655, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 0.42174629324546953, | |
| "grad_norm": 1.9782065265119402, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4201, | |
| "step": 608 | |
| }, | |
| { | |
| "epoch": 0.42243995491199166, | |
| "grad_norm": 1.8747816354205493, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3664, | |
| "step": 609 | |
| }, | |
| { | |
| "epoch": 0.42313361657851384, | |
| "grad_norm": 1.9012852889769853, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3259, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.42382727824503597, | |
| "grad_norm": 1.8979118524613148, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3971, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 0.42452093991155815, | |
| "grad_norm": 1.8520667588064368, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4135, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 0.4252146015780803, | |
| "grad_norm": 1.8342529385215973, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3863, | |
| "step": 613 | |
| }, | |
| { | |
| "epoch": 0.42590826324460246, | |
| "grad_norm": 1.8295048253836184, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3087, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 0.4266019249111246, | |
| "grad_norm": 1.7695063050348687, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4162, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 0.42729558657764677, | |
| "grad_norm": 2.082777123696616, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4278, | |
| "step": 616 | |
| }, | |
| { | |
| "epoch": 0.4279892482441689, | |
| "grad_norm": 1.7051067845957866, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4095, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 0.4286829099106911, | |
| "grad_norm": 1.8695270037035383, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4503, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 0.4293765715772132, | |
| "grad_norm": 1.9281893067578681, | |
| "learning_rate": 1e-05, | |
| "loss": 1.38, | |
| "step": 619 | |
| }, | |
| { | |
| "epoch": 0.4300702332437354, | |
| "grad_norm": 1.777014355958319, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3984, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.4307638949102575, | |
| "grad_norm": 1.842917760375874, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3853, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 0.4314575565767797, | |
| "grad_norm": 1.8267047634185034, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3524, | |
| "step": 622 | |
| }, | |
| { | |
| "epoch": 0.4321512182433018, | |
| "grad_norm": 2.0773027372115394, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3901, | |
| "step": 623 | |
| }, | |
| { | |
| "epoch": 0.432844879909824, | |
| "grad_norm": 1.8770974629057278, | |
| "learning_rate": 1e-05, | |
| "loss": 1.376, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 0.43353854157634614, | |
| "grad_norm": 1.8429345410006643, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3877, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.43423220324286826, | |
| "grad_norm": 1.7713210328211393, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3976, | |
| "step": 626 | |
| }, | |
| { | |
| "epoch": 0.43492586490939045, | |
| "grad_norm": 1.8286568674310186, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3465, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 0.4356195265759126, | |
| "grad_norm": 1.8690248103614588, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3826, | |
| "step": 628 | |
| }, | |
| { | |
| "epoch": 0.43631318824243476, | |
| "grad_norm": 2.025467675895459, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3671, | |
| "step": 629 | |
| }, | |
| { | |
| "epoch": 0.4370068499089569, | |
| "grad_norm": 1.8903327597767532, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3548, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.43770051157547907, | |
| "grad_norm": 1.7825184036448585, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3978, | |
| "step": 631 | |
| }, | |
| { | |
| "epoch": 0.4383941732420012, | |
| "grad_norm": 1.9191594921321666, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4214, | |
| "step": 632 | |
| }, | |
| { | |
| "epoch": 0.4390878349085234, | |
| "grad_norm": 1.8710466935982961, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4685, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 0.4397814965750455, | |
| "grad_norm": 1.9252588373442325, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3529, | |
| "step": 634 | |
| }, | |
| { | |
| "epoch": 0.4404751582415677, | |
| "grad_norm": 1.8307810199559515, | |
| "learning_rate": 1e-05, | |
| "loss": 1.379, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 0.4411688199080898, | |
| "grad_norm": 1.9458457674285412, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3734, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 0.441862481574612, | |
| "grad_norm": 1.8137288582058262, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3309, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 0.4425561432411341, | |
| "grad_norm": 1.972371773276373, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4303, | |
| "step": 638 | |
| }, | |
| { | |
| "epoch": 0.4432498049076563, | |
| "grad_norm": 1.9163578588020878, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3191, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 0.44394346657417844, | |
| "grad_norm": 1.8861727336930172, | |
| "learning_rate": 1e-05, | |
| "loss": 1.374, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.4446371282407006, | |
| "grad_norm": 1.7493814291306995, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3992, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 0.44533078990722275, | |
| "grad_norm": 1.8289477836679608, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3472, | |
| "step": 642 | |
| }, | |
| { | |
| "epoch": 0.44602445157374493, | |
| "grad_norm": 1.9323084827259125, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3966, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 0.44671811324026706, | |
| "grad_norm": 1.866327691734681, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3793, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 0.44741177490678924, | |
| "grad_norm": 1.8217937158784923, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4244, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 0.44810543657331137, | |
| "grad_norm": 1.8364372691697852, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3755, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 0.4487990982398335, | |
| "grad_norm": 2.013774748739019, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3807, | |
| "step": 647 | |
| }, | |
| { | |
| "epoch": 0.4494927599063557, | |
| "grad_norm": 1.805702981754065, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3942, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 0.4501864215728778, | |
| "grad_norm": 1.9948282199322953, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4366, | |
| "step": 649 | |
| }, | |
| { | |
| "epoch": 0.4508800832394, | |
| "grad_norm": 1.978073570947312, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4254, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4515737449059221, | |
| "grad_norm": 1.9232595674423625, | |
| "learning_rate": 1e-05, | |
| "loss": 1.404, | |
| "step": 651 | |
| }, | |
| { | |
| "epoch": 0.4522674065724443, | |
| "grad_norm": 1.855166028756208, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3791, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 0.4529610682389664, | |
| "grad_norm": 1.7786637265688048, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3936, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 0.4536547299054886, | |
| "grad_norm": 1.6650125213330889, | |
| "learning_rate": 1e-05, | |
| "loss": 1.38, | |
| "step": 654 | |
| }, | |
| { | |
| "epoch": 0.45434839157201073, | |
| "grad_norm": 1.9212024581799898, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3951, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 0.4550420532385329, | |
| "grad_norm": 1.855795867809702, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3563, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 0.45573571490505504, | |
| "grad_norm": 1.8769724131475898, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3974, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 0.4564293765715772, | |
| "grad_norm": 1.7926551468562113, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3974, | |
| "step": 658 | |
| }, | |
| { | |
| "epoch": 0.45712303823809936, | |
| "grad_norm": 1.9178863763832097, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4146, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 0.45781669990462154, | |
| "grad_norm": 1.9353453542577745, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4187, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.45851036157114367, | |
| "grad_norm": 1.952403141808128, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4008, | |
| "step": 661 | |
| }, | |
| { | |
| "epoch": 0.45920402323766585, | |
| "grad_norm": 1.757904596732749, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4264, | |
| "step": 662 | |
| }, | |
| { | |
| "epoch": 0.459897684904188, | |
| "grad_norm": 1.9395774430498347, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3671, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 0.46059134657071016, | |
| "grad_norm": 1.8890203107447723, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3715, | |
| "step": 664 | |
| }, | |
| { | |
| "epoch": 0.4612850082372323, | |
| "grad_norm": 1.8286704559781526, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3941, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 0.46197866990375447, | |
| "grad_norm": 1.810001367913328, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3883, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 0.4626723315702766, | |
| "grad_norm": 1.9152160013729407, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3813, | |
| "step": 667 | |
| }, | |
| { | |
| "epoch": 0.4633659932367988, | |
| "grad_norm": 2.0094067199071546, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3176, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 0.4640596549033209, | |
| "grad_norm": 1.8734070756332013, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4233, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 0.46475331656984303, | |
| "grad_norm": 1.7612782018456719, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3438, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.4654469782363652, | |
| "grad_norm": 1.8683597363973128, | |
| "learning_rate": 1e-05, | |
| "loss": 1.355, | |
| "step": 671 | |
| }, | |
| { | |
| "epoch": 0.46614063990288734, | |
| "grad_norm": 2.0012042847679914, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4169, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 0.4668343015694095, | |
| "grad_norm": 1.9416276311234992, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3742, | |
| "step": 673 | |
| }, | |
| { | |
| "epoch": 0.46752796323593165, | |
| "grad_norm": 1.8922567854833168, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3779, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 0.46822162490245384, | |
| "grad_norm": 1.7793620481094168, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3915, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.46891528656897596, | |
| "grad_norm": 2.167051856102186, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3989, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 0.46960894823549815, | |
| "grad_norm": 1.871191468625491, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3547, | |
| "step": 677 | |
| }, | |
| { | |
| "epoch": 0.4703026099020203, | |
| "grad_norm": 1.7304516085856814, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3808, | |
| "step": 678 | |
| }, | |
| { | |
| "epoch": 0.47099627156854246, | |
| "grad_norm": 1.745983949797038, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3425, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 0.4716899332350646, | |
| "grad_norm": 1.8779422010813842, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3592, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.47238359490158677, | |
| "grad_norm": 1.9934611197767778, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3538, | |
| "step": 681 | |
| }, | |
| { | |
| "epoch": 0.4730772565681089, | |
| "grad_norm": 1.9623233741400323, | |
| "learning_rate": 1e-05, | |
| "loss": 1.346, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 0.4737709182346311, | |
| "grad_norm": 1.796679910714133, | |
| "learning_rate": 1e-05, | |
| "loss": 1.355, | |
| "step": 683 | |
| }, | |
| { | |
| "epoch": 0.4744645799011532, | |
| "grad_norm": 1.7722903042578, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4071, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 0.4751582415676754, | |
| "grad_norm": 1.8052196535090335, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3814, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 0.4758519032341975, | |
| "grad_norm": 1.7948553541752041, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3729, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 0.4765455649007197, | |
| "grad_norm": 2.007593236556849, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3673, | |
| "step": 687 | |
| }, | |
| { | |
| "epoch": 0.4772392265672418, | |
| "grad_norm": 1.9797683213286288, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3629, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 0.477932888233764, | |
| "grad_norm": 1.9548220267121557, | |
| "learning_rate": 1e-05, | |
| "loss": 1.383, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 0.47862654990028614, | |
| "grad_norm": 1.929009442664775, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3921, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.47932021156680826, | |
| "grad_norm": 1.8421018984754722, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3672, | |
| "step": 691 | |
| }, | |
| { | |
| "epoch": 0.48001387323333045, | |
| "grad_norm": 1.8234045076467948, | |
| "learning_rate": 1e-05, | |
| "loss": 1.398, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 0.4807075348998526, | |
| "grad_norm": 2.066405568328085, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3348, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 0.48140119656637476, | |
| "grad_norm": 1.9963663982661528, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3662, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 0.4820948582328969, | |
| "grad_norm": 1.883319503377311, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3835, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 0.48278851989941907, | |
| "grad_norm": 1.56186845290527, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3386, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 0.4834821815659412, | |
| "grad_norm": 1.8687876803046082, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3501, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 0.4841758432324634, | |
| "grad_norm": 2.0432055137936036, | |
| "learning_rate": 1e-05, | |
| "loss": 1.366, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 0.4848695048989855, | |
| "grad_norm": 1.7989863129663937, | |
| "learning_rate": 1e-05, | |
| "loss": 1.387, | |
| "step": 699 | |
| }, | |
| { | |
| "epoch": 0.4855631665655077, | |
| "grad_norm": 1.9153016999718706, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4391, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.4862568282320298, | |
| "grad_norm": 1.8256041578604432, | |
| "learning_rate": 1e-05, | |
| "loss": 1.385, | |
| "step": 701 | |
| }, | |
| { | |
| "epoch": 0.486950489898552, | |
| "grad_norm": 1.8391212949021127, | |
| "learning_rate": 1e-05, | |
| "loss": 1.388, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 0.4876441515650741, | |
| "grad_norm": 1.9978982769762854, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3841, | |
| "step": 703 | |
| }, | |
| { | |
| "epoch": 0.4883378132315963, | |
| "grad_norm": 1.9247875553761613, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3514, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 0.48903147489811843, | |
| "grad_norm": 1.8114868285875125, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3659, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 0.4897251365646406, | |
| "grad_norm": 1.8506801571462923, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4349, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 0.49041879823116274, | |
| "grad_norm": 1.8480926190821445, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3768, | |
| "step": 707 | |
| }, | |
| { | |
| "epoch": 0.4911124598976849, | |
| "grad_norm": 1.9547558985555982, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3767, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 0.49180612156420705, | |
| "grad_norm": 1.7241605811559706, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4277, | |
| "step": 709 | |
| }, | |
| { | |
| "epoch": 0.49249978323072924, | |
| "grad_norm": 1.9597827919247104, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2981, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.49319344489725137, | |
| "grad_norm": 2.241085697740784, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3738, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 0.4938871065637735, | |
| "grad_norm": 2.1107944914928023, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3788, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 0.4945807682302957, | |
| "grad_norm": 1.9819856716802244, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3805, | |
| "step": 713 | |
| }, | |
| { | |
| "epoch": 0.4952744298968178, | |
| "grad_norm": 1.7818814238988587, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3864, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 0.49596809156334, | |
| "grad_norm": 1.8028425771808247, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4047, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 0.4966617532298621, | |
| "grad_norm": 1.810766896499592, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3899, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 0.4973554148963843, | |
| "grad_norm": 1.8166160127003885, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3603, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 0.4980490765629064, | |
| "grad_norm": 2.13361803754948, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4022, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 0.4987427382294286, | |
| "grad_norm": 2.016997065796454, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4034, | |
| "step": 719 | |
| }, | |
| { | |
| "epoch": 0.49943639989595073, | |
| "grad_norm": 2.0748032742705793, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4105, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.5001300615624729, | |
| "grad_norm": 1.8523948307863285, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3697, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 0.500823723228995, | |
| "grad_norm": 1.8122879610484914, | |
| "learning_rate": 1e-05, | |
| "loss": 1.404, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 0.5015173848955172, | |
| "grad_norm": 1.8832267152617044, | |
| "learning_rate": 1e-05, | |
| "loss": 1.412, | |
| "step": 723 | |
| }, | |
| { | |
| "epoch": 0.5022110465620394, | |
| "grad_norm": 1.8257618111117269, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4016, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 0.5029047082285615, | |
| "grad_norm": 1.7942051170674276, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3686, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5035983698950837, | |
| "grad_norm": 1.97042982772308, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3575, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 0.5042920315616058, | |
| "grad_norm": 2.053740555710897, | |
| "learning_rate": 1e-05, | |
| "loss": 1.379, | |
| "step": 727 | |
| }, | |
| { | |
| "epoch": 0.504985693228128, | |
| "grad_norm": 1.977833812394873, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3174, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 0.5056793548946501, | |
| "grad_norm": 1.9322288272504544, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4038, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 0.5063730165611723, | |
| "grad_norm": 2.0400420303295275, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3953, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.5070666782276945, | |
| "grad_norm": 1.976799409278783, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3895, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 0.5077603398942167, | |
| "grad_norm": 1.9306400432028785, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4228, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 0.5084540015607387, | |
| "grad_norm": 1.7347175465813025, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3704, | |
| "step": 733 | |
| }, | |
| { | |
| "epoch": 0.5091476632272609, | |
| "grad_norm": 1.7180782433090167, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3547, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 0.5098413248937831, | |
| "grad_norm": 1.852662073693016, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4228, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 0.5105349865603052, | |
| "grad_norm": 1.8075356657945834, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3887, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 0.5112286482268273, | |
| "grad_norm": 1.7367287190868546, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3752, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 0.5119223098933495, | |
| "grad_norm": 1.8286379353529887, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3963, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 0.5126159715598717, | |
| "grad_norm": 1.7825873791247597, | |
| "learning_rate": 1e-05, | |
| "loss": 1.397, | |
| "step": 739 | |
| }, | |
| { | |
| "epoch": 0.5133096332263938, | |
| "grad_norm": 2.0430860649068396, | |
| "learning_rate": 1e-05, | |
| "loss": 1.368, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.514003294892916, | |
| "grad_norm": 1.82138787437088, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3888, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 0.5146969565594381, | |
| "grad_norm": 1.9949710877946953, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3221, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 0.5153906182259603, | |
| "grad_norm": 1.7999499915500219, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3214, | |
| "step": 743 | |
| }, | |
| { | |
| "epoch": 0.5160842798924824, | |
| "grad_norm": 1.944101461453592, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3641, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 0.5167779415590046, | |
| "grad_norm": 1.9484142602052452, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4183, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 0.5174716032255268, | |
| "grad_norm": 1.9261361736136446, | |
| "learning_rate": 1e-05, | |
| "loss": 1.39, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 0.518165264892049, | |
| "grad_norm": 1.873564918154059, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3511, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 0.518858926558571, | |
| "grad_norm": 1.975733457802349, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3786, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 0.5195525882250932, | |
| "grad_norm": 1.9396312909048352, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3161, | |
| "step": 749 | |
| }, | |
| { | |
| "epoch": 0.5202462498916154, | |
| "grad_norm": 1.736368973000078, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3704, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5209399115581376, | |
| "grad_norm": 1.7911382730465684, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3379, | |
| "step": 751 | |
| }, | |
| { | |
| "epoch": 0.5216335732246596, | |
| "grad_norm": 1.8590253300206483, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3531, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 0.5223272348911818, | |
| "grad_norm": 1.9765690927121422, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3728, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 0.523020896557704, | |
| "grad_norm": 2.0025745088354148, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3939, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 0.5237145582242261, | |
| "grad_norm": 1.9192921819712603, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3914, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 0.5244082198907483, | |
| "grad_norm": 1.8600023632893699, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3499, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 0.5251018815572704, | |
| "grad_norm": 1.9355189855324415, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3471, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 0.5257955432237926, | |
| "grad_norm": 2.0191814963451495, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4244, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 0.5264892048903147, | |
| "grad_norm": 1.883726645686763, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3619, | |
| "step": 759 | |
| }, | |
| { | |
| "epoch": 0.5271828665568369, | |
| "grad_norm": 1.7947607621987598, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3404, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.5278765282233591, | |
| "grad_norm": 1.8889256123493143, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4012, | |
| "step": 761 | |
| }, | |
| { | |
| "epoch": 0.5285701898898812, | |
| "grad_norm": 1.9230513498083945, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3475, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 0.5292638515564033, | |
| "grad_norm": 1.8067019812009846, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3911, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 0.5299575132229255, | |
| "grad_norm": 1.8380178891570427, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3614, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 0.5306511748894477, | |
| "grad_norm": 1.8235814437026021, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3556, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 0.5313448365559699, | |
| "grad_norm": 1.830617817421823, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3844, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 0.5320384982224919, | |
| "grad_norm": 1.9168498268351752, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3669, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 0.5327321598890141, | |
| "grad_norm": 1.945357782268639, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3595, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 0.5334258215555363, | |
| "grad_norm": 2.237537090117557, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3683, | |
| "step": 769 | |
| }, | |
| { | |
| "epoch": 0.5341194832220585, | |
| "grad_norm": 1.8612240335487669, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3937, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.5348131448885806, | |
| "grad_norm": 2.0804814565765266, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3706, | |
| "step": 771 | |
| }, | |
| { | |
| "epoch": 0.5355068065551027, | |
| "grad_norm": 1.9951415692701584, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3696, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 0.5362004682216249, | |
| "grad_norm": 1.7248104660180126, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3747, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 0.5368941298881471, | |
| "grad_norm": 1.7711620914058126, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3776, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 0.5375877915546692, | |
| "grad_norm": 1.7502844737194139, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3719, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.5382814532211914, | |
| "grad_norm": 1.975837716180595, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3392, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 0.5389751148877135, | |
| "grad_norm": 2.079903617254406, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3789, | |
| "step": 777 | |
| }, | |
| { | |
| "epoch": 0.5396687765542356, | |
| "grad_norm": 1.8776752694265728, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3968, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 0.5403624382207578, | |
| "grad_norm": 1.7524287384136423, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3538, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 0.54105609988728, | |
| "grad_norm": 2.034871602096054, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3546, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.5417497615538022, | |
| "grad_norm": 2.0262231787193117, | |
| "learning_rate": 1e-05, | |
| "loss": 1.351, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 0.5424434232203242, | |
| "grad_norm": 1.8800790132697096, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4063, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 0.5431370848868464, | |
| "grad_norm": 1.8397602468821888, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3284, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 0.5438307465533686, | |
| "grad_norm": 1.7812165456068076, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3116, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 0.5445244082198908, | |
| "grad_norm": 1.7794765249417972, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3796, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 0.5452180698864129, | |
| "grad_norm": 1.9715381587594996, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3771, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 0.545911731552935, | |
| "grad_norm": 1.7931337412184942, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4108, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 0.5466053932194572, | |
| "grad_norm": 1.8048639309386612, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3689, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 0.5472990548859794, | |
| "grad_norm": 1.8622884245084281, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3513, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 0.5479927165525015, | |
| "grad_norm": 1.816207257972127, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3331, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.5486863782190237, | |
| "grad_norm": 1.7508374499546198, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3594, | |
| "step": 791 | |
| }, | |
| { | |
| "epoch": 0.5493800398855458, | |
| "grad_norm": 1.7086134745690817, | |
| "learning_rate": 1e-05, | |
| "loss": 1.343, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 0.550073701552068, | |
| "grad_norm": 1.8826776634822224, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4312, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 0.5507673632185901, | |
| "grad_norm": 1.85161413953065, | |
| "learning_rate": 1e-05, | |
| "loss": 1.403, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 0.5514610248851123, | |
| "grad_norm": 2.169026140246512, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3514, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 0.5521546865516345, | |
| "grad_norm": 1.8974393651417674, | |
| "learning_rate": 1e-05, | |
| "loss": 1.376, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 0.5528483482181566, | |
| "grad_norm": 1.7818922230701455, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3365, | |
| "step": 797 | |
| }, | |
| { | |
| "epoch": 0.5535420098846787, | |
| "grad_norm": 1.8268621071764144, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3267, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 0.5542356715512009, | |
| "grad_norm": 1.741645703166344, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3401, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 0.5549293332177231, | |
| "grad_norm": 1.7684014664169725, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3453, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5556229948842452, | |
| "grad_norm": 1.872082247187476, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3849, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 0.5563166565507673, | |
| "grad_norm": 1.843965544279036, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3515, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 0.5570103182172895, | |
| "grad_norm": 1.7503097471047913, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3772, | |
| "step": 803 | |
| }, | |
| { | |
| "epoch": 0.5577039798838117, | |
| "grad_norm": 2.152027612117084, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3369, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 0.5583976415503338, | |
| "grad_norm": 1.9678830918650134, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3363, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 0.559091303216856, | |
| "grad_norm": 1.7813746189463364, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3268, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 0.5597849648833781, | |
| "grad_norm": 1.835262281512248, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3155, | |
| "step": 807 | |
| }, | |
| { | |
| "epoch": 0.5604786265499003, | |
| "grad_norm": 1.778985993964162, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3913, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 0.5611722882164224, | |
| "grad_norm": 2.07464881345254, | |
| "learning_rate": 1e-05, | |
| "loss": 1.35, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 0.5618659498829446, | |
| "grad_norm": 2.186610515953725, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3954, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.5625596115494668, | |
| "grad_norm": 2.129937538785471, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3644, | |
| "step": 811 | |
| }, | |
| { | |
| "epoch": 0.563253273215989, | |
| "grad_norm": 2.113811860287663, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3397, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 0.563946934882511, | |
| "grad_norm": 2.0550884583810487, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3316, | |
| "step": 813 | |
| }, | |
| { | |
| "epoch": 0.5646405965490332, | |
| "grad_norm": 1.85558729084722, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3586, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 0.5653342582155554, | |
| "grad_norm": 1.7940205357205186, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3328, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 0.5660279198820776, | |
| "grad_norm": 1.8867105415827423, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4099, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 0.5667215815485996, | |
| "grad_norm": 1.9561860862534044, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4491, | |
| "step": 817 | |
| }, | |
| { | |
| "epoch": 0.5674152432151218, | |
| "grad_norm": 2.0876714983856313, | |
| "learning_rate": 1e-05, | |
| "loss": 1.364, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 0.568108904881644, | |
| "grad_norm": 1.99399680820796, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3033, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 0.5688025665481661, | |
| "grad_norm": 1.976493240480479, | |
| "learning_rate": 1e-05, | |
| "loss": 1.333, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.5694962282146883, | |
| "grad_norm": 1.95662043100926, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3136, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 0.5701898898812104, | |
| "grad_norm": 1.6497733053601713, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3861, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 0.5708835515477326, | |
| "grad_norm": 1.818935374314111, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3857, | |
| "step": 823 | |
| }, | |
| { | |
| "epoch": 0.5715772132142547, | |
| "grad_norm": 1.747967079872631, | |
| "learning_rate": 1e-05, | |
| "loss": 1.349, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 0.5722708748807769, | |
| "grad_norm": 1.8908128266598878, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3616, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.5729645365472991, | |
| "grad_norm": 1.8720455934356435, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3238, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 0.5736581982138212, | |
| "grad_norm": 1.7833794579975666, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3956, | |
| "step": 827 | |
| }, | |
| { | |
| "epoch": 0.5743518598803433, | |
| "grad_norm": 2.1032994082238203, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3575, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 0.5750455215468655, | |
| "grad_norm": 2.0134774144141487, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3464, | |
| "step": 829 | |
| }, | |
| { | |
| "epoch": 0.5757391832133877, | |
| "grad_norm": 1.8711603789528202, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4115, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.5764328448799099, | |
| "grad_norm": 1.7819151783946192, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3615, | |
| "step": 831 | |
| }, | |
| { | |
| "epoch": 0.5771265065464319, | |
| "grad_norm": 1.9690584032628007, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3901, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 0.5778201682129541, | |
| "grad_norm": 1.9642954115887026, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3518, | |
| "step": 833 | |
| }, | |
| { | |
| "epoch": 0.5785138298794763, | |
| "grad_norm": 1.744497422769626, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3469, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 0.5792074915459985, | |
| "grad_norm": 2.0755337336634767, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3823, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 0.5799011532125206, | |
| "grad_norm": 1.8340533083393944, | |
| "learning_rate": 1e-05, | |
| "loss": 1.374, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 0.5805948148790427, | |
| "grad_norm": 1.7560881859736863, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4051, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 0.5812884765455649, | |
| "grad_norm": 1.7470644935895128, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3751, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 0.5819821382120871, | |
| "grad_norm": 1.6498957999043187, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3425, | |
| "step": 839 | |
| }, | |
| { | |
| "epoch": 0.5826757998786092, | |
| "grad_norm": 1.708284581084529, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3772, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.5833694615451314, | |
| "grad_norm": 1.7341358375022322, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4271, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 0.5840631232116535, | |
| "grad_norm": 1.96090088265045, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3811, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 0.5847567848781756, | |
| "grad_norm": 1.8309156039915615, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3518, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 0.5854504465446978, | |
| "grad_norm": 1.766087750027872, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2791, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 0.58614410821122, | |
| "grad_norm": 1.840859949308765, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3364, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 0.5868377698777422, | |
| "grad_norm": 1.8811612648079954, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3866, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 0.5875314315442642, | |
| "grad_norm": 1.7882571243218808, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3818, | |
| "step": 847 | |
| }, | |
| { | |
| "epoch": 0.5882250932107864, | |
| "grad_norm": 1.7751021276239416, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3877, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 0.5889187548773086, | |
| "grad_norm": 1.863598117479808, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3283, | |
| "step": 849 | |
| }, | |
| { | |
| "epoch": 0.5896124165438308, | |
| "grad_norm": 1.7934939156618281, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3909, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.5903060782103529, | |
| "grad_norm": 1.8701501570430885, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3911, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 0.590999739876875, | |
| "grad_norm": 1.8075702135316054, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3267, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 0.5916934015433972, | |
| "grad_norm": 1.7844322891888966, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3749, | |
| "step": 853 | |
| }, | |
| { | |
| "epoch": 0.5923870632099194, | |
| "grad_norm": 1.6364137503955962, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3903, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 0.5930807248764415, | |
| "grad_norm": 1.845018525439836, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3638, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 0.5937743865429637, | |
| "grad_norm": 1.6438686492795926, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3356, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 0.5944680482094858, | |
| "grad_norm": 1.8461296262824984, | |
| "learning_rate": 1e-05, | |
| "loss": 1.318, | |
| "step": 857 | |
| }, | |
| { | |
| "epoch": 0.595161709876008, | |
| "grad_norm": 1.7523581116541502, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3278, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 0.5958553715425301, | |
| "grad_norm": 1.7821675206089143, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3257, | |
| "step": 859 | |
| }, | |
| { | |
| "epoch": 0.5965490332090523, | |
| "grad_norm": 1.8952855386403753, | |
| "learning_rate": 1e-05, | |
| "loss": 1.352, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.5972426948755745, | |
| "grad_norm": 1.9061495251459173, | |
| "learning_rate": 1e-05, | |
| "loss": 1.361, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 0.5979363565420966, | |
| "grad_norm": 1.807002944323855, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3498, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 0.5986300182086187, | |
| "grad_norm": 1.8427398989259318, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3442, | |
| "step": 863 | |
| }, | |
| { | |
| "epoch": 0.5993236798751409, | |
| "grad_norm": 2.159582705739885, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4137, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 0.6000173415416631, | |
| "grad_norm": 1.8186698531077894, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3743, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 0.6007110032081852, | |
| "grad_norm": 1.7754924011854336, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3758, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 0.6014046648747073, | |
| "grad_norm": 1.7146434615480912, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3655, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 0.6020983265412295, | |
| "grad_norm": 1.6864187498892416, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3842, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 0.6027919882077517, | |
| "grad_norm": 1.7625940182372917, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3563, | |
| "step": 869 | |
| }, | |
| { | |
| "epoch": 0.6034856498742738, | |
| "grad_norm": 1.8385401005573274, | |
| "learning_rate": 1e-05, | |
| "loss": 1.317, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.604179311540796, | |
| "grad_norm": 1.8249865396470937, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3666, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 0.6048729732073181, | |
| "grad_norm": 1.774024094473835, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3677, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 0.6055666348738403, | |
| "grad_norm": 1.9300061255791403, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3668, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 0.6062602965403624, | |
| "grad_norm": 1.8538055069719046, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3639, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 0.6069539582068846, | |
| "grad_norm": 1.8234801513872547, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3183, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6076476198734068, | |
| "grad_norm": 1.8887104952070137, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3798, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 0.608341281539929, | |
| "grad_norm": 1.967685873156897, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3733, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 0.609034943206451, | |
| "grad_norm": 1.8930452243619467, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3627, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 0.6097286048729732, | |
| "grad_norm": 1.9917808148339706, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3637, | |
| "step": 879 | |
| }, | |
| { | |
| "epoch": 0.6104222665394954, | |
| "grad_norm": 1.7953716355637714, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3552, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.6111159282060176, | |
| "grad_norm": 2.054090380165633, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3309, | |
| "step": 881 | |
| }, | |
| { | |
| "epoch": 0.6118095898725396, | |
| "grad_norm": 1.774125479388038, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3346, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 0.6125032515390618, | |
| "grad_norm": 1.788110427730304, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3907, | |
| "step": 883 | |
| }, | |
| { | |
| "epoch": 0.613196913205584, | |
| "grad_norm": 1.9117105836931287, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3662, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 0.6138905748721061, | |
| "grad_norm": 1.9761438291924842, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3487, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 0.6145842365386283, | |
| "grad_norm": 1.7939497044726074, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3617, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 0.6152778982051504, | |
| "grad_norm": 1.8137308254099254, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4017, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 0.6159715598716726, | |
| "grad_norm": 1.9358335601206476, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3221, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 0.6166652215381947, | |
| "grad_norm": 1.8023857684374447, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3645, | |
| "step": 889 | |
| }, | |
| { | |
| "epoch": 0.6173588832047169, | |
| "grad_norm": 1.7858631263519855, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3915, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.6180525448712391, | |
| "grad_norm": 1.8447027974665438, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3331, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 0.6187462065377612, | |
| "grad_norm": 1.8973843442097094, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3053, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 0.6194398682042833, | |
| "grad_norm": 1.7623453790808643, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2926, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 0.6201335298708055, | |
| "grad_norm": 1.8017523463560905, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3647, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 0.6208271915373277, | |
| "grad_norm": 1.7450544611778176, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3658, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 0.6215208532038499, | |
| "grad_norm": 1.823024100294463, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3508, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 0.6222145148703719, | |
| "grad_norm": 1.8524166096635302, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3446, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 0.6229081765368941, | |
| "grad_norm": 1.7203042262118677, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3838, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 0.6236018382034163, | |
| "grad_norm": 1.7896042667571013, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3401, | |
| "step": 899 | |
| }, | |
| { | |
| "epoch": 0.6242954998699385, | |
| "grad_norm": 1.8566869792112495, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3856, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6249891615364606, | |
| "grad_norm": 1.7236631672011284, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3455, | |
| "step": 901 | |
| }, | |
| { | |
| "epoch": 0.6256828232029827, | |
| "grad_norm": 1.9857281332079058, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3347, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 0.6263764848695049, | |
| "grad_norm": 1.916490049064551, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2964, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 0.6270701465360271, | |
| "grad_norm": 1.9713572657543152, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3302, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 0.6277638082025492, | |
| "grad_norm": 1.7610441348646735, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3344, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 0.6284574698690714, | |
| "grad_norm": 1.799617843727853, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3284, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 0.6291511315355935, | |
| "grad_norm": 1.7832143262655586, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3356, | |
| "step": 907 | |
| }, | |
| { | |
| "epoch": 0.6298447932021156, | |
| "grad_norm": 1.7971828284576865, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3207, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 0.6305384548686378, | |
| "grad_norm": 1.7536423641131738, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3499, | |
| "step": 909 | |
| }, | |
| { | |
| "epoch": 0.63123211653516, | |
| "grad_norm": 1.7401477745871217, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3398, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.6319257782016822, | |
| "grad_norm": 1.7218362910164169, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4193, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 0.6326194398682042, | |
| "grad_norm": 1.9808528889101304, | |
| "learning_rate": 1e-05, | |
| "loss": 1.392, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 0.6333131015347264, | |
| "grad_norm": 1.9064408882431807, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3615, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 0.6340067632012486, | |
| "grad_norm": 1.7248366549906144, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3231, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 0.6347004248677708, | |
| "grad_norm": 1.795395048611617, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3851, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 0.6353940865342929, | |
| "grad_norm": 1.7344884424253888, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3528, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 0.636087748200815, | |
| "grad_norm": 1.700061571947052, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3348, | |
| "step": 917 | |
| }, | |
| { | |
| "epoch": 0.6367814098673372, | |
| "grad_norm": 1.8646566885856952, | |
| "learning_rate": 1e-05, | |
| "loss": 1.357, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 0.6374750715338594, | |
| "grad_norm": 1.8652265942281396, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3343, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 0.6381687332003815, | |
| "grad_norm": 1.7925656802981118, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3308, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.6388623948669037, | |
| "grad_norm": 1.7896253293176538, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3813, | |
| "step": 921 | |
| }, | |
| { | |
| "epoch": 0.6395560565334258, | |
| "grad_norm": 1.6534489942501098, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3233, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 0.640249718199948, | |
| "grad_norm": 1.8033026812852484, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3074, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 0.6409433798664701, | |
| "grad_norm": 1.822451647210804, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3217, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 0.6416370415329923, | |
| "grad_norm": 1.7821886850533442, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3531, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.6423307031995145, | |
| "grad_norm": 1.8446137766083273, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3552, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 0.6430243648660365, | |
| "grad_norm": 1.7365041612964318, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3472, | |
| "step": 927 | |
| }, | |
| { | |
| "epoch": 0.6437180265325587, | |
| "grad_norm": 1.9116647427342783, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3356, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 0.6444116881990809, | |
| "grad_norm": 1.6818488752250975, | |
| "learning_rate": 1e-05, | |
| "loss": 1.35, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 0.6451053498656031, | |
| "grad_norm": 1.8566305164008303, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3643, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.6457990115321252, | |
| "grad_norm": 1.7420953544687154, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3148, | |
| "step": 931 | |
| }, | |
| { | |
| "epoch": 0.6464926731986473, | |
| "grad_norm": 1.885744413844102, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3492, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 0.6471863348651695, | |
| "grad_norm": 1.7944270298154161, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3513, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 0.6478799965316917, | |
| "grad_norm": 1.7535218523742484, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4101, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 0.6485736581982138, | |
| "grad_norm": 1.7229852034518358, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3437, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 0.649267319864736, | |
| "grad_norm": 1.896304422647214, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3156, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 0.6499609815312581, | |
| "grad_norm": 1.8055244846850502, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3691, | |
| "step": 937 | |
| }, | |
| { | |
| "epoch": 0.6506546431977803, | |
| "grad_norm": 1.684941036557295, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3579, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 0.6513483048643024, | |
| "grad_norm": 1.8888571415510795, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3632, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 0.6520419665308246, | |
| "grad_norm": 1.8160274262290288, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3099, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.6527356281973468, | |
| "grad_norm": 1.755049632438486, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3519, | |
| "step": 941 | |
| }, | |
| { | |
| "epoch": 0.653429289863869, | |
| "grad_norm": 1.89712944315266, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3494, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 0.654122951530391, | |
| "grad_norm": 1.765188366032801, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3856, | |
| "step": 943 | |
| }, | |
| { | |
| "epoch": 0.6548166131969132, | |
| "grad_norm": 1.9497117765562002, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3665, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 0.6555102748634354, | |
| "grad_norm": 1.799101172711031, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3445, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 0.6562039365299576, | |
| "grad_norm": 1.712616408878392, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3491, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 0.6568975981964796, | |
| "grad_norm": 1.7946208261432808, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3731, | |
| "step": 947 | |
| }, | |
| { | |
| "epoch": 0.6575912598630018, | |
| "grad_norm": 1.7262699314904466, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3585, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 0.658284921529524, | |
| "grad_norm": 1.9628450564778277, | |
| "learning_rate": 1e-05, | |
| "loss": 1.301, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 0.6589785831960461, | |
| "grad_norm": 1.8061202922829884, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3783, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6596722448625683, | |
| "grad_norm": 1.7121738551781767, | |
| "learning_rate": 1e-05, | |
| "loss": 1.357, | |
| "step": 951 | |
| }, | |
| { | |
| "epoch": 0.6603659065290904, | |
| "grad_norm": 1.8850562141039617, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2819, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 0.6610595681956126, | |
| "grad_norm": 1.867193802881424, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3156, | |
| "step": 953 | |
| }, | |
| { | |
| "epoch": 0.6617532298621347, | |
| "grad_norm": 1.7443179431377005, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2751, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 0.6624468915286569, | |
| "grad_norm": 1.8733131607506688, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3534, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 0.6631405531951791, | |
| "grad_norm": 1.9784306105729255, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2742, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 0.6638342148617012, | |
| "grad_norm": 1.8959702823237385, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4094, | |
| "step": 957 | |
| }, | |
| { | |
| "epoch": 0.6645278765282233, | |
| "grad_norm": 1.664080974193892, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3658, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 0.6652215381947455, | |
| "grad_norm": 1.782008443874851, | |
| "learning_rate": 1e-05, | |
| "loss": 1.347, | |
| "step": 959 | |
| }, | |
| { | |
| "epoch": 0.6659151998612677, | |
| "grad_norm": 1.8460350587229146, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3639, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.6666088615277899, | |
| "grad_norm": 1.6425972064330443, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3693, | |
| "step": 961 | |
| }, | |
| { | |
| "epoch": 0.6673025231943119, | |
| "grad_norm": 1.8565662897573758, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3636, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 0.6679961848608341, | |
| "grad_norm": 1.7302118661778385, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3566, | |
| "step": 963 | |
| }, | |
| { | |
| "epoch": 0.6686898465273563, | |
| "grad_norm": 1.9616014623863918, | |
| "learning_rate": 1e-05, | |
| "loss": 1.374, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 0.6693835081938785, | |
| "grad_norm": 1.84734239559959, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2604, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 0.6700771698604006, | |
| "grad_norm": 1.6679061078794732, | |
| "learning_rate": 1e-05, | |
| "loss": 1.344, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 0.6707708315269227, | |
| "grad_norm": 1.9249982922421873, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3773, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 0.6714644931934449, | |
| "grad_norm": 1.9130662887594385, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3389, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 0.6721581548599671, | |
| "grad_norm": 2.0490408445129553, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3271, | |
| "step": 969 | |
| }, | |
| { | |
| "epoch": 0.6728518165264892, | |
| "grad_norm": 1.7966765927847634, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3082, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.6735454781930114, | |
| "grad_norm": 1.8365578777762348, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3813, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 0.6742391398595335, | |
| "grad_norm": 1.7856135676874048, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3791, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 0.6749328015260556, | |
| "grad_norm": 1.7189842050358264, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3859, | |
| "step": 973 | |
| }, | |
| { | |
| "epoch": 0.6756264631925778, | |
| "grad_norm": 1.697736604178591, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3531, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 0.6763201248591, | |
| "grad_norm": 1.85446433118358, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3377, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.6770137865256222, | |
| "grad_norm": 1.817734345502182, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3316, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 0.6777074481921442, | |
| "grad_norm": 1.6362921639652548, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2879, | |
| "step": 977 | |
| }, | |
| { | |
| "epoch": 0.6784011098586664, | |
| "grad_norm": 1.7236748146694982, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3023, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 0.6790947715251886, | |
| "grad_norm": 1.874808634354032, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3291, | |
| "step": 979 | |
| }, | |
| { | |
| "epoch": 0.6797884331917108, | |
| "grad_norm": 2.143605571091092, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3305, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.6804820948582329, | |
| "grad_norm": 1.7140845838212821, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2762, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 0.681175756524755, | |
| "grad_norm": 1.7627892247793258, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3793, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 0.6818694181912772, | |
| "grad_norm": 1.9327806260640557, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3314, | |
| "step": 983 | |
| }, | |
| { | |
| "epoch": 0.6825630798577994, | |
| "grad_norm": 1.9570006600139125, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3092, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 0.6832567415243215, | |
| "grad_norm": 1.857815246395867, | |
| "learning_rate": 1e-05, | |
| "loss": 1.328, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 0.6839504031908437, | |
| "grad_norm": 1.6521408115836034, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3649, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 0.6846440648573658, | |
| "grad_norm": 1.6883217532733774, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3742, | |
| "step": 987 | |
| }, | |
| { | |
| "epoch": 0.685337726523888, | |
| "grad_norm": 1.7657705434647315, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4009, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 0.6860313881904101, | |
| "grad_norm": 1.8388470770976078, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3385, | |
| "step": 989 | |
| }, | |
| { | |
| "epoch": 0.6867250498569323, | |
| "grad_norm": 1.6446394362553027, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3747, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.6874187115234545, | |
| "grad_norm": 1.76332053954708, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3744, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 0.6881123731899765, | |
| "grad_norm": 1.8551504155963352, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3461, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 0.6888060348564987, | |
| "grad_norm": 1.8242720423216203, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3949, | |
| "step": 993 | |
| }, | |
| { | |
| "epoch": 0.6894996965230209, | |
| "grad_norm": 1.8728688560334699, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4202, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 0.6901933581895431, | |
| "grad_norm": 1.8128080132317514, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3528, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 0.6908870198560652, | |
| "grad_norm": 1.7706897683233593, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3772, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 0.6915806815225873, | |
| "grad_norm": 1.867842253838222, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3735, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 0.6922743431891095, | |
| "grad_norm": 1.908519372133083, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3093, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 0.6929680048556317, | |
| "grad_norm": 1.7680738442803956, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3205, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 0.6936616665221538, | |
| "grad_norm": 1.752432098173259, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3451, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.694355328188676, | |
| "grad_norm": 1.7866959543838525, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3288, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 0.6950489898551981, | |
| "grad_norm": 1.6320555911640122, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3157, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 0.6957426515217203, | |
| "grad_norm": 1.7766379486245896, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3559, | |
| "step": 1003 | |
| }, | |
| { | |
| "epoch": 0.6964363131882424, | |
| "grad_norm": 1.7801227267982318, | |
| "learning_rate": 1e-05, | |
| "loss": 1.389, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 0.6971299748547646, | |
| "grad_norm": 1.7763939485733111, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4214, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 0.6978236365212868, | |
| "grad_norm": 1.7466154961438336, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3974, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 0.6985172981878089, | |
| "grad_norm": 1.8018054751465553, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3146, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 0.699210959854331, | |
| "grad_norm": 1.7015362135443022, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3521, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 0.6999046215208532, | |
| "grad_norm": 1.8044732359887248, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3298, | |
| "step": 1009 | |
| }, | |
| { | |
| "epoch": 0.7005982831873754, | |
| "grad_norm": 1.7248926110752036, | |
| "learning_rate": 1e-05, | |
| "loss": 1.311, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.7012919448538976, | |
| "grad_norm": 1.7408526444267358, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3346, | |
| "step": 1011 | |
| }, | |
| { | |
| "epoch": 0.7019856065204196, | |
| "grad_norm": 1.756432069962424, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3608, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 0.7026792681869418, | |
| "grad_norm": 1.9004070384049725, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3061, | |
| "step": 1013 | |
| }, | |
| { | |
| "epoch": 0.703372929853464, | |
| "grad_norm": 1.7674772646104595, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3365, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 0.7040665915199861, | |
| "grad_norm": 1.815286596926447, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3112, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 0.7047602531865083, | |
| "grad_norm": 1.9200058514873535, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3702, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 0.7054539148530304, | |
| "grad_norm": 1.7499367861528972, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3707, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 0.7061475765195526, | |
| "grad_norm": 1.925251587075512, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3208, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 0.7068412381860747, | |
| "grad_norm": 1.7154198796482498, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3336, | |
| "step": 1019 | |
| }, | |
| { | |
| "epoch": 0.7075348998525969, | |
| "grad_norm": 1.837360393002266, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3328, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.7082285615191191, | |
| "grad_norm": 1.6211349139215232, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3284, | |
| "step": 1021 | |
| }, | |
| { | |
| "epoch": 0.7089222231856412, | |
| "grad_norm": 1.866016563395064, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3198, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 0.7096158848521633, | |
| "grad_norm": 1.6839566806665383, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3651, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 0.7103095465186855, | |
| "grad_norm": 1.7159632620855965, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3268, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 0.7110032081852077, | |
| "grad_norm": 1.806422188485046, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2901, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.7116968698517299, | |
| "grad_norm": 1.7329697047767731, | |
| "learning_rate": 1e-05, | |
| "loss": 1.278, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 0.7123905315182519, | |
| "grad_norm": 1.7358387057504157, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3959, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 0.7130841931847741, | |
| "grad_norm": 1.7843805164975317, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3158, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 0.7137778548512963, | |
| "grad_norm": 1.8034487032951743, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3239, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 0.7144715165178185, | |
| "grad_norm": 1.7806257674138806, | |
| "learning_rate": 1e-05, | |
| "loss": 1.348, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.7151651781843406, | |
| "grad_norm": 1.8562808148693768, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3675, | |
| "step": 1031 | |
| }, | |
| { | |
| "epoch": 0.7158588398508627, | |
| "grad_norm": 1.9354803733254098, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3331, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 0.7165525015173849, | |
| "grad_norm": 1.7581325487197559, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3468, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 0.7172461631839071, | |
| "grad_norm": 1.802989571777425, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3429, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 0.7179398248504292, | |
| "grad_norm": 1.7427548981299272, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3555, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 0.7186334865169514, | |
| "grad_norm": 1.9113368723599877, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3458, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 0.7193271481834735, | |
| "grad_norm": 1.746074482528233, | |
| "learning_rate": 1e-05, | |
| "loss": 1.256, | |
| "step": 1037 | |
| }, | |
| { | |
| "epoch": 0.7200208098499956, | |
| "grad_norm": 1.8354380987152568, | |
| "learning_rate": 1e-05, | |
| "loss": 1.336, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 0.7207144715165178, | |
| "grad_norm": 1.945273024066637, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3985, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 0.72140813318304, | |
| "grad_norm": 1.843452934712379, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3193, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.7221017948495622, | |
| "grad_norm": 1.9504440192269308, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3296, | |
| "step": 1041 | |
| }, | |
| { | |
| "epoch": 0.7227954565160842, | |
| "grad_norm": 1.8706540071171702, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3403, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 0.7234891181826064, | |
| "grad_norm": 1.9488283001981537, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3245, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 0.7241827798491286, | |
| "grad_norm": 1.6969025009984984, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3041, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 0.7248764415156508, | |
| "grad_norm": 2.0072497667921443, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3447, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 0.7255701031821729, | |
| "grad_norm": 1.8741053070227567, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3814, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 0.726263764848695, | |
| "grad_norm": 1.8874635982516978, | |
| "learning_rate": 1e-05, | |
| "loss": 1.328, | |
| "step": 1047 | |
| }, | |
| { | |
| "epoch": 0.7269574265152172, | |
| "grad_norm": 1.8011724514294678, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3165, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 0.7276510881817394, | |
| "grad_norm": 1.7615037862154284, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3405, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 0.7283447498482615, | |
| "grad_norm": 1.8936590686337285, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3304, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.7290384115147837, | |
| "grad_norm": 1.7561903582794731, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3135, | |
| "step": 1051 | |
| }, | |
| { | |
| "epoch": 0.7297320731813058, | |
| "grad_norm": 1.9910837825376402, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3328, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 0.730425734847828, | |
| "grad_norm": 1.7932757018881984, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3244, | |
| "step": 1053 | |
| }, | |
| { | |
| "epoch": 0.7311193965143501, | |
| "grad_norm": 1.8115170033234536, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3256, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 0.7318130581808723, | |
| "grad_norm": 1.846025824397134, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2897, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 0.7325067198473945, | |
| "grad_norm": 1.8145673138145253, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3333, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 0.7332003815139165, | |
| "grad_norm": 1.6430330564527496, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3569, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 0.7338940431804387, | |
| "grad_norm": 1.7140027949989802, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3096, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 0.7345877048469609, | |
| "grad_norm": 1.9676800557132401, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3327, | |
| "step": 1059 | |
| }, | |
| { | |
| "epoch": 0.7352813665134831, | |
| "grad_norm": 1.76220089185509, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3837, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.7359750281800052, | |
| "grad_norm": 1.8650314152543974, | |
| "learning_rate": 1e-05, | |
| "loss": 1.31, | |
| "step": 1061 | |
| }, | |
| { | |
| "epoch": 0.7366686898465273, | |
| "grad_norm": 2.1105903583633054, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3189, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 0.7373623515130495, | |
| "grad_norm": 1.9330298633028662, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3116, | |
| "step": 1063 | |
| }, | |
| { | |
| "epoch": 0.7380560131795717, | |
| "grad_norm": 1.8697841477985335, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3059, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 0.7387496748460938, | |
| "grad_norm": 1.8167855532087722, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3725, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 0.739443336512616, | |
| "grad_norm": 1.9237163410520044, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3216, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 0.7401369981791381, | |
| "grad_norm": 1.6945312309384704, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3205, | |
| "step": 1067 | |
| }, | |
| { | |
| "epoch": 0.7408306598456603, | |
| "grad_norm": 1.8403636035002944, | |
| "learning_rate": 1e-05, | |
| "loss": 1.323, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 0.7415243215121824, | |
| "grad_norm": 1.8463721460630584, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3215, | |
| "step": 1069 | |
| }, | |
| { | |
| "epoch": 0.7422179831787046, | |
| "grad_norm": 1.9058089022783489, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3117, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.7429116448452268, | |
| "grad_norm": 2.1180556320179935, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3762, | |
| "step": 1071 | |
| }, | |
| { | |
| "epoch": 0.7436053065117489, | |
| "grad_norm": 1.8334895855944495, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3443, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 0.744298968178271, | |
| "grad_norm": 2.1232912269113573, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3149, | |
| "step": 1073 | |
| }, | |
| { | |
| "epoch": 0.7449926298447932, | |
| "grad_norm": 1.933335743238272, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3036, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 0.7456862915113154, | |
| "grad_norm": 1.7161884340751359, | |
| "learning_rate": 1e-05, | |
| "loss": 1.337, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.7463799531778376, | |
| "grad_norm": 1.8821904641272937, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3484, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 0.7470736148443596, | |
| "grad_norm": 1.8420209136091366, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3255, | |
| "step": 1077 | |
| }, | |
| { | |
| "epoch": 0.7477672765108818, | |
| "grad_norm": 1.6844962997358441, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3763, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 0.748460938177404, | |
| "grad_norm": 1.6853533211643357, | |
| "learning_rate": 1e-05, | |
| "loss": 1.334, | |
| "step": 1079 | |
| }, | |
| { | |
| "epoch": 0.7491545998439261, | |
| "grad_norm": 1.7019512963652867, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2932, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.7498482615104483, | |
| "grad_norm": 1.811962898514401, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3587, | |
| "step": 1081 | |
| }, | |
| { | |
| "epoch": 0.7505419231769704, | |
| "grad_norm": 1.7755093149292678, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2892, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 0.7512355848434926, | |
| "grad_norm": 1.8629164456603942, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3037, | |
| "step": 1083 | |
| }, | |
| { | |
| "epoch": 0.7519292465100147, | |
| "grad_norm": 1.7886710463953541, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2803, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 0.7526229081765369, | |
| "grad_norm": 1.695860963172146, | |
| "learning_rate": 1e-05, | |
| "loss": 1.4153, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 0.7533165698430591, | |
| "grad_norm": 1.9667102987846012, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3436, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 0.7540102315095812, | |
| "grad_norm": 1.7749867238254802, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3543, | |
| "step": 1087 | |
| }, | |
| { | |
| "epoch": 0.7547038931761033, | |
| "grad_norm": 1.8601842356859748, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3261, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 0.7553975548426255, | |
| "grad_norm": 1.7933714883604228, | |
| "learning_rate": 1e-05, | |
| "loss": 1.375, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 0.7560912165091477, | |
| "grad_norm": 1.9108510451623624, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3306, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.7567848781756699, | |
| "grad_norm": 1.7031275623765503, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3776, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 0.7574785398421919, | |
| "grad_norm": 1.723756241244783, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3069, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 0.7581722015087141, | |
| "grad_norm": 1.790196818419589, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2887, | |
| "step": 1093 | |
| }, | |
| { | |
| "epoch": 0.7588658631752363, | |
| "grad_norm": 1.6853767916962972, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3328, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 0.7595595248417585, | |
| "grad_norm": 1.8283907475641092, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3394, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 0.7602531865082806, | |
| "grad_norm": 1.7686661299844872, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3213, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 0.7609468481748027, | |
| "grad_norm": 2.0255015885014678, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3553, | |
| "step": 1097 | |
| }, | |
| { | |
| "epoch": 0.7616405098413249, | |
| "grad_norm": 1.887116897314805, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2652, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 0.7623341715078471, | |
| "grad_norm": 2.0827192956718816, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2931, | |
| "step": 1099 | |
| }, | |
| { | |
| "epoch": 0.7630278331743692, | |
| "grad_norm": 1.6696475938133672, | |
| "learning_rate": 1e-05, | |
| "loss": 1.344, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.7637214948408914, | |
| "grad_norm": 1.8696319148514944, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2875, | |
| "step": 1101 | |
| }, | |
| { | |
| "epoch": 0.7644151565074135, | |
| "grad_norm": 1.9161318005356125, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3039, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 0.7651088181739356, | |
| "grad_norm": 1.7357326775365562, | |
| "learning_rate": 1e-05, | |
| "loss": 1.327, | |
| "step": 1103 | |
| }, | |
| { | |
| "epoch": 0.7658024798404578, | |
| "grad_norm": 1.9374495795431432, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3118, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 0.76649614150698, | |
| "grad_norm": 1.8970035705740447, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3518, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 0.7671898031735022, | |
| "grad_norm": 1.7310701480408488, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3011, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 0.7678834648400242, | |
| "grad_norm": 1.6347319795022848, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3885, | |
| "step": 1107 | |
| }, | |
| { | |
| "epoch": 0.7685771265065464, | |
| "grad_norm": 1.7389954204005627, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3407, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 0.7692707881730686, | |
| "grad_norm": 1.8545763638618038, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3475, | |
| "step": 1109 | |
| }, | |
| { | |
| "epoch": 0.7699644498395908, | |
| "grad_norm": 1.830290361513446, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3372, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.7706581115061129, | |
| "grad_norm": 1.7554211037949135, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3382, | |
| "step": 1111 | |
| }, | |
| { | |
| "epoch": 0.771351773172635, | |
| "grad_norm": 1.791182051330342, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2865, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 0.7720454348391572, | |
| "grad_norm": 1.7944697955298015, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3521, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 0.7727390965056794, | |
| "grad_norm": 1.7280244820641575, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2855, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 0.7734327581722015, | |
| "grad_norm": 1.646134246431486, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3443, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 0.7741264198387237, | |
| "grad_norm": 1.9614831482482202, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3511, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 0.7748200815052458, | |
| "grad_norm": 1.8931116828060321, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3014, | |
| "step": 1117 | |
| }, | |
| { | |
| "epoch": 0.775513743171768, | |
| "grad_norm": 1.766409312274434, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3368, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 0.7762074048382901, | |
| "grad_norm": 1.7681164259480207, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3277, | |
| "step": 1119 | |
| }, | |
| { | |
| "epoch": 0.7769010665048123, | |
| "grad_norm": 1.8795478880896714, | |
| "learning_rate": 1e-05, | |
| "loss": 1.358, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.7775947281713345, | |
| "grad_norm": 1.7552042041280342, | |
| "learning_rate": 1e-05, | |
| "loss": 1.338, | |
| "step": 1121 | |
| }, | |
| { | |
| "epoch": 0.7782883898378565, | |
| "grad_norm": 1.6825666179566667, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3468, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 0.7789820515043787, | |
| "grad_norm": 2.069237558777561, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3285, | |
| "step": 1123 | |
| }, | |
| { | |
| "epoch": 0.7796757131709009, | |
| "grad_norm": 1.8945277365950668, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3457, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 0.7803693748374231, | |
| "grad_norm": 1.804604515643157, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3198, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.7810630365039452, | |
| "grad_norm": 1.6687410426653992, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3571, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 0.7817566981704673, | |
| "grad_norm": 1.8408255487647456, | |
| "learning_rate": 1e-05, | |
| "loss": 1.348, | |
| "step": 1127 | |
| }, | |
| { | |
| "epoch": 0.7824503598369895, | |
| "grad_norm": 1.7473823693404393, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3449, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 0.7831440215035117, | |
| "grad_norm": 1.762367934431706, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3296, | |
| "step": 1129 | |
| }, | |
| { | |
| "epoch": 0.7838376831700338, | |
| "grad_norm": 1.8065045373977573, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3088, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.784531344836556, | |
| "grad_norm": 1.7702834178449094, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3251, | |
| "step": 1131 | |
| }, | |
| { | |
| "epoch": 0.7852250065030781, | |
| "grad_norm": 1.684651956430982, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3221, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 0.7859186681696003, | |
| "grad_norm": 1.718401813004295, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3583, | |
| "step": 1133 | |
| }, | |
| { | |
| "epoch": 0.7866123298361224, | |
| "grad_norm": 1.7593076318939966, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3481, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 0.7873059915026446, | |
| "grad_norm": 1.9066726353857328, | |
| "learning_rate": 1e-05, | |
| "loss": 1.298, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 0.7879996531691668, | |
| "grad_norm": 1.666639956360219, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3321, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 0.7886933148356889, | |
| "grad_norm": 1.722161549449429, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3081, | |
| "step": 1137 | |
| }, | |
| { | |
| "epoch": 0.789386976502211, | |
| "grad_norm": 1.8556050317918311, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3155, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 0.7900806381687332, | |
| "grad_norm": 1.906163646274444, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3127, | |
| "step": 1139 | |
| }, | |
| { | |
| "epoch": 0.7907742998352554, | |
| "grad_norm": 1.91777805112317, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3267, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.7914679615017776, | |
| "grad_norm": 1.758622480343078, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3604, | |
| "step": 1141 | |
| }, | |
| { | |
| "epoch": 0.7921616231682996, | |
| "grad_norm": 1.818304243791579, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3744, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 0.7928552848348218, | |
| "grad_norm": 1.7096779670968696, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2967, | |
| "step": 1143 | |
| }, | |
| { | |
| "epoch": 0.793548946501344, | |
| "grad_norm": 1.8288634798052434, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3836, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 0.7942426081678661, | |
| "grad_norm": 1.837569087516797, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3306, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 0.7949362698343883, | |
| "grad_norm": 1.757663683708377, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3289, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 0.7956299315009104, | |
| "grad_norm": 1.5733908350037635, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3293, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 0.7963235931674326, | |
| "grad_norm": 1.7006097263550368, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3544, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 0.7970172548339547, | |
| "grad_norm": 1.852767855910649, | |
| "learning_rate": 1e-05, | |
| "loss": 1.367, | |
| "step": 1149 | |
| }, | |
| { | |
| "epoch": 0.7977109165004769, | |
| "grad_norm": 1.8236237376393378, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3298, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.7984045781669991, | |
| "grad_norm": 1.7542840584318944, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3305, | |
| "step": 1151 | |
| }, | |
| { | |
| "epoch": 0.7990982398335212, | |
| "grad_norm": 1.7363578108911824, | |
| "learning_rate": 1e-05, | |
| "loss": 1.312, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 0.7997919015000433, | |
| "grad_norm": 1.7447796524158012, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3146, | |
| "step": 1153 | |
| }, | |
| { | |
| "epoch": 0.8004855631665655, | |
| "grad_norm": 1.7998354010324689, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2651, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 0.8011792248330877, | |
| "grad_norm": 1.72370764035216, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3313, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 0.8018728864996099, | |
| "grad_norm": 1.9027302235993584, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3275, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 0.8025665481661319, | |
| "grad_norm": 1.767361577842723, | |
| "learning_rate": 1e-05, | |
| "loss": 1.281, | |
| "step": 1157 | |
| }, | |
| { | |
| "epoch": 0.8032602098326541, | |
| "grad_norm": 1.6957352595013901, | |
| "learning_rate": 1e-05, | |
| "loss": 1.339, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 0.8039538714991763, | |
| "grad_norm": 1.8811200117431954, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3694, | |
| "step": 1159 | |
| }, | |
| { | |
| "epoch": 0.8046475331656985, | |
| "grad_norm": 1.8732110913975033, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3828, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.8053411948322206, | |
| "grad_norm": 1.806929633499466, | |
| "learning_rate": 1e-05, | |
| "loss": 1.293, | |
| "step": 1161 | |
| }, | |
| { | |
| "epoch": 0.8060348564987427, | |
| "grad_norm": 1.9234019762346322, | |
| "learning_rate": 1e-05, | |
| "loss": 1.356, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 0.8067285181652649, | |
| "grad_norm": 1.8612299857621706, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2944, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 0.8074221798317871, | |
| "grad_norm": 1.8949079530850956, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2759, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 0.8081158414983092, | |
| "grad_norm": 1.835747159847785, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2959, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 0.8088095031648314, | |
| "grad_norm": 1.6994420208999754, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3546, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 0.8095031648313535, | |
| "grad_norm": 1.7058565978885374, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3016, | |
| "step": 1167 | |
| }, | |
| { | |
| "epoch": 0.8101968264978756, | |
| "grad_norm": 1.7202328196552465, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3244, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 0.8108904881643978, | |
| "grad_norm": 1.5969257126998517, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2941, | |
| "step": 1169 | |
| }, | |
| { | |
| "epoch": 0.81158414983092, | |
| "grad_norm": 2.033122314469678, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3112, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.8122778114974422, | |
| "grad_norm": 1.818607135695885, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3372, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 0.8129714731639642, | |
| "grad_norm": 1.7332347851944176, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2963, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 0.8136651348304864, | |
| "grad_norm": 1.79912386221675, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3192, | |
| "step": 1173 | |
| }, | |
| { | |
| "epoch": 0.8143587964970086, | |
| "grad_norm": 1.759247644611021, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3087, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 0.8150524581635308, | |
| "grad_norm": 1.81196721719679, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3347, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.8157461198300529, | |
| "grad_norm": 1.8329350171829653, | |
| "learning_rate": 1e-05, | |
| "loss": 1.347, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 0.816439781496575, | |
| "grad_norm": 2.097683322497983, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3187, | |
| "step": 1177 | |
| }, | |
| { | |
| "epoch": 0.8171334431630972, | |
| "grad_norm": 1.682730633916439, | |
| "learning_rate": 1e-05, | |
| "loss": 1.326, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 0.8178271048296194, | |
| "grad_norm": 1.7967406525079352, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3229, | |
| "step": 1179 | |
| }, | |
| { | |
| "epoch": 0.8185207664961415, | |
| "grad_norm": 1.7155776890591734, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3143, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.8192144281626637, | |
| "grad_norm": 1.731948891871397, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3044, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 0.8199080898291858, | |
| "grad_norm": 1.6258267218121343, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2966, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 0.820601751495708, | |
| "grad_norm": 1.8947923997247118, | |
| "learning_rate": 1e-05, | |
| "loss": 1.25, | |
| "step": 1183 | |
| }, | |
| { | |
| "epoch": 0.8212954131622301, | |
| "grad_norm": 1.9268221611589265, | |
| "learning_rate": 1e-05, | |
| "loss": 1.363, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 0.8219890748287523, | |
| "grad_norm": 1.6151381049081766, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3323, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 0.8226827364952745, | |
| "grad_norm": 1.6754745238527986, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2736, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 0.8233763981617965, | |
| "grad_norm": 1.6566597340456615, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3292, | |
| "step": 1187 | |
| }, | |
| { | |
| "epoch": 0.8240700598283187, | |
| "grad_norm": 1.7868911126447826, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3146, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 0.8247637214948409, | |
| "grad_norm": 1.7395269678979228, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3172, | |
| "step": 1189 | |
| }, | |
| { | |
| "epoch": 0.8254573831613631, | |
| "grad_norm": 1.846976443991522, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2845, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.8261510448278852, | |
| "grad_norm": 1.9263142545199925, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3218, | |
| "step": 1191 | |
| }, | |
| { | |
| "epoch": 0.8268447064944073, | |
| "grad_norm": 1.6753572265693735, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3347, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 0.8275383681609295, | |
| "grad_norm": 1.8402922863638769, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3389, | |
| "step": 1193 | |
| }, | |
| { | |
| "epoch": 0.8282320298274517, | |
| "grad_norm": 1.7003805951507205, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3016, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 0.8289256914939738, | |
| "grad_norm": 1.7016887764707231, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3312, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 0.829619353160496, | |
| "grad_norm": 1.7450143183218212, | |
| "learning_rate": 1e-05, | |
| "loss": 1.303, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 0.8303130148270181, | |
| "grad_norm": 1.7676264585702774, | |
| "learning_rate": 1e-05, | |
| "loss": 1.317, | |
| "step": 1197 | |
| }, | |
| { | |
| "epoch": 0.8310066764935403, | |
| "grad_norm": 2.0267151482602177, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2687, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 0.8317003381600624, | |
| "grad_norm": 1.6648828369964448, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3436, | |
| "step": 1199 | |
| }, | |
| { | |
| "epoch": 0.8323939998265846, | |
| "grad_norm": 1.7437721633894316, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2719, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8330876614931068, | |
| "grad_norm": 1.76080267027574, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3364, | |
| "step": 1201 | |
| }, | |
| { | |
| "epoch": 0.8337813231596289, | |
| "grad_norm": 1.6023127093441858, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3567, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 0.834474984826151, | |
| "grad_norm": 1.807984298617728, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3243, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 0.8351686464926732, | |
| "grad_norm": 1.7507073872441543, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2748, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 0.8358623081591954, | |
| "grad_norm": 1.7340226078649954, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3236, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 0.8365559698257176, | |
| "grad_norm": 1.8128370813992278, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3398, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 0.8372496314922396, | |
| "grad_norm": 1.8385313973464554, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3244, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 0.8379432931587618, | |
| "grad_norm": 1.8202487072236857, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2841, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 0.838636954825284, | |
| "grad_norm": 1.8226882804903315, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3251, | |
| "step": 1209 | |
| }, | |
| { | |
| "epoch": 0.8393306164918061, | |
| "grad_norm": 1.7796315284012894, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3503, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.8400242781583283, | |
| "grad_norm": 1.770507934678552, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2873, | |
| "step": 1211 | |
| }, | |
| { | |
| "epoch": 0.8407179398248504, | |
| "grad_norm": 1.7983739916772261, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3309, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 0.8414116014913726, | |
| "grad_norm": 1.8581265442393458, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2976, | |
| "step": 1213 | |
| }, | |
| { | |
| "epoch": 0.8421052631578947, | |
| "grad_norm": 1.9709414587672909, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3244, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 0.8427989248244169, | |
| "grad_norm": 1.934195079670126, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3197, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 0.8434925864909391, | |
| "grad_norm": 1.5905446332751805, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3144, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 0.8441862481574612, | |
| "grad_norm": 1.8012514681125382, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2984, | |
| "step": 1217 | |
| }, | |
| { | |
| "epoch": 0.8448799098239833, | |
| "grad_norm": 1.8322906369289444, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3365, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 0.8455735714905055, | |
| "grad_norm": 1.7098942463292028, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2994, | |
| "step": 1219 | |
| }, | |
| { | |
| "epoch": 0.8462672331570277, | |
| "grad_norm": 1.806262096500155, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3135, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.8469608948235499, | |
| "grad_norm": 1.8088243417346301, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2548, | |
| "step": 1221 | |
| }, | |
| { | |
| "epoch": 0.8476545564900719, | |
| "grad_norm": 1.7615794277621235, | |
| "learning_rate": 1e-05, | |
| "loss": 1.369, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 0.8483482181565941, | |
| "grad_norm": 1.7446487987023735, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3417, | |
| "step": 1223 | |
| }, | |
| { | |
| "epoch": 0.8490418798231163, | |
| "grad_norm": 1.9292547174186359, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2619, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 0.8497355414896385, | |
| "grad_norm": 1.7385855689942564, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3367, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.8504292031561606, | |
| "grad_norm": 1.7893857799911939, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3231, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 0.8511228648226827, | |
| "grad_norm": 1.7904188745955463, | |
| "learning_rate": 1e-05, | |
| "loss": 1.273, | |
| "step": 1227 | |
| }, | |
| { | |
| "epoch": 0.8518165264892049, | |
| "grad_norm": 1.7311471203311328, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3036, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 0.852510188155727, | |
| "grad_norm": 1.8405281776102504, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3266, | |
| "step": 1229 | |
| }, | |
| { | |
| "epoch": 0.8532038498222492, | |
| "grad_norm": 1.7910111933959387, | |
| "learning_rate": 1e-05, | |
| "loss": 1.332, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.8538975114887714, | |
| "grad_norm": 1.692055737327334, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2594, | |
| "step": 1231 | |
| }, | |
| { | |
| "epoch": 0.8545911731552935, | |
| "grad_norm": 1.715888470034707, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2974, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 0.8552848348218156, | |
| "grad_norm": 1.6920575665727629, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3081, | |
| "step": 1233 | |
| }, | |
| { | |
| "epoch": 0.8559784964883378, | |
| "grad_norm": 1.8820212499017275, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2579, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 0.85667215815486, | |
| "grad_norm": 1.7590796982626669, | |
| "learning_rate": 1e-05, | |
| "loss": 1.346, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 0.8573658198213822, | |
| "grad_norm": 1.7175897993182907, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2952, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 0.8580594814879042, | |
| "grad_norm": 1.8872407973916834, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2678, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 0.8587531431544264, | |
| "grad_norm": 1.820685981576064, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3161, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 0.8594468048209486, | |
| "grad_norm": 1.7106563905509156, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2805, | |
| "step": 1239 | |
| }, | |
| { | |
| "epoch": 0.8601404664874708, | |
| "grad_norm": 1.835121012618051, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3411, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.8608341281539929, | |
| "grad_norm": 1.759648204282831, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3169, | |
| "step": 1241 | |
| }, | |
| { | |
| "epoch": 0.861527789820515, | |
| "grad_norm": 1.8108439058590786, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3365, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 0.8622214514870372, | |
| "grad_norm": 1.8264838456429158, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3122, | |
| "step": 1243 | |
| }, | |
| { | |
| "epoch": 0.8629151131535594, | |
| "grad_norm": 1.8561243343760983, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3126, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 0.8636087748200815, | |
| "grad_norm": 1.7744496574339597, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3419, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 0.8643024364866037, | |
| "grad_norm": 1.7102771712690799, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3913, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 0.8649960981531258, | |
| "grad_norm": 1.637011563504676, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3294, | |
| "step": 1247 | |
| }, | |
| { | |
| "epoch": 0.865689759819648, | |
| "grad_norm": 1.7378852441605857, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3604, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 0.8663834214861701, | |
| "grad_norm": 1.892145137680064, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3839, | |
| "step": 1249 | |
| }, | |
| { | |
| "epoch": 0.8670770831526923, | |
| "grad_norm": 1.7114640198993971, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3164, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.8677707448192145, | |
| "grad_norm": 1.857472545380348, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3409, | |
| "step": 1251 | |
| }, | |
| { | |
| "epoch": 0.8684644064857365, | |
| "grad_norm": 1.6636749531242045, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3103, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 0.8691580681522587, | |
| "grad_norm": 1.853753890533686, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3372, | |
| "step": 1253 | |
| }, | |
| { | |
| "epoch": 0.8698517298187809, | |
| "grad_norm": 1.7274626500933439, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3198, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 0.8705453914853031, | |
| "grad_norm": 1.7681696772117461, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3451, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 0.8712390531518251, | |
| "grad_norm": 1.7074991789276823, | |
| "learning_rate": 1e-05, | |
| "loss": 1.306, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 0.8719327148183473, | |
| "grad_norm": 1.8022859354848255, | |
| "learning_rate": 1e-05, | |
| "loss": 1.307, | |
| "step": 1257 | |
| }, | |
| { | |
| "epoch": 0.8726263764848695, | |
| "grad_norm": 1.722826958334196, | |
| "learning_rate": 1e-05, | |
| "loss": 1.348, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 0.8733200381513917, | |
| "grad_norm": 1.6278820667115972, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3156, | |
| "step": 1259 | |
| }, | |
| { | |
| "epoch": 0.8740136998179138, | |
| "grad_norm": 1.8615627558866032, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3642, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.874707361484436, | |
| "grad_norm": 1.6977360433618884, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3661, | |
| "step": 1261 | |
| }, | |
| { | |
| "epoch": 0.8754010231509581, | |
| "grad_norm": 1.7119935578912018, | |
| "learning_rate": 1e-05, | |
| "loss": 1.343, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 0.8760946848174803, | |
| "grad_norm": 1.8828273441618772, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3286, | |
| "step": 1263 | |
| }, | |
| { | |
| "epoch": 0.8767883464840024, | |
| "grad_norm": 1.7991745556104566, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2877, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 0.8774820081505246, | |
| "grad_norm": 1.7692125953469466, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3132, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 0.8781756698170468, | |
| "grad_norm": 1.763539004235656, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2516, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 0.8788693314835689, | |
| "grad_norm": 1.8097604071898914, | |
| "learning_rate": 1e-05, | |
| "loss": 1.322, | |
| "step": 1267 | |
| }, | |
| { | |
| "epoch": 0.879562993150091, | |
| "grad_norm": 1.8504952339835505, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3257, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 0.8802566548166132, | |
| "grad_norm": 1.8228681067260823, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3207, | |
| "step": 1269 | |
| }, | |
| { | |
| "epoch": 0.8809503164831354, | |
| "grad_norm": 1.8569141776272553, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3415, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.8816439781496576, | |
| "grad_norm": 1.8653037577154865, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2882, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 0.8823376398161796, | |
| "grad_norm": 1.7842091027351248, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3011, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 0.8830313014827018, | |
| "grad_norm": 1.865231093038103, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3421, | |
| "step": 1273 | |
| }, | |
| { | |
| "epoch": 0.883724963149224, | |
| "grad_norm": 1.8445484018508556, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2994, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 0.8844186248157461, | |
| "grad_norm": 2.0112180595207585, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3491, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.8851122864822683, | |
| "grad_norm": 1.9313320335876165, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2972, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 0.8858059481487904, | |
| "grad_norm": 1.766314463801755, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3551, | |
| "step": 1277 | |
| }, | |
| { | |
| "epoch": 0.8864996098153126, | |
| "grad_norm": 1.6939200547787165, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3346, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 0.8871932714818347, | |
| "grad_norm": 1.6553779025228499, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3331, | |
| "step": 1279 | |
| }, | |
| { | |
| "epoch": 0.8878869331483569, | |
| "grad_norm": 1.672522216377223, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3143, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.888580594814879, | |
| "grad_norm": 1.6477930929287763, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2655, | |
| "step": 1281 | |
| }, | |
| { | |
| "epoch": 0.8892742564814012, | |
| "grad_norm": 1.9082634726577539, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3101, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 0.8899679181479233, | |
| "grad_norm": 1.6619545725705775, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3134, | |
| "step": 1283 | |
| }, | |
| { | |
| "epoch": 0.8906615798144455, | |
| "grad_norm": 1.8186223504046093, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3356, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 0.8913552414809677, | |
| "grad_norm": 1.6755642327768199, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3005, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 0.8920489031474899, | |
| "grad_norm": 1.7174416264813488, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3347, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 0.8927425648140119, | |
| "grad_norm": 1.7186710255881854, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2908, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 0.8934362264805341, | |
| "grad_norm": 1.6346319252027368, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2927, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 0.8941298881470563, | |
| "grad_norm": 1.8474495055475482, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3255, | |
| "step": 1289 | |
| }, | |
| { | |
| "epoch": 0.8948235498135785, | |
| "grad_norm": 1.8788390378390694, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3385, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.8955172114801006, | |
| "grad_norm": 1.629566356590922, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3482, | |
| "step": 1291 | |
| }, | |
| { | |
| "epoch": 0.8962108731466227, | |
| "grad_norm": 1.7412289424540435, | |
| "learning_rate": 1e-05, | |
| "loss": 1.267, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 0.8969045348131449, | |
| "grad_norm": 1.5904673607353297, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3107, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 0.897598196479667, | |
| "grad_norm": 1.7668320875825854, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2968, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 0.8982918581461892, | |
| "grad_norm": 1.6434271863603802, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3531, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 0.8989855198127114, | |
| "grad_norm": 1.6446049917777663, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3067, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 0.8996791814792335, | |
| "grad_norm": 1.7114850493139724, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2526, | |
| "step": 1297 | |
| }, | |
| { | |
| "epoch": 0.9003728431457556, | |
| "grad_norm": 1.7703812278053084, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3284, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 0.9010665048122778, | |
| "grad_norm": 1.783598858952647, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3449, | |
| "step": 1299 | |
| }, | |
| { | |
| "epoch": 0.9017601664788, | |
| "grad_norm": 1.7594643222023651, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2865, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9024538281453222, | |
| "grad_norm": 1.7979500686841217, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2826, | |
| "step": 1301 | |
| }, | |
| { | |
| "epoch": 0.9031474898118442, | |
| "grad_norm": 1.7054221381249888, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3525, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 0.9038411514783664, | |
| "grad_norm": 1.7423889164937596, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3044, | |
| "step": 1303 | |
| }, | |
| { | |
| "epoch": 0.9045348131448886, | |
| "grad_norm": 1.8080520927021586, | |
| "learning_rate": 1e-05, | |
| "loss": 1.331, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 0.9052284748114108, | |
| "grad_norm": 1.9903383637954946, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3213, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 0.9059221364779328, | |
| "grad_norm": 1.622554431537696, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3307, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 0.906615798144455, | |
| "grad_norm": 1.891392187663753, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3484, | |
| "step": 1307 | |
| }, | |
| { | |
| "epoch": 0.9073094598109772, | |
| "grad_norm": 1.5686215128270367, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3101, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 0.9080031214774994, | |
| "grad_norm": 1.7879215692958745, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3499, | |
| "step": 1309 | |
| }, | |
| { | |
| "epoch": 0.9086967831440215, | |
| "grad_norm": 1.7583743340224038, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2757, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.9093904448105437, | |
| "grad_norm": 1.7396925828139032, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2925, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 0.9100841064770658, | |
| "grad_norm": 1.7958710900831494, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3058, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 0.910777768143588, | |
| "grad_norm": 1.764507973805378, | |
| "learning_rate": 1e-05, | |
| "loss": 1.297, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 0.9114714298101101, | |
| "grad_norm": 1.8550793576750044, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3146, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 0.9121650914766323, | |
| "grad_norm": 1.6803054339754033, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2966, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 0.9128587531431545, | |
| "grad_norm": 1.885929787117982, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3201, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 0.9135524148096765, | |
| "grad_norm": 1.6131928750808537, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2899, | |
| "step": 1317 | |
| }, | |
| { | |
| "epoch": 0.9142460764761987, | |
| "grad_norm": 1.8283426626966652, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3149, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 0.9149397381427209, | |
| "grad_norm": 1.7895417190819627, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3135, | |
| "step": 1319 | |
| }, | |
| { | |
| "epoch": 0.9156333998092431, | |
| "grad_norm": 1.6655898144804122, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3506, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.9163270614757651, | |
| "grad_norm": 1.789395903422411, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2944, | |
| "step": 1321 | |
| }, | |
| { | |
| "epoch": 0.9170207231422873, | |
| "grad_norm": 1.5554671892032146, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3035, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 0.9177143848088095, | |
| "grad_norm": 1.7462183316659554, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2871, | |
| "step": 1323 | |
| }, | |
| { | |
| "epoch": 0.9184080464753317, | |
| "grad_norm": 1.6407955316695355, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2842, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 0.9191017081418538, | |
| "grad_norm": 1.8054786932821483, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2801, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.919795369808376, | |
| "grad_norm": 1.7774882584688303, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3111, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 0.9204890314748981, | |
| "grad_norm": 1.7487308168423374, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3086, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 0.9211826931414203, | |
| "grad_norm": 1.830348412513128, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2398, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 0.9218763548079424, | |
| "grad_norm": 1.8745249129153505, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3174, | |
| "step": 1329 | |
| }, | |
| { | |
| "epoch": 0.9225700164744646, | |
| "grad_norm": 1.6997745347291242, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2795, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.9232636781409868, | |
| "grad_norm": 1.6461004692498915, | |
| "learning_rate": 1e-05, | |
| "loss": 1.369, | |
| "step": 1331 | |
| }, | |
| { | |
| "epoch": 0.9239573398075089, | |
| "grad_norm": 1.867846237084916, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3492, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 0.924651001474031, | |
| "grad_norm": 1.7338725633875975, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3425, | |
| "step": 1333 | |
| }, | |
| { | |
| "epoch": 0.9253446631405532, | |
| "grad_norm": 1.7058365907688622, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3289, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 0.9260383248070754, | |
| "grad_norm": 1.6812689204725293, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3166, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 0.9267319864735976, | |
| "grad_norm": 1.7854594207936143, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3357, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 0.9274256481401196, | |
| "grad_norm": 1.8632837097505413, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2385, | |
| "step": 1337 | |
| }, | |
| { | |
| "epoch": 0.9281193098066418, | |
| "grad_norm": 1.838541328017032, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2703, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 0.928812971473164, | |
| "grad_norm": 1.8476109239446323, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2934, | |
| "step": 1339 | |
| }, | |
| { | |
| "epoch": 0.9295066331396861, | |
| "grad_norm": 1.735630365745877, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2978, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.9302002948062082, | |
| "grad_norm": 1.6182767450065856, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2999, | |
| "step": 1341 | |
| }, | |
| { | |
| "epoch": 0.9308939564727304, | |
| "grad_norm": 1.7314432313872, | |
| "learning_rate": 1e-05, | |
| "loss": 1.291, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 0.9315876181392526, | |
| "grad_norm": 1.9298812546673758, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3352, | |
| "step": 1343 | |
| }, | |
| { | |
| "epoch": 0.9322812798057747, | |
| "grad_norm": 1.890305436960625, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3603, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 0.9329749414722969, | |
| "grad_norm": 1.7012847869156689, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2705, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 0.933668603138819, | |
| "grad_norm": 1.7624548099246455, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3229, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 0.9343622648053412, | |
| "grad_norm": 1.8174643785961295, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2613, | |
| "step": 1347 | |
| }, | |
| { | |
| "epoch": 0.9350559264718633, | |
| "grad_norm": 1.766018762161775, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3907, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 0.9357495881383855, | |
| "grad_norm": 1.8138067691928326, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2878, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 0.9364432498049077, | |
| "grad_norm": 1.8378030129613956, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3192, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.9371369114714299, | |
| "grad_norm": 2.012247327683597, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3096, | |
| "step": 1351 | |
| }, | |
| { | |
| "epoch": 0.9378305731379519, | |
| "grad_norm": 1.704402821997915, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3304, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 0.9385242348044741, | |
| "grad_norm": 1.7885693471410014, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3065, | |
| "step": 1353 | |
| }, | |
| { | |
| "epoch": 0.9392178964709963, | |
| "grad_norm": 1.6661434949017349, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3307, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 0.9399115581375185, | |
| "grad_norm": 1.7962358503874996, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3129, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 0.9406052198040405, | |
| "grad_norm": 1.718189556083174, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2994, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 0.9412988814705627, | |
| "grad_norm": 1.9092876605935403, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3859, | |
| "step": 1357 | |
| }, | |
| { | |
| "epoch": 0.9419925431370849, | |
| "grad_norm": 1.8331634920945954, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2668, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 0.942686204803607, | |
| "grad_norm": 1.7795600411027266, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3158, | |
| "step": 1359 | |
| }, | |
| { | |
| "epoch": 0.9433798664701292, | |
| "grad_norm": 1.7718647937619816, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2766, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.9440735281366514, | |
| "grad_norm": 1.7833385647199957, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3188, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 0.9447671898031735, | |
| "grad_norm": 1.7094537156535095, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2916, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 0.9454608514696956, | |
| "grad_norm": 1.7218575319930665, | |
| "learning_rate": 1e-05, | |
| "loss": 1.363, | |
| "step": 1363 | |
| }, | |
| { | |
| "epoch": 0.9461545131362178, | |
| "grad_norm": 1.8291690131158997, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3067, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 0.94684817480274, | |
| "grad_norm": 1.5981127580277035, | |
| "learning_rate": 1e-05, | |
| "loss": 1.316, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 0.9475418364692622, | |
| "grad_norm": 1.7097133890389782, | |
| "learning_rate": 1e-05, | |
| "loss": 1.31, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 0.9482354981357842, | |
| "grad_norm": 1.7177869042959357, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3261, | |
| "step": 1367 | |
| }, | |
| { | |
| "epoch": 0.9489291598023064, | |
| "grad_norm": 1.7391018887085676, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3476, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 0.9496228214688286, | |
| "grad_norm": 1.835174110490517, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3198, | |
| "step": 1369 | |
| }, | |
| { | |
| "epoch": 0.9503164831353508, | |
| "grad_norm": 1.8268669014129535, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3338, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.9510101448018728, | |
| "grad_norm": 1.6581115729537674, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2378, | |
| "step": 1371 | |
| }, | |
| { | |
| "epoch": 0.951703806468395, | |
| "grad_norm": 1.809305155351, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3432, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 0.9523974681349172, | |
| "grad_norm": 1.7869170912103947, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3173, | |
| "step": 1373 | |
| }, | |
| { | |
| "epoch": 0.9530911298014394, | |
| "grad_norm": 1.5946853342076313, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3202, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 0.9537847914679615, | |
| "grad_norm": 1.7449681923450435, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3368, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.9544784531344837, | |
| "grad_norm": 1.7867785161300096, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2523, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 0.9551721148010058, | |
| "grad_norm": 1.7493793708109926, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3143, | |
| "step": 1377 | |
| }, | |
| { | |
| "epoch": 0.955865776467528, | |
| "grad_norm": 1.6708212386781847, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3197, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 0.9565594381340501, | |
| "grad_norm": 1.7624923123146528, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2976, | |
| "step": 1379 | |
| }, | |
| { | |
| "epoch": 0.9572530998005723, | |
| "grad_norm": 1.8444740226606935, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3251, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.9579467614670945, | |
| "grad_norm": 1.5953334218837774, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2664, | |
| "step": 1381 | |
| }, | |
| { | |
| "epoch": 0.9586404231336165, | |
| "grad_norm": 1.9455556499425986, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2883, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 0.9593340848001387, | |
| "grad_norm": 1.7447825684583413, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3101, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 0.9600277464666609, | |
| "grad_norm": 1.7815515049059591, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2931, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 0.9607214081331831, | |
| "grad_norm": 1.8473821407663016, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2916, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 0.9614150697997051, | |
| "grad_norm": 1.8287625516543624, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3302, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 0.9621087314662273, | |
| "grad_norm": 1.9131178436318876, | |
| "learning_rate": 1e-05, | |
| "loss": 1.292, | |
| "step": 1387 | |
| }, | |
| { | |
| "epoch": 0.9628023931327495, | |
| "grad_norm": 1.6731211620160438, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2662, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 0.9634960547992717, | |
| "grad_norm": 1.8100000986534872, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3462, | |
| "step": 1389 | |
| }, | |
| { | |
| "epoch": 0.9641897164657938, | |
| "grad_norm": 1.915036526264764, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2819, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.964883378132316, | |
| "grad_norm": 1.9069098292199362, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3002, | |
| "step": 1391 | |
| }, | |
| { | |
| "epoch": 0.9655770397988381, | |
| "grad_norm": 1.7930073332304715, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2816, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 0.9662707014653603, | |
| "grad_norm": 1.8530639734903125, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3521, | |
| "step": 1393 | |
| }, | |
| { | |
| "epoch": 0.9669643631318824, | |
| "grad_norm": 1.7315815210793186, | |
| "learning_rate": 1e-05, | |
| "loss": 1.299, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 0.9676580247984046, | |
| "grad_norm": 1.8034499383080111, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2712, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 0.9683516864649268, | |
| "grad_norm": 1.8019607332463998, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3431, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 0.9690453481314489, | |
| "grad_norm": 1.7026349253270283, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3306, | |
| "step": 1397 | |
| }, | |
| { | |
| "epoch": 0.969739009797971, | |
| "grad_norm": 1.8029112479233105, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2791, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 0.9704326714644932, | |
| "grad_norm": 1.6640138276094931, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3551, | |
| "step": 1399 | |
| }, | |
| { | |
| "epoch": 0.9711263331310154, | |
| "grad_norm": 1.8773693641133087, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3214, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.9718199947975376, | |
| "grad_norm": 1.6189625859175014, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3045, | |
| "step": 1401 | |
| }, | |
| { | |
| "epoch": 0.9725136564640596, | |
| "grad_norm": 1.8655984733394448, | |
| "learning_rate": 1e-05, | |
| "loss": 1.295, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 0.9732073181305818, | |
| "grad_norm": 1.6495459791796045, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3209, | |
| "step": 1403 | |
| }, | |
| { | |
| "epoch": 0.973900979797104, | |
| "grad_norm": 1.7262446380853802, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3179, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 0.9745946414636261, | |
| "grad_norm": 1.8782015233926619, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2991, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 0.9752883031301482, | |
| "grad_norm": 1.6294972087965263, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3022, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 0.9759819647966704, | |
| "grad_norm": 1.79967261492696, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3019, | |
| "step": 1407 | |
| }, | |
| { | |
| "epoch": 0.9766756264631926, | |
| "grad_norm": 1.8301257668132722, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2952, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 0.9773692881297147, | |
| "grad_norm": 1.7858886287008595, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3455, | |
| "step": 1409 | |
| }, | |
| { | |
| "epoch": 0.9780629497962369, | |
| "grad_norm": 1.645627337737642, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2788, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.978756611462759, | |
| "grad_norm": 1.8493218045117754, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2946, | |
| "step": 1411 | |
| }, | |
| { | |
| "epoch": 0.9794502731292812, | |
| "grad_norm": 1.9503637295771707, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2623, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 0.9801439347958033, | |
| "grad_norm": 1.854587002167059, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2966, | |
| "step": 1413 | |
| }, | |
| { | |
| "epoch": 0.9808375964623255, | |
| "grad_norm": 1.7795692734633988, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3538, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 0.9815312581288477, | |
| "grad_norm": 1.8026744879009458, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3534, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 0.9822249197953699, | |
| "grad_norm": 1.7902484770921805, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3155, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 0.9829185814618919, | |
| "grad_norm": 1.7212800748245163, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2941, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 0.9836122431284141, | |
| "grad_norm": 1.801323328800465, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3823, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 0.9843059047949363, | |
| "grad_norm": 1.7021881473197502, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3371, | |
| "step": 1419 | |
| }, | |
| { | |
| "epoch": 0.9849995664614585, | |
| "grad_norm": 1.8046118812459044, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3433, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.9856932281279805, | |
| "grad_norm": 1.6685271100241381, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2897, | |
| "step": 1421 | |
| }, | |
| { | |
| "epoch": 0.9863868897945027, | |
| "grad_norm": 1.6454889789915157, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3029, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 0.9870805514610249, | |
| "grad_norm": 1.8167751934930396, | |
| "learning_rate": 1e-05, | |
| "loss": 1.278, | |
| "step": 1423 | |
| }, | |
| { | |
| "epoch": 0.987774213127547, | |
| "grad_norm": 1.7280323726187787, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3137, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 0.9884678747940692, | |
| "grad_norm": 1.670222942611059, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3248, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 0.9891615364605914, | |
| "grad_norm": 1.799342131947682, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3215, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 0.9898551981271135, | |
| "grad_norm": 1.8123264473720575, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3548, | |
| "step": 1427 | |
| }, | |
| { | |
| "epoch": 0.9905488597936356, | |
| "grad_norm": 1.7747475128663022, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2765, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 0.9912425214601578, | |
| "grad_norm": 1.8041680654218195, | |
| "learning_rate": 1e-05, | |
| "loss": 1.31, | |
| "step": 1429 | |
| }, | |
| { | |
| "epoch": 0.99193618312668, | |
| "grad_norm": 1.7105917151107914, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2958, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.9926298447932022, | |
| "grad_norm": 1.6626340057182631, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3484, | |
| "step": 1431 | |
| }, | |
| { | |
| "epoch": 0.9933235064597242, | |
| "grad_norm": 1.6610746706958375, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3256, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 0.9940171681262464, | |
| "grad_norm": 1.7615692816320323, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3303, | |
| "step": 1433 | |
| }, | |
| { | |
| "epoch": 0.9947108297927686, | |
| "grad_norm": 1.7533916584851055, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2955, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 0.9954044914592908, | |
| "grad_norm": 1.7232538432339657, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3177, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 0.9960981531258128, | |
| "grad_norm": 1.7441612385283174, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3013, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 0.996791814792335, | |
| "grad_norm": 1.8597160448645227, | |
| "learning_rate": 1e-05, | |
| "loss": 1.2923, | |
| "step": 1437 | |
| }, | |
| { | |
| "epoch": 0.9974854764588572, | |
| "grad_norm": 1.742932722373845, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3469, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 0.9981791381253794, | |
| "grad_norm": 1.8961285251801105, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3101, | |
| "step": 1439 | |
| }, | |
| { | |
| "epoch": 0.9988727997919015, | |
| "grad_norm": 1.6967076327288442, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3157, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.9995664614584236, | |
| "grad_norm": 1.669534255209671, | |
| "learning_rate": 1e-05, | |
| "loss": 1.3203, | |
| "step": 1441 | |
| }, | |
| { | |
| "epoch": 0.9995664614584236, | |
| "step": 1441, | |
| "total_flos": 2332396250726400.0, | |
| "train_loss": 1.4015557253881927, | |
| "train_runtime": 204310.3582, | |
| "train_samples_per_second": 0.903, | |
| "train_steps_per_second": 0.007 | |
| } | |
| ], | |
| "logging_steps": 1.0, | |
| "max_steps": 1441, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2332396250726400.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
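
This layout matches a Hugging Face transformers `trainer_state.json`: per-step records in `log_history` (each with `epoch`, `grad_norm`, `learning_rate`, `loss`, `step`), a final summary record carrying `train_loss` and runtime totals, and top-level trainer settings. A minimal sketch for reading it back, assuming the file is saved as `trainer_state.json` and that matplotlib is available; the filename and output path are illustrative, not taken from this log:

```python
import json

import matplotlib.pyplot as plt

# Hypothetical path; point this at the actual checkpoint's trainer_state.json.
with open("trainer_state.json") as f:
    state = json.load(f)

# Keep only per-step records; the final summary entry has no "loss" key.
history = [h for h in state["log_history"] if "loss" in h]

steps = [h["step"] for h in history]
losses = [h["loss"] for h in history]

print(f"steps logged: {len(history)}")
print(f"first loss:   {losses[0]:.4f}")
print(f"final loss:   {losses[-1]:.4f}")
# With logging_steps = 1, this mean should sit close to the reported train_loss.
print(f"mean loss:    {sum(losses) / len(losses):.4f}")

plt.plot(steps, losses)
plt.xlabel("step")
plt.ylabel("training loss")
plt.title("Training loss vs. step")
plt.savefig("loss_curve.png", dpi=150)
```

For this run the mean of the logged losses should land near the summary's `train_loss` of 1.4016, since every step was logged (`logging_steps` is 1.0 and `max_steps` equals the 1441 recorded steps).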