{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 2484,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.004830917874396135,
      "grad_norm": NaN,
      "learning_rate": 0.0002,
      "loss": 77.3398,
      "step": 2
    },
    {
      "epoch": 0.00966183574879227,
      "grad_norm": 46.88166809082031,
      "learning_rate": 0.0001998389694041868,
      "loss": 74.1949,
      "step": 4
    },
    {
      "epoch": 0.014492753623188406,
      "grad_norm": 57.10332489013672,
      "learning_rate": 0.0001996779388083736,
      "loss": 64.1131,
      "step": 6
    },
    {
      "epoch": 0.01932367149758454,
      "grad_norm": 32.98695373535156,
      "learning_rate": 0.0001995169082125604,
      "loss": 50.8557,
      "step": 8
    },
    {
      "epoch": 0.024154589371980676,
      "grad_norm": 42.64508056640625,
      "learning_rate": 0.0001993558776167472,
      "loss": 46.3401,
      "step": 10
    },
    {
      "epoch": 0.028985507246376812,
      "grad_norm": 23.093393325805664,
      "learning_rate": 0.00019919484702093397,
      "loss": 42.264,
      "step": 12
    },
    {
      "epoch": 0.033816425120772944,
      "grad_norm": 11.703313827514648,
      "learning_rate": 0.00019903381642512078,
      "loss": 40.6424,
      "step": 14
    },
    {
      "epoch": 0.03864734299516908,
      "grad_norm": 12.010417938232422,
      "learning_rate": 0.00019887278582930758,
      "loss": 41.0451,
      "step": 16
    },
    {
      "epoch": 0.043478260869565216,
      "grad_norm": 8.227810859680176,
      "learning_rate": 0.00019871175523349436,
      "loss": 40.8654,
      "step": 18
    },
    {
      "epoch": 0.04830917874396135,
      "grad_norm": 8.531281471252441,
      "learning_rate": 0.00019855072463768116,
      "loss": 39.9403,
      "step": 20
    },
    {
      "epoch": 0.05314009661835749,
      "grad_norm": 13.759291648864746,
      "learning_rate": 0.00019838969404186796,
      "loss": 40.4885,
      "step": 22
    },
    {
      "epoch": 0.057971014492753624,
      "grad_norm": 12.24333381652832,
      "learning_rate": 0.00019822866344605474,
      "loss": 38.9464,
      "step": 24
    },
    {
      "epoch": 0.06280193236714976,
      "grad_norm": 9.252888679504395,
      "learning_rate": 0.00019806763285024154,
      "loss": 37.4614,
      "step": 26
    },
    {
      "epoch": 0.06763285024154589,
      "grad_norm": 12.859115600585938,
      "learning_rate": 0.00019790660225442835,
      "loss": 40.04,
      "step": 28
    },
    {
      "epoch": 0.07246376811594203,
      "grad_norm": 8.288698196411133,
      "learning_rate": 0.00019774557165861512,
      "loss": 39.2962,
      "step": 30
    },
    {
      "epoch": 0.07729468599033816,
      "grad_norm": 11.058815956115723,
      "learning_rate": 0.00019758454106280193,
      "loss": 38.5443,
      "step": 32
    },
    {
      "epoch": 0.0821256038647343,
      "grad_norm": 16.540409088134766,
      "learning_rate": 0.00019742351046698876,
      "loss": 40.7756,
      "step": 34
    },
    {
      "epoch": 0.08695652173913043,
      "grad_norm": 7.264046669006348,
      "learning_rate": 0.00019726247987117553,
      "loss": 40.0833,
      "step": 36
    },
    {
      "epoch": 0.09178743961352658,
      "grad_norm": 7.761327743530273,
      "learning_rate": 0.00019710144927536234,
      "loss": 36.8212,
      "step": 38
    },
    {
      "epoch": 0.0966183574879227,
      "grad_norm": 12.5891695022583,
      "learning_rate": 0.00019694041867954914,
      "loss": 37.6787,
      "step": 40
    },
    {
      "epoch": 0.10144927536231885,
      "grad_norm": 8.316587448120117,
      "learning_rate": 0.00019677938808373592,
      "loss": 38.399,
      "step": 42
    },
    {
      "epoch": 0.10628019323671498,
      "grad_norm": 6.445375442504883,
      "learning_rate": 0.00019661835748792272,
      "loss": 37.4109,
      "step": 44
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 10.068939208984375,
      "learning_rate": 0.00019645732689210952,
      "loss": 36.8106,
      "step": 46
    },
    {
      "epoch": 0.11594202898550725,
      "grad_norm": 10.530860900878906,
      "learning_rate": 0.0001962962962962963,
      "loss": 37.1642,
      "step": 48
    },
    {
      "epoch": 0.12077294685990338,
      "grad_norm": 7.902243614196777,
      "learning_rate": 0.0001961352657004831,
      "loss": 39.0534,
      "step": 50
    },
    {
      "epoch": 0.12560386473429952,
      "grad_norm": 7.551494598388672,
      "learning_rate": 0.0001959742351046699,
      "loss": 39.5417,
      "step": 52
    },
    {
      "epoch": 0.13043478260869565,
      "grad_norm": 10.609959602355957,
      "learning_rate": 0.00019581320450885668,
      "loss": 38.9163,
      "step": 54
    },
    {
      "epoch": 0.13526570048309178,
      "grad_norm": 11.915197372436523,
      "learning_rate": 0.0001956521739130435,
      "loss": 37.7346,
      "step": 56
    },
    {
      "epoch": 0.14009661835748793,
      "grad_norm": 9.105422973632812,
      "learning_rate": 0.0001954911433172303,
      "loss": 37.7469,
      "step": 58
    },
    {
      "epoch": 0.14492753623188406,
      "grad_norm": 10.62623119354248,
      "learning_rate": 0.00019533011272141707,
      "loss": 37.0263,
      "step": 60
    },
    {
      "epoch": 0.1497584541062802,
      "grad_norm": 9.892080307006836,
      "learning_rate": 0.00019516908212560387,
      "loss": 39.5135,
      "step": 62
    },
    {
      "epoch": 0.15458937198067632,
      "grad_norm": 12.9131441116333,
      "learning_rate": 0.00019500805152979068,
      "loss": 35.7463,
      "step": 64
    },
    {
      "epoch": 0.15942028985507245,
      "grad_norm": 9.631657600402832,
      "learning_rate": 0.00019484702093397745,
      "loss": 36.228,
      "step": 66
    },
    {
      "epoch": 0.1642512077294686,
      "grad_norm": 9.47088623046875,
      "learning_rate": 0.00019468599033816426,
      "loss": 38.1165,
      "step": 68
    },
    {
      "epoch": 0.16908212560386474,
      "grad_norm": 10.864086151123047,
      "learning_rate": 0.00019452495974235106,
      "loss": 37.2582,
      "step": 70
    },
    {
      "epoch": 0.17391304347826086,
      "grad_norm": 11.572696685791016,
      "learning_rate": 0.00019436392914653784,
      "loss": 36.3281,
      "step": 72
    },
    {
      "epoch": 0.178743961352657,
      "grad_norm": 8.3622407913208,
      "learning_rate": 0.00019420289855072464,
      "loss": 37.2185,
      "step": 74
    },
    {
      "epoch": 0.18357487922705315,
      "grad_norm": 10.799039840698242,
      "learning_rate": 0.00019404186795491144,
      "loss": 37.1632,
      "step": 76
    },
    {
      "epoch": 0.18840579710144928,
      "grad_norm": 10.033112525939941,
      "learning_rate": 0.00019388083735909825,
      "loss": 36.5457,
      "step": 78
    },
    {
      "epoch": 0.1932367149758454,
      "grad_norm": 14.008647918701172,
      "learning_rate": 0.00019371980676328502,
      "loss": 37.3024,
      "step": 80
    },
    {
      "epoch": 0.19806763285024154,
      "grad_norm": 16.30805778503418,
      "learning_rate": 0.00019355877616747183,
      "loss": 33.9588,
      "step": 82
    },
    {
      "epoch": 0.2028985507246377,
      "grad_norm": 10.949873924255371,
      "learning_rate": 0.00019339774557165863,
      "loss": 34.5954,
      "step": 84
    },
    {
      "epoch": 0.20772946859903382,
      "grad_norm": 13.2377290725708,
      "learning_rate": 0.0001932367149758454,
      "loss": 35.3164,
      "step": 86
    },
    {
      "epoch": 0.21256038647342995,
      "grad_norm": 16.080217361450195,
      "learning_rate": 0.0001930756843800322,
      "loss": 36.9439,
      "step": 88
    },
    {
      "epoch": 0.21739130434782608,
      "grad_norm": 12.830262184143066,
      "learning_rate": 0.000192914653784219,
      "loss": 35.9063,
      "step": 90
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 12.519986152648926,
      "learning_rate": 0.0001927536231884058,
      "loss": 36.0092,
      "step": 92
    },
    {
      "epoch": 0.22705314009661837,
      "grad_norm": 11.222923278808594,
      "learning_rate": 0.0001925925925925926,
      "loss": 35.2874,
      "step": 94
    },
    {
      "epoch": 0.2318840579710145,
      "grad_norm": 13.27009105682373,
      "learning_rate": 0.0001924315619967794,
      "loss": 33.1713,
      "step": 96
    },
    {
      "epoch": 0.23671497584541062,
      "grad_norm": 10.449563026428223,
      "learning_rate": 0.0001922705314009662,
      "loss": 34.836,
      "step": 98
    },
    {
      "epoch": 0.24154589371980675,
      "grad_norm": 17.162439346313477,
      "learning_rate": 0.000192109500805153,
      "loss": 33.4152,
      "step": 100
    },
    {
      "epoch": 0.2463768115942029,
      "grad_norm": 11.21731185913086,
      "learning_rate": 0.00019194847020933978,
      "loss": 33.7839,
      "step": 102
    },
    {
      "epoch": 0.25120772946859904,
      "grad_norm": 12.32532024383545,
      "learning_rate": 0.00019178743961352658,
      "loss": 34.1485,
      "step": 104
    },
    {
      "epoch": 0.2560386473429952,
      "grad_norm": 10.481746673583984,
      "learning_rate": 0.0001916264090177134,
      "loss": 32.3468,
      "step": 106
    },
    {
      "epoch": 0.2608695652173913,
      "grad_norm": 10.69057846069336,
      "learning_rate": 0.00019146537842190016,
      "loss": 33.281,
      "step": 108
    },
    {
      "epoch": 0.26570048309178745,
      "grad_norm": 14.237508773803711,
      "learning_rate": 0.00019130434782608697,
      "loss": 33.0036,
      "step": 110
    },
    {
      "epoch": 0.27053140096618356,
      "grad_norm": 8.754230499267578,
      "learning_rate": 0.00019114331723027377,
      "loss": 34.4538,
      "step": 112
    },
    {
      "epoch": 0.2753623188405797,
      "grad_norm": 8.595375061035156,
      "learning_rate": 0.00019098228663446057,
      "loss": 32.7405,
      "step": 114
    },
    {
      "epoch": 0.28019323671497587,
      "grad_norm": 10.565451622009277,
      "learning_rate": 0.00019082125603864735,
      "loss": 32.7687,
      "step": 116
    },
    {
      "epoch": 0.28502415458937197,
      "grad_norm": 9.513022422790527,
      "learning_rate": 0.00019066022544283415,
      "loss": 32.8597,
      "step": 118
    },
    {
      "epoch": 0.2898550724637681,
      "grad_norm": 12.073749542236328,
      "learning_rate": 0.00019049919484702096,
      "loss": 32.9837,
      "step": 120
    },
    {
      "epoch": 0.2946859903381642,
      "grad_norm": 9.835869789123535,
      "learning_rate": 0.00019033816425120773,
      "loss": 33.7884,
      "step": 122
    },
    {
      "epoch": 0.2995169082125604,
      "grad_norm": 9.995708465576172,
      "learning_rate": 0.00019017713365539454,
      "loss": 33.7684,
      "step": 124
    },
    {
      "epoch": 0.30434782608695654,
      "grad_norm": 9.067010879516602,
      "learning_rate": 0.00019001610305958134,
      "loss": 30.8072,
      "step": 126
    },
    {
      "epoch": 0.30917874396135264,
      "grad_norm": 9.235272407531738,
      "learning_rate": 0.00018985507246376812,
      "loss": 32.0888,
      "step": 128
    },
    {
      "epoch": 0.3140096618357488,
      "grad_norm": 9.046205520629883,
      "learning_rate": 0.00018969404186795492,
      "loss": 33.0407,
      "step": 130
    },
    {
      "epoch": 0.3188405797101449,
      "grad_norm": 9.927671432495117,
      "learning_rate": 0.00018953301127214172,
      "loss": 32.0351,
      "step": 132
    },
    {
      "epoch": 0.32367149758454106,
      "grad_norm": 10.035076141357422,
      "learning_rate": 0.0001893719806763285,
      "loss": 32.5972,
      "step": 134
    },
    {
      "epoch": 0.3285024154589372,
      "grad_norm": 10.489717483520508,
      "learning_rate": 0.0001892109500805153,
      "loss": 31.3804,
      "step": 136
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 13.48115348815918,
      "learning_rate": 0.0001890499194847021,
      "loss": 32.5356,
      "step": 138
    },
    {
      "epoch": 0.33816425120772947,
      "grad_norm": 8.694147109985352,
      "learning_rate": 0.00018888888888888888,
      "loss": 32.9306,
      "step": 140
    },
    {
      "epoch": 0.34299516908212563,
      "grad_norm": 8.273658752441406,
      "learning_rate": 0.0001887278582930757,
      "loss": 32.2116,
      "step": 142
    },
    {
      "epoch": 0.34782608695652173,
      "grad_norm": 10.635282516479492,
      "learning_rate": 0.0001885668276972625,
      "loss": 30.2346,
      "step": 144
    },
    {
      "epoch": 0.3526570048309179,
      "grad_norm": 9.83012866973877,
      "learning_rate": 0.00018840579710144927,
      "loss": 32.6259,
      "step": 146
    },
    {
      "epoch": 0.357487922705314,
      "grad_norm": 12.415063858032227,
      "learning_rate": 0.00018824476650563607,
      "loss": 32.0993,
      "step": 148
    },
    {
      "epoch": 0.36231884057971014,
      "grad_norm": 11.103983879089355,
      "learning_rate": 0.00018808373590982287,
      "loss": 31.7034,
      "step": 150
    },
    {
      "epoch": 0.3671497584541063,
      "grad_norm": 15.64197826385498,
      "learning_rate": 0.00018792270531400965,
      "loss": 31.6066,
      "step": 152
    },
    {
      "epoch": 0.3719806763285024,
      "grad_norm": 10.493355751037598,
      "learning_rate": 0.00018776167471819645,
      "loss": 29.1212,
      "step": 154
    },
    {
      "epoch": 0.37681159420289856,
      "grad_norm": 9.921483993530273,
      "learning_rate": 0.00018760064412238326,
      "loss": 31.0883,
      "step": 156
    },
    {
      "epoch": 0.38164251207729466,
      "grad_norm": 9.639843940734863,
      "learning_rate": 0.00018743961352657006,
      "loss": 29.4677,
      "step": 158
    },
    {
      "epoch": 0.3864734299516908,
      "grad_norm": 13.891378402709961,
      "learning_rate": 0.00018727858293075687,
      "loss": 31.5877,
      "step": 160
    },
    {
      "epoch": 0.391304347826087,
      "grad_norm": 10.116133689880371,
      "learning_rate": 0.00018711755233494367,
      "loss": 29.9048,
      "step": 162
    },
    {
      "epoch": 0.3961352657004831,
      "grad_norm": 7.1683173179626465,
      "learning_rate": 0.00018695652173913045,
      "loss": 30.6905,
      "step": 164
    },
    {
      "epoch": 0.40096618357487923,
      "grad_norm": 11.81785774230957,
      "learning_rate": 0.00018679549114331725,
      "loss": 32.2774,
      "step": 166
    },
    {
      "epoch": 0.4057971014492754,
      "grad_norm": 7.2079925537109375,
      "learning_rate": 0.00018663446054750405,
      "loss": 31.3603,
      "step": 168
    },
    {
      "epoch": 0.4106280193236715,
      "grad_norm": 10.22714900970459,
      "learning_rate": 0.00018647342995169083,
      "loss": 29.7549,
      "step": 170
    },
    {
      "epoch": 0.41545893719806765,
      "grad_norm": 8.35627269744873,
      "learning_rate": 0.00018631239935587763,
      "loss": 31.7175,
      "step": 172
    },
    {
      "epoch": 0.42028985507246375,
      "grad_norm": 8.98567008972168,
      "learning_rate": 0.00018615136876006444,
      "loss": 31.4399,
      "step": 174
    },
    {
      "epoch": 0.4251207729468599,
      "grad_norm": 10.814435958862305,
      "learning_rate": 0.0001859903381642512,
      "loss": 30.4269,
      "step": 176
    },
    {
      "epoch": 0.42995169082125606,
      "grad_norm": 9.445025444030762,
      "learning_rate": 0.00018582930756843802,
      "loss": 29.7986,
      "step": 178
    },
    {
      "epoch": 0.43478260869565216,
      "grad_norm": 11.667193412780762,
      "learning_rate": 0.00018566827697262482,
      "loss": 28.7911,
      "step": 180
    },
    {
      "epoch": 0.4396135265700483,
      "grad_norm": 8.154279708862305,
      "learning_rate": 0.0001855072463768116,
      "loss": 32.8533,
      "step": 182
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 9.40849781036377,
      "learning_rate": 0.0001853462157809984,
      "loss": 30.3062,
      "step": 184
    },
    {
      "epoch": 0.4492753623188406,
      "grad_norm": 9.476842880249023,
      "learning_rate": 0.0001851851851851852,
      "loss": 29.9025,
      "step": 186
    },
    {
      "epoch": 0.45410628019323673,
      "grad_norm": 9.150154113769531,
      "learning_rate": 0.00018502415458937198,
      "loss": 31.5757,
      "step": 188
    },
    {
      "epoch": 0.45893719806763283,
      "grad_norm": 8.072809219360352,
      "learning_rate": 0.00018486312399355878,
      "loss": 32.186,
      "step": 190
    },
    {
      "epoch": 0.463768115942029,
      "grad_norm": 11.970826148986816,
      "learning_rate": 0.0001847020933977456,
      "loss": 29.3784,
      "step": 192
    },
    {
      "epoch": 0.46859903381642515,
      "grad_norm": 11.011039733886719,
      "learning_rate": 0.00018454106280193236,
      "loss": 28.1951,
      "step": 194
    },
    {
      "epoch": 0.47342995169082125,
      "grad_norm": 10.958206176757812,
      "learning_rate": 0.00018438003220611917,
      "loss": 31.0484,
      "step": 196
    },
    {
      "epoch": 0.4782608695652174,
      "grad_norm": 9.812915802001953,
      "learning_rate": 0.00018421900161030597,
      "loss": 31.784,
      "step": 198
    },
    {
      "epoch": 0.4830917874396135,
      "grad_norm": 11.235363960266113,
      "learning_rate": 0.00018405797101449275,
      "loss": 31.3269,
      "step": 200
    },
    {
      "epoch": 0.48792270531400966,
      "grad_norm": 14.048873901367188,
      "learning_rate": 0.00018389694041867955,
      "loss": 31.0424,
      "step": 202
    },
    {
      "epoch": 0.4927536231884058,
      "grad_norm": 9.81869125366211,
      "learning_rate": 0.00018373590982286635,
      "loss": 32.6147,
      "step": 204
    },
    {
      "epoch": 0.4975845410628019,
      "grad_norm": 8.801289558410645,
      "learning_rate": 0.00018357487922705313,
      "loss": 30.4718,
      "step": 206
    },
    {
      "epoch": 0.5024154589371981,
      "grad_norm": 7.1190385818481445,
      "learning_rate": 0.00018341384863123993,
      "loss": 29.2369,
      "step": 208
    },
    {
      "epoch": 0.5072463768115942,
      "grad_norm": 8.437512397766113,
      "learning_rate": 0.00018325281803542674,
      "loss": 30.7868,
      "step": 210
    },
    {
      "epoch": 0.5120772946859904,
      "grad_norm": 6.539140224456787,
      "learning_rate": 0.0001830917874396135,
      "loss": 29.9767,
      "step": 212
    },
    {
      "epoch": 0.5169082125603864,
      "grad_norm": 9.160558700561523,
      "learning_rate": 0.00018293075684380032,
      "loss": 29.9602,
      "step": 214
    },
    {
      "epoch": 0.5217391304347826,
      "grad_norm": 7.8765177726745605,
      "learning_rate": 0.00018276972624798712,
      "loss": 30.8873,
      "step": 216
    },
    {
      "epoch": 0.5265700483091788,
      "grad_norm": 8.778061866760254,
      "learning_rate": 0.00018260869565217392,
      "loss": 30.1888,
      "step": 218
    },
    {
      "epoch": 0.5314009661835749,
      "grad_norm": 8.268914222717285,
      "learning_rate": 0.00018244766505636073,
      "loss": 31.7208,
      "step": 220
    },
    {
      "epoch": 0.5362318840579711,
      "grad_norm": 8.659038543701172,
      "learning_rate": 0.00018228663446054753,
      "loss": 31.3643,
      "step": 222
    },
    {
      "epoch": 0.5410628019323671,
      "grad_norm": 9.013368606567383,
      "learning_rate": 0.0001821256038647343,
      "loss": 30.1504,
      "step": 224
    },
    {
      "epoch": 0.5458937198067633,
      "grad_norm": 9.309354782104492,
      "learning_rate": 0.0001819645732689211,
      "loss": 30.2107,
      "step": 226
    },
    {
      "epoch": 0.5507246376811594,
      "grad_norm": 7.953092098236084,
      "learning_rate": 0.00018180354267310791,
      "loss": 29.3963,
      "step": 228
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 9.035888671875,
      "learning_rate": 0.0001816425120772947,
      "loss": 31.3732,
      "step": 230
    },
    {
      "epoch": 0.5603864734299517,
      "grad_norm": 10.098958969116211,
      "learning_rate": 0.0001814814814814815,
      "loss": 30.0866,
      "step": 232
    },
    {
      "epoch": 0.5652173913043478,
      "grad_norm": 9.308027267456055,
      "learning_rate": 0.0001813204508856683,
      "loss": 31.168,
      "step": 234
    },
    {
      "epoch": 0.5700483091787439,
      "grad_norm": 10.684345245361328,
      "learning_rate": 0.00018115942028985507,
      "loss": 30.3999,
      "step": 236
    },
    {
      "epoch": 0.5748792270531401,
      "grad_norm": 8.09032917022705,
      "learning_rate": 0.00018099838969404188,
      "loss": 31.6167,
      "step": 238
    },
    {
      "epoch": 0.5797101449275363,
      "grad_norm": 7.366332530975342,
      "learning_rate": 0.00018083735909822868,
      "loss": 30.2593,
      "step": 240
    },
    {
      "epoch": 0.5845410628019324,
      "grad_norm": 7.711369514465332,
      "learning_rate": 0.00018067632850241546,
      "loss": 28.9501,
      "step": 242
    },
    {
      "epoch": 0.5893719806763285,
      "grad_norm": 7.934360504150391,
      "learning_rate": 0.00018051529790660226,
      "loss": 28.5365,
      "step": 244
    },
    {
      "epoch": 0.5942028985507246,
      "grad_norm": 8.121601104736328,
      "learning_rate": 0.00018035426731078907,
      "loss": 29.2618,
      "step": 246
    },
    {
      "epoch": 0.5990338164251208,
      "grad_norm": 7.918673038482666,
      "learning_rate": 0.00018019323671497584,
      "loss": 30.0373,
      "step": 248
    },
    {
      "epoch": 0.6038647342995169,
      "grad_norm": 11.193553924560547,
      "learning_rate": 0.00018003220611916265,
      "loss": 31.3798,
      "step": 250
    },
    {
      "epoch": 0.6086956521739131,
      "grad_norm": 9.393643379211426,
      "learning_rate": 0.00017987117552334945,
      "loss": 31.2223,
      "step": 252
    },
    {
      "epoch": 0.6135265700483091,
      "grad_norm": 8.13814926147461,
      "learning_rate": 0.00017971014492753625,
      "loss": 30.5097,
      "step": 254
    },
    {
      "epoch": 0.6183574879227053,
      "grad_norm": 8.290206909179688,
      "learning_rate": 0.00017954911433172303,
      "loss": 27.856,
      "step": 256
    },
    {
      "epoch": 0.6231884057971014,
      "grad_norm": 9.917459487915039,
      "learning_rate": 0.00017938808373590983,
      "loss": 28.664,
      "step": 258
    },
    {
      "epoch": 0.6280193236714976,
      "grad_norm": 10.206878662109375,
      "learning_rate": 0.00017922705314009664,
      "loss": 31.3406,
      "step": 260
    },
    {
      "epoch": 0.6328502415458938,
      "grad_norm": 9.776812553405762,
      "learning_rate": 0.0001790660225442834,
      "loss": 30.5843,
      "step": 262
    },
    {
      "epoch": 0.6376811594202898,
      "grad_norm": 10.508336067199707,
      "learning_rate": 0.00017890499194847022,
      "loss": 30.3617,
      "step": 264
    },
    {
      "epoch": 0.642512077294686,
      "grad_norm": 9.144083976745605,
      "learning_rate": 0.00017874396135265702,
      "loss": 30.0566,
      "step": 266
    },
    {
      "epoch": 0.6473429951690821,
      "grad_norm": 9.019740104675293,
      "learning_rate": 0.0001785829307568438,
      "loss": 30.1384,
      "step": 268
    },
    {
      "epoch": 0.6521739130434783,
      "grad_norm": 9.140926361083984,
      "learning_rate": 0.0001784219001610306,
      "loss": 28.6601,
      "step": 270
    },
    {
      "epoch": 0.6570048309178744,
      "grad_norm": 9.820598602294922,
      "learning_rate": 0.0001782608695652174,
      "loss": 30.1565,
      "step": 272
    },
    {
      "epoch": 0.6618357487922706,
      "grad_norm": 9.670087814331055,
      "learning_rate": 0.00017809983896940418,
      "loss": 30.156,
      "step": 274
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 8.119627952575684,
      "learning_rate": 0.00017793880837359098,
      "loss": 29.5374,
      "step": 276
    },
    {
      "epoch": 0.6714975845410628,
      "grad_norm": 8.52702522277832,
      "learning_rate": 0.00017777777777777779,
      "loss": 30.2013,
      "step": 278
    },
    {
      "epoch": 0.6763285024154589,
      "grad_norm": 8.241043090820312,
      "learning_rate": 0.00017761674718196456,
      "loss": 29.2284,
      "step": 280
    },
    {
      "epoch": 0.6811594202898551,
      "grad_norm": 9.305002212524414,
      "learning_rate": 0.0001774557165861514,
      "loss": 30.8417,
      "step": 282
    },
    {
      "epoch": 0.6859903381642513,
      "grad_norm": 8.483264923095703,
      "learning_rate": 0.00017729468599033817,
      "loss": 28.3121,
      "step": 284
    },
    {
      "epoch": 0.6908212560386473,
      "grad_norm": 8.674230575561523,
      "learning_rate": 0.00017713365539452497,
      "loss": 30.4354,
      "step": 286
    },
    {
      "epoch": 0.6956521739130435,
      "grad_norm": 8.816984176635742,
      "learning_rate": 0.00017697262479871178,
      "loss": 29.0581,
      "step": 288
    },
    {
      "epoch": 0.7004830917874396,
      "grad_norm": 8.081759452819824,
      "learning_rate": 0.00017681159420289858,
      "loss": 31.0066,
      "step": 290
    },
    {
      "epoch": 0.7053140096618358,
      "grad_norm": 10.987712860107422,
      "learning_rate": 0.00017665056360708536,
      "loss": 28.4036,
      "step": 292
    },
    {
      "epoch": 0.7101449275362319,
      "grad_norm": 9.358428955078125,
      "learning_rate": 0.00017648953301127216,
      "loss": 31.733,
      "step": 294
    },
    {
      "epoch": 0.714975845410628,
      "grad_norm": 9.714231491088867,
      "learning_rate": 0.00017632850241545896,
      "loss": 28.3295,
      "step": 296
    },
    {
      "epoch": 0.7198067632850241,
      "grad_norm": 10.079188346862793,
      "learning_rate": 0.00017616747181964574,
      "loss": 29.7857,
      "step": 298
    },
    {
      "epoch": 0.7246376811594203,
      "grad_norm": 10.379854202270508,
      "learning_rate": 0.00017600644122383254,
      "loss": 28.7091,
      "step": 300
    },
    {
      "epoch": 0.7294685990338164,
      "grad_norm": 9.6157808303833,
      "learning_rate": 0.00017584541062801935,
      "loss": 30.0664,
      "step": 302
    },
    {
      "epoch": 0.7342995169082126,
      "grad_norm": 9.851590156555176,
      "learning_rate": 0.00017568438003220612,
      "loss": 30.5656,
      "step": 304
    },
    {
      "epoch": 0.7391304347826086,
      "grad_norm": 9.500916481018066,
      "learning_rate": 0.00017552334943639293,
      "loss": 28.8709,
      "step": 306
    },
    {
      "epoch": 0.7439613526570048,
      "grad_norm": 9.999371528625488,
      "learning_rate": 0.00017536231884057973,
      "loss": 29.5298,
      "step": 308
    },
    {
      "epoch": 0.748792270531401,
      "grad_norm": 8.5446195602417,
      "learning_rate": 0.0001752012882447665,
      "loss": 29.5134,
      "step": 310
    },
    {
      "epoch": 0.7536231884057971,
      "grad_norm": 9.369108200073242,
      "learning_rate": 0.0001750402576489533,
      "loss": 30.5918,
      "step": 312
    },
    {
      "epoch": 0.7584541062801933,
      "grad_norm": 9.66053581237793,
      "learning_rate": 0.00017487922705314011,
      "loss": 29.5254,
      "step": 314
    },
    {
      "epoch": 0.7632850241545893,
      "grad_norm": 9.699007034301758,
      "learning_rate": 0.0001747181964573269,
      "loss": 29.297,
      "step": 316
    },
    {
      "epoch": 0.7681159420289855,
      "grad_norm": 6.751578330993652,
      "learning_rate": 0.0001745571658615137,
      "loss": 27.781,
      "step": 318
    },
    {
      "epoch": 0.7729468599033816,
      "grad_norm": 8.00158977508545,
      "learning_rate": 0.0001743961352657005,
      "loss": 28.7844,
      "step": 320
    },
    {
      "epoch": 0.7777777777777778,
      "grad_norm": 11.203788757324219,
      "learning_rate": 0.00017423510466988727,
      "loss": 29.4509,
      "step": 322
    },
    {
      "epoch": 0.782608695652174,
      "grad_norm": 11.6134033203125,
      "learning_rate": 0.00017407407407407408,
      "loss": 28.4847,
      "step": 324
    },
    {
      "epoch": 0.7874396135265701,
      "grad_norm": 8.184885025024414,
      "learning_rate": 0.00017391304347826088,
      "loss": 30.9662,
      "step": 326
    },
    {
      "epoch": 0.7922705314009661,
      "grad_norm": 7.118366241455078,
      "learning_rate": 0.00017375201288244766,
      "loss": 30.3931,
      "step": 328
    },
    {
      "epoch": 0.7971014492753623,
      "grad_norm": 8.378530502319336,
      "learning_rate": 0.00017359098228663446,
      "loss": 28.4438,
      "step": 330
    },
    {
      "epoch": 0.8019323671497585,
      "grad_norm": 8.539013862609863,
      "learning_rate": 0.00017342995169082126,
      "loss": 30.293,
      "step": 332
    },
    {
      "epoch": 0.8067632850241546,
      "grad_norm": 10.329437255859375,
      "learning_rate": 0.00017326892109500804,
      "loss": 28.9439,
      "step": 334
    },
    {
      "epoch": 0.8115942028985508,
      "grad_norm": 7.267086982727051,
      "learning_rate": 0.00017310789049919484,
      "loss": 30.0624,
      "step": 336
    },
    {
      "epoch": 0.8164251207729468,
      "grad_norm": 10.7781400680542,
      "learning_rate": 0.00017294685990338165,
      "loss": 30.338,
      "step": 338
    },
    {
      "epoch": 0.821256038647343,
      "grad_norm": 11.077190399169922,
      "learning_rate": 0.00017278582930756842,
      "loss": 29.5852,
      "step": 340
    },
    {
      "epoch": 0.8260869565217391,
      "grad_norm": 9.007209777832031,
      "learning_rate": 0.00017262479871175523,
      "loss": 29.4697,
      "step": 342
    },
    {
      "epoch": 0.8309178743961353,
      "grad_norm": 8.706661224365234,
      "learning_rate": 0.00017246376811594206,
      "loss": 29.4373,
      "step": 344
    },
    {
      "epoch": 0.8357487922705314,
      "grad_norm": 8.104077339172363,
      "learning_rate": 0.00017230273752012884,
      "loss": 31.1459,
      "step": 346
    },
    {
      "epoch": 0.8405797101449275,
      "grad_norm": 8.499916076660156,
      "learning_rate": 0.00017214170692431564,
      "loss": 29.6039,
      "step": 348
    },
    {
      "epoch": 0.8454106280193237,
      "grad_norm": 9.886308670043945,
      "learning_rate": 0.00017198067632850244,
      "loss": 28.4998,
      "step": 350
    },
    {
      "epoch": 0.8502415458937198,
      "grad_norm": 6.680812835693359,
      "learning_rate": 0.00017181964573268922,
      "loss": 29.2374,
      "step": 352
    },
    {
      "epoch": 0.855072463768116,
      "grad_norm": 7.037901401519775,
      "learning_rate": 0.00017165861513687602,
      "loss": 29.3442,
      "step": 354
    },
    {
      "epoch": 0.8599033816425121,
      "grad_norm": 9.425200462341309,
      "learning_rate": 0.00017149758454106283,
      "loss": 28.2594,
      "step": 356
    },
    {
      "epoch": 0.8647342995169082,
      "grad_norm": 10.08089828491211,
      "learning_rate": 0.0001713365539452496,
      "loss": 29.583,
      "step": 358
    },
    {
      "epoch": 0.8695652173913043,
      "grad_norm": 8.83069133758545,
      "learning_rate": 0.0001711755233494364,
      "loss": 28.8106,
      "step": 360
    },
    {
      "epoch": 0.8743961352657005,
      "grad_norm": 12.723852157592773,
      "learning_rate": 0.0001710144927536232,
      "loss": 28.6247,
      "step": 362
    },
    {
      "epoch": 0.8792270531400966,
      "grad_norm": 7.244641304016113,
      "learning_rate": 0.00017085346215780999,
      "loss": 28.2318,
      "step": 364
    },
    {
      "epoch": 0.8840579710144928,
      "grad_norm": 10.645294189453125,
      "learning_rate": 0.0001706924315619968,
      "loss": 30.3984,
      "step": 366
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 8.675403594970703,
      "learning_rate": 0.0001705314009661836,
      "loss": 29.51,
      "step": 368
    },
    {
      "epoch": 0.893719806763285,
      "grad_norm": 9.324760437011719,
      "learning_rate": 0.00017037037037037037,
      "loss": 29.173,
      "step": 370
    },
    {
      "epoch": 0.8985507246376812,
      "grad_norm": 8.37873363494873,
      "learning_rate": 0.00017020933977455717,
      "loss": 28.2663,
      "step": 372
    },
    {
      "epoch": 0.9033816425120773,
      "grad_norm": 7.841792583465576,
      "learning_rate": 0.00017004830917874398,
      "loss": 30.4364,
      "step": 374
    },
    {
      "epoch": 0.9082125603864735,
      "grad_norm": 9.046091079711914,
      "learning_rate": 0.00016988727858293075,
      "loss": 30.9454,
      "step": 376
    },
    {
      "epoch": 0.9130434782608695,
      "grad_norm": 8.812469482421875,
      "learning_rate": 0.00016972624798711756,
      "loss": 26.7434,
      "step": 378
    },
    {
      "epoch": 0.9178743961352657,
      "grad_norm": 6.815216541290283,
      "learning_rate": 0.00016956521739130436,
      "loss": 30.5352,
      "step": 380
    },
    {
      "epoch": 0.9227053140096618,
      "grad_norm": 9.451848983764648,
      "learning_rate": 0.00016940418679549114,
      "loss": 29.4241,
      "step": 382
    },
    {
      "epoch": 0.927536231884058,
      "grad_norm": 8.97130298614502,
      "learning_rate": 0.00016924315619967794,
      "loss": 28.7862,
      "step": 384
    },
    {
      "epoch": 0.9323671497584541,
      "grad_norm": 7.6972975730896,
      "learning_rate": 0.00016908212560386474,
      "loss": 29.5439,
      "step": 386
    },
    {
      "epoch": 0.9371980676328503,
      "grad_norm": 7.955355167388916,
      "learning_rate": 0.00016892109500805152,
      "loss": 29.0917,
      "step": 388
    },
    {
      "epoch": 0.9420289855072463,
      "grad_norm": 9.80173397064209,
      "learning_rate": 0.00016876006441223832,
      "loss": 26.3015,
      "step": 390
    },
    {
      "epoch": 0.9468599033816425,
      "grad_norm": 9.457799911499023,
      "learning_rate": 0.00016859903381642513,
      "loss": 27.8037,
      "step": 392
    },
    {
      "epoch": 0.9516908212560387,
      "grad_norm": 7.2435173988342285,
      "learning_rate": 0.00016843800322061193,
      "loss": 28.3055,
      "step": 394
    },
    {
      "epoch": 0.9565217391304348,
      "grad_norm": 8.652717590332031,
      "learning_rate": 0.0001682769726247987,
      "loss": 29.4198,
      "step": 396
    },
    {
      "epoch": 0.961352657004831,
      "grad_norm": 11.697986602783203,
      "learning_rate": 0.0001681159420289855,
      "loss": 29.3374,
      "step": 398
    },
    {
      "epoch": 0.966183574879227,
      "grad_norm": 9.140453338623047,
      "learning_rate": 0.00016795491143317231,
      "loss": 28.7595,
      "step": 400
    },
    {
      "epoch": 0.9710144927536232,
      "grad_norm": 8.438916206359863,
      "learning_rate": 0.0001677938808373591,
      "loss": 27.0845,
      "step": 402
    },
    {
      "epoch": 0.9758454106280193,
      "grad_norm": 9.950366973876953,
      "learning_rate": 0.0001676328502415459,
      "loss": 28.7205,
      "step": 404
    },
    {
      "epoch": 0.9806763285024155,
      "grad_norm": 7.97797155380249,
      "learning_rate": 0.0001674718196457327,
      "loss": 30.1159,
      "step": 406
    },
    {
      "epoch": 0.9855072463768116,
      "grad_norm": 7.832582950592041,
      "learning_rate": 0.0001673107890499195,
      "loss": 29.9818,
      "step": 408
    },
    {
      "epoch": 0.9903381642512077,
      "grad_norm": 9.50314998626709,
      "learning_rate": 0.0001671497584541063,
      "loss": 26.6032,
      "step": 410
    },
    {
      "epoch": 0.9951690821256038,
      "grad_norm": 10.015514373779297,
      "learning_rate": 0.00016698872785829308,
      "loss": 30.9247,
      "step": 412
    },
    {
      "epoch": 1.0,
      "grad_norm": 8.032495498657227,
      "learning_rate": 0.00016682769726247988,
      "loss": 29.0615,
      "step": 414
    },
    {
      "epoch": 1.0048309178743962,
      "grad_norm": 7.304556846618652,
      "learning_rate": 0.0001666666666666667,
      "loss": 29.5321,
      "step": 416
    },
    {
      "epoch": 1.0096618357487923,
      "grad_norm": 7.575999736785889,
      "learning_rate": 0.00016650563607085346,
      "loss": 26.892,
      "step": 418
    },
    {
      "epoch": 1.0144927536231885,
      "grad_norm": 9.361268043518066,
      "learning_rate": 0.00016634460547504027,
      "loss": 28.0997,
      "step": 420
    },
    {
      "epoch": 1.0193236714975846,
      "grad_norm": 8.8648099899292,
      "learning_rate": 0.00016618357487922707,
      "loss": 28.1403,
      "step": 422
    },
    {
      "epoch": 1.0241545893719808,
      "grad_norm": 11.204512596130371,
      "learning_rate": 0.00016602254428341385,
      "loss": 29.3477,
      "step": 424
    },
    {
      "epoch": 1.0289855072463767,
      "grad_norm": 8.673910140991211,
      "learning_rate": 0.00016586151368760065,
      "loss": 29.9338,
      "step": 426
    },
    {
      "epoch": 1.0338164251207729,
      "grad_norm": 10.797616958618164,
      "learning_rate": 0.00016570048309178746,
      "loss": 28.3507,
      "step": 428
    },
    {
      "epoch": 1.038647342995169,
      "grad_norm": 9.084686279296875,
      "learning_rate": 0.00016553945249597426,
      "loss": 27.9469,
      "step": 430
    },
    {
      "epoch": 1.0434782608695652,
      "grad_norm": 9.642114639282227,
      "learning_rate": 0.00016537842190016104,
      "loss": 29.2968,
      "step": 432
    },
    {
      "epoch": 1.0483091787439613,
      "grad_norm": 8.333573341369629,
      "learning_rate": 0.00016521739130434784,
      "loss": 27.1096,
      "step": 434
    },
    {
      "epoch": 1.0531400966183575,
      "grad_norm": 10.562450408935547,
      "learning_rate": 0.00016505636070853464,
      "loss": 30.1957,
      "step": 436
    },
    {
      "epoch": 1.0579710144927537,
      "grad_norm": 7.98309326171875,
      "learning_rate": 0.00016489533011272142,
      "loss": 29.5,
      "step": 438
    },
    {
      "epoch": 1.0628019323671498,
      "grad_norm": 7.789132595062256,
      "learning_rate": 0.00016473429951690822,
      "loss": 29.6741,
      "step": 440
    },
    {
      "epoch": 1.067632850241546,
      "grad_norm": 8.362640380859375,
      "learning_rate": 0.00016457326892109503,
      "loss": 29.4575,
      "step": 442
    },
    {
      "epoch": 1.0724637681159421,
      "grad_norm": 7.407423973083496,
      "learning_rate": 0.0001644122383252818,
      "loss": 26.9575,
      "step": 444
    },
    {
      "epoch": 1.077294685990338,
      "grad_norm": 8.499112129211426,
      "learning_rate": 0.0001642512077294686,
      "loss": 29.4598,
      "step": 446
    },
    {
      "epoch": 1.0821256038647342,
      "grad_norm": 8.675498008728027,
      "learning_rate": 0.0001640901771336554,
      "loss": 26.3951,
      "step": 448
    },
    {
      "epoch": 1.0869565217391304,
      "grad_norm": 9.390106201171875,
      "learning_rate": 0.00016392914653784219,
      "loss": 27.4861,
      "step": 450
    },
    {
      "epoch": 1.0917874396135265,
      "grad_norm": 8.66092586517334,
      "learning_rate": 0.000163768115942029,
      "loss": 27.5384,
      "step": 452
    },
    {
      "epoch": 1.0966183574879227,
      "grad_norm": 9.866594314575195,
      "learning_rate": 0.0001636070853462158,
      "loss": 28.5237,
      "step": 454
    },
    {
      "epoch": 1.1014492753623188,
      "grad_norm": 8.653681755065918,
      "learning_rate": 0.00016344605475040257,
      "loss": 28.2452,
      "step": 456
    },
    {
      "epoch": 1.106280193236715,
      "grad_norm": 7.964409351348877,
      "learning_rate": 0.00016328502415458937,
      "loss": 27.3373,
      "step": 458
    },
    {
      "epoch": 1.1111111111111112,
      "grad_norm": 6.7314863204956055,
      "learning_rate": 0.00016312399355877618,
      "loss": 27.628,
      "step": 460
    },
    {
      "epoch": 1.1159420289855073,
      "grad_norm": 8.670600891113281,
      "learning_rate": 0.00016296296296296295,
      "loss": 28.4199,
      "step": 462
    },
    {
      "epoch": 1.1207729468599035,
      "grad_norm": 8.304594993591309,
      "learning_rate": 0.00016280193236714976,
      "loss": 28.3439,
      "step": 464
    },
    {
      "epoch": 1.1256038647342996,
      "grad_norm": 8.142372131347656,
      "learning_rate": 0.00016264090177133656,
      "loss": 29.8356,
      "step": 466
    },
    {
      "epoch": 1.1304347826086956,
      "grad_norm": 9.617864608764648,
      "learning_rate": 0.00016247987117552336,
      "loss": 26.8729,
      "step": 468
    },
    {
      "epoch": 1.1352657004830917,
      "grad_norm": 11.739964485168457,
      "learning_rate": 0.00016231884057971017,
      "loss": 27.5099,
      "step": 470
    },
    {
      "epoch": 1.1400966183574879,
      "grad_norm": 9.0482759475708,
      "learning_rate": 0.00016215780998389697,
      "loss": 25.6606,
      "step": 472
    },
    {
      "epoch": 1.144927536231884,
      "grad_norm": 7.055074214935303,
      "learning_rate": 0.00016199677938808375,
      "loss": 27.0075,
      "step": 474
    },
    {
      "epoch": 1.1497584541062802,
      "grad_norm": 9.319602012634277,
      "learning_rate": 0.00016183574879227055,
      "loss": 30.2885,
      "step": 476
    },
    {
      "epoch": 1.1545893719806763,
      "grad_norm": 9.021683692932129,
      "learning_rate": 0.00016167471819645735,
      "loss": 28.8099,
      "step": 478
    },
    {
      "epoch": 1.1594202898550725,
      "grad_norm": 6.554941177368164,
      "learning_rate": 0.00016151368760064413,
      "loss": 28.2995,
      "step": 480
    },
    {
      "epoch": 1.1642512077294687,
      "grad_norm": 7.542542934417725,
      "learning_rate": 0.00016135265700483093,
      "loss": 28.35,
      "step": 482
    },
    {
      "epoch": 1.1690821256038648,
      "grad_norm": 12.053621292114258,
      "learning_rate": 0.00016119162640901774,
      "loss": 30.3838,
      "step": 484
    },
    {
      "epoch": 1.1739130434782608,
      "grad_norm": 8.615163803100586,
      "learning_rate": 0.00016103059581320451,
      "loss": 27.872,
      "step": 486
    },
    {
      "epoch": 1.178743961352657,
      "grad_norm": 10.88862419128418,
      "learning_rate": 0.00016086956521739132,
      "loss": 26.9656,
      "step": 488
    },
    {
      "epoch": 1.183574879227053,
      "grad_norm": 9.35364818572998,
      "learning_rate": 0.00016070853462157812,
      "loss": 27.791,
      "step": 490
    },
    {
      "epoch": 1.1884057971014492,
      "grad_norm": 8.610274314880371,
      "learning_rate": 0.0001605475040257649,
      "loss": 27.1357,
      "step": 492
    },
    {
      "epoch": 1.1932367149758454,
      "grad_norm": 8.759892463684082,
      "learning_rate": 0.0001603864734299517,
      "loss": 27.035,
      "step": 494
    },
    {
      "epoch": 1.1980676328502415,
      "grad_norm": 10.015132904052734,
      "learning_rate": 0.0001602254428341385,
      "loss": 28.5965,
      "step": 496
    },
    {
      "epoch": 1.2028985507246377,
      "grad_norm": 9.121025085449219,
      "learning_rate": 0.00016006441223832528,
      "loss": 28.5107,
      "step": 498
    },
    {
      "epoch": 1.2077294685990339,
      "grad_norm": 9.401590347290039,
      "learning_rate": 0.00015990338164251208,
      "loss": 28.5304,
      "step": 500
    },
    {
      "epoch": 1.21256038647343,
      "grad_norm": 8.708001136779785,
      "learning_rate": 0.0001597423510466989,
      "loss": 29.9226,
      "step": 502
    },
    {
      "epoch": 1.2173913043478262,
      "grad_norm": 9.344232559204102,
      "learning_rate": 0.00015958132045088566,
      "loss": 27.7489,
      "step": 504
    },
    {
      "epoch": 1.2222222222222223,
      "grad_norm": 7.874361991882324,
      "learning_rate": 0.00015942028985507247,
      "loss": 29.2094,
      "step": 506
    },
    {
      "epoch": 1.2270531400966185,
      "grad_norm": 9.35866928100586,
      "learning_rate": 0.00015925925925925927,
      "loss": 28.5889,
      "step": 508
    },
    {
      "epoch": 1.2318840579710144,
      "grad_norm": 9.740680694580078,
      "learning_rate": 0.00015909822866344605,
      "loss": 29.5747,
      "step": 510
    },
    {
      "epoch": 1.2367149758454106,
      "grad_norm": 7.713297367095947,
      "learning_rate": 0.00015893719806763285,
      "loss": 28.9137,
      "step": 512
    },
    {
      "epoch": 1.2415458937198067,
      "grad_norm": 8.05880355834961,
      "learning_rate": 0.00015877616747181965,
      "loss": 29.4705,
      "step": 514
    },
    {
      "epoch": 1.2463768115942029,
      "grad_norm": 8.30479621887207,
      "learning_rate": 0.00015861513687600643,
      "loss": 29.1907,
      "step": 516
    },
    {
      "epoch": 1.251207729468599,
      "grad_norm": 10.590409278869629,
      "learning_rate": 0.00015845410628019323,
      "loss": 27.9428,
      "step": 518
    },
    {
      "epoch": 1.2560386473429952,
      "grad_norm": 8.054545402526855,
      "learning_rate": 0.00015829307568438004,
      "loss": 28.4886,
      "step": 520
    },
    {
      "epoch": 1.2608695652173914,
      "grad_norm": 8.148458480834961,
      "learning_rate": 0.00015813204508856681,
      "loss": 27.5395,
      "step": 522
    },
    {
      "epoch": 1.2657004830917875,
      "grad_norm": 8.747846603393555,
      "learning_rate": 0.00015797101449275362,
      "loss": 29.784,
      "step": 524
    },
    {
      "epoch": 1.2705314009661834,
      "grad_norm": 8.56131362915039,
      "learning_rate": 0.00015780998389694042,
      "loss": 26.8683,
      "step": 526
    },
    {
      "epoch": 1.2753623188405796,
      "grad_norm": 7.3210883140563965,
      "learning_rate": 0.0001576489533011272,
      "loss": 28.6462,
      "step": 528
    },
    {
      "epoch": 1.2801932367149758,
      "grad_norm": 7.494152545928955,
      "learning_rate": 0.00015748792270531403,
      "loss": 28.5302,
      "step": 530
    },
    {
      "epoch": 1.285024154589372,
      "grad_norm": 8.267993927001953,
      "learning_rate": 0.00015732689210950083,
      "loss": 28.3822,
      "step": 532
    },
    {
      "epoch": 1.289855072463768,
      "grad_norm": 9.768172264099121,
      "learning_rate": 0.0001571658615136876,
      "loss": 27.6898,
      "step": 534
    },
    {
      "epoch": 1.2946859903381642,
      "grad_norm": 6.865130424499512,
      "learning_rate": 0.0001570048309178744,
      "loss": 28.2198,
      "step": 536
    },
    {
      "epoch": 1.2995169082125604,
      "grad_norm": 8.628961563110352,
      "learning_rate": 0.00015684380032206122,
      "loss": 26.5476,
      "step": 538
    },
    {
      "epoch": 1.3043478260869565,
      "grad_norm": 9.150886535644531,
      "learning_rate": 0.000156682769726248,
      "loss": 29.4819,
      "step": 540
    },
    {
      "epoch": 1.3091787439613527,
      "grad_norm": 8.535932540893555,
      "learning_rate": 0.0001565217391304348,
      "loss": 28.8612,
      "step": 542
    },
    {
      "epoch": 1.3140096618357489,
      "grad_norm": 8.818495750427246,
      "learning_rate": 0.0001563607085346216,
      "loss": 29.2101,
      "step": 544
    },
    {
      "epoch": 1.318840579710145,
      "grad_norm": 8.080242156982422,
      "learning_rate": 0.00015619967793880838,
      "loss": 26.2693,
      "step": 546
    },
    {
      "epoch": 1.3236714975845412,
      "grad_norm": 7.340477466583252,
      "learning_rate": 0.00015603864734299518,
      "loss": 28.7229,
      "step": 548
    },
    {
      "epoch": 1.3285024154589373,
      "grad_norm": 7.5151047706604,
      "learning_rate": 0.00015587761674718198,
      "loss": 28.9171,
      "step": 550
    },
    {
      "epoch": 1.3333333333333333,
      "grad_norm": 8.710932731628418,
      "learning_rate": 0.00015571658615136876,
      "loss": 27.4353,
      "step": 552
    },
    {
      "epoch": 1.3381642512077294,
      "grad_norm": 8.146522521972656,
      "learning_rate": 0.00015555555555555556,
      "loss": 27.0576,
      "step": 554
    },
    {
      "epoch": 1.3429951690821256,
      "grad_norm": 9.677267074584961,
      "learning_rate": 0.00015539452495974237,
      "loss": 27.569,
      "step": 556
    },
    {
      "epoch": 1.3478260869565217,
      "grad_norm": 8.272392272949219,
      "learning_rate": 0.00015523349436392914,
      "loss": 27.5188,
      "step": 558
    },
    {
      "epoch": 1.3526570048309179,
      "grad_norm": 9.684012413024902,
      "learning_rate": 0.00015507246376811595,
      "loss": 29.1665,
      "step": 560
    },
    {
      "epoch": 1.357487922705314,
      "grad_norm": 12.55364990234375,
      "learning_rate": 0.00015491143317230275,
      "loss": 28.0156,
      "step": 562
    },
    {
      "epoch": 1.3623188405797102,
      "grad_norm": 8.099139213562012,
      "learning_rate": 0.00015475040257648953,
      "loss": 28.706,
      "step": 564
    },
    {
      "epoch": 1.3671497584541064,
      "grad_norm": 9.807384490966797,
      "learning_rate": 0.00015458937198067633,
      "loss": 28.5265,
      "step": 566
    },
    {
      "epoch": 1.3719806763285023,
      "grad_norm": 9.85666275024414,
      "learning_rate": 0.00015442834138486313,
      "loss": 27.7005,
      "step": 568
    },
    {
      "epoch": 1.3768115942028984,
      "grad_norm": 7.128468990325928,
      "learning_rate": 0.00015426731078904994,
      "loss": 27.5816,
      "step": 570
    },
    {
      "epoch": 1.3816425120772946,
      "grad_norm": 8.653708457946777,
      "learning_rate": 0.0001541062801932367,
      "loss": 28.4573,
      "step": 572
    },
    {
      "epoch": 1.3864734299516908,
      "grad_norm": 7.988314151763916,
      "learning_rate": 0.00015394524959742352,
      "loss": 27.9508,
      "step": 574
    },
    {
      "epoch": 1.391304347826087,
      "grad_norm": 10.148573875427246,
      "learning_rate": 0.00015378421900161032,
      "loss": 28.0399,
      "step": 576
    },
    {
      "epoch": 1.396135265700483,
      "grad_norm": 8.33492660522461,
      "learning_rate": 0.0001536231884057971,
      "loss": 29.6252,
      "step": 578
    },
    {
      "epoch": 1.4009661835748792,
      "grad_norm": 10.362284660339355,
      "learning_rate": 0.0001534621578099839,
      "loss": 28.2634,
      "step": 580
    },
    {
      "epoch": 1.4057971014492754,
      "grad_norm": 11.445610046386719,
      "learning_rate": 0.0001533011272141707,
      "loss": 27.94,
      "step": 582
    },
    {
      "epoch": 1.4106280193236715,
      "grad_norm": 5.8856916427612305,
      "learning_rate": 0.00015314009661835748,
      "loss": 28.6919,
      "step": 584
    },
    {
      "epoch": 1.4154589371980677,
      "grad_norm": 8.040237426757812,
      "learning_rate": 0.00015297906602254428,
      "loss": 27.4428,
      "step": 586
    },
    {
      "epoch": 1.4202898550724639,
      "grad_norm": 8.459455490112305,
      "learning_rate": 0.0001528180354267311,
      "loss": 28.883,
      "step": 588
    },
    {
      "epoch": 1.42512077294686,
      "grad_norm": 9.862092971801758,
      "learning_rate": 0.00015265700483091786,
      "loss": 27.5396,
      "step": 590
    },
    {
      "epoch": 1.4299516908212562,
      "grad_norm": 9.240866661071777,
      "learning_rate": 0.0001524959742351047,
      "loss": 28.5589,
      "step": 592
    },
    {
      "epoch": 1.434782608695652,
      "grad_norm": 8.943296432495117,
      "learning_rate": 0.00015233494363929147,
      "loss": 28.6255,
      "step": 594
    },
    {
      "epoch": 1.4396135265700483,
      "grad_norm": 9.087813377380371,
      "learning_rate": 0.00015217391304347827,
      "loss": 29.508,
      "step": 596
    },
    {
      "epoch": 1.4444444444444444,
      "grad_norm": 8.143028259277344,
      "learning_rate": 0.00015201288244766508,
      "loss": 27.9508,
      "step": 598
    },
    {
      "epoch": 1.4492753623188406,
      "grad_norm": 8.073821067810059,
      "learning_rate": 0.00015185185185185185,
      "loss": 28.4479,
      "step": 600
    },
    {
      "epoch": 1.4541062801932367,
      "grad_norm": 7.678289413452148,
      "learning_rate": 0.00015169082125603866,
      "loss": 28.75,
      "step": 602
    },
    {
      "epoch": 1.458937198067633,
      "grad_norm": 7.962745189666748,
      "learning_rate": 0.00015152979066022546,
      "loss": 28.074,
      "step": 604
    },
    {
      "epoch": 1.463768115942029,
      "grad_norm": 8.225008010864258,
      "learning_rate": 0.00015136876006441224,
      "loss": 28.5051,
      "step": 606
    },
    {
      "epoch": 1.4685990338164252,
      "grad_norm": 6.815709590911865,
      "learning_rate": 0.00015120772946859904,
      "loss": 29.1703,
      "step": 608
    },
    {
      "epoch": 1.4734299516908211,
      "grad_norm": 7.653327465057373,
      "learning_rate": 0.00015104669887278585,
      "loss": 27.2951,
      "step": 610
    },
    {
      "epoch": 1.4782608695652173,
      "grad_norm": 10.327927589416504,
      "learning_rate": 0.00015088566827697265,
      "loss": 28.1144,
      "step": 612
    },
    {
      "epoch": 1.4830917874396135,
      "grad_norm": 8.612911224365234,
      "learning_rate": 0.00015072463768115943,
      "loss": 28.5057,
      "step": 614
    },
    {
      "epoch": 1.4879227053140096,
      "grad_norm": 8.190404891967773,
      "learning_rate": 0.00015056360708534623,
      "loss": 27.3437,
      "step": 616
    },
    {
      "epoch": 1.4927536231884058,
      "grad_norm": 7.556375980377197,
      "learning_rate": 0.00015040257648953303,
      "loss": 26.3396,
      "step": 618
    },
    {
      "epoch": 1.497584541062802,
      "grad_norm": 8.995963096618652,
      "learning_rate": 0.0001502415458937198,
      "loss": 28.9629,
      "step": 620
    },
    {
      "epoch": 1.502415458937198,
      "grad_norm": 13.403937339782715,
      "learning_rate": 0.0001500805152979066,
      "loss": 28.1381,
      "step": 622
    },
    {
      "epoch": 1.5072463768115942,
      "grad_norm": 8.48337459564209,
      "learning_rate": 0.00014991948470209342,
      "loss": 28.8002,
      "step": 624
    },
    {
      "epoch": 1.5120772946859904,
      "grad_norm": 7.916252613067627,
      "learning_rate": 0.0001497584541062802,
      "loss": 29.5661,
      "step": 626
    },
    {
      "epoch": 1.5169082125603865,
      "grad_norm": 8.097860336303711,
      "learning_rate": 0.000149597423510467,
      "loss": 29.1108,
      "step": 628
    },
    {
      "epoch": 1.5217391304347827,
      "grad_norm": 7.992598056793213,
      "learning_rate": 0.0001494363929146538,
      "loss": 25.8962,
      "step": 630
    },
    {
      "epoch": 1.5265700483091789,
      "grad_norm": 6.601809501647949,
      "learning_rate": 0.00014927536231884058,
      "loss": 27.6647,
      "step": 632
    },
    {
      "epoch": 1.531400966183575,
      "grad_norm": 10.532876014709473,
      "learning_rate": 0.00014911433172302738,
      "loss": 30.0814,
      "step": 634
    },
    {
      "epoch": 1.5362318840579712,
      "grad_norm": 8.925707817077637,
      "learning_rate": 0.00014895330112721418,
      "loss": 28.0325,
      "step": 636
    },
    {
      "epoch": 1.541062801932367,
      "grad_norm": 6.749852657318115,
      "learning_rate": 0.00014879227053140096,
      "loss": 27.5785,
      "step": 638
    },
    {
      "epoch": 1.5458937198067633,
      "grad_norm": 9.3954439163208,
      "learning_rate": 0.00014863123993558776,
      "loss": 29.7051,
      "step": 640
    },
    {
      "epoch": 1.5507246376811594,
      "grad_norm": 7.524625778198242,
      "learning_rate": 0.00014847020933977457,
      "loss": 29.8091,
      "step": 642
    },
    {
      "epoch": 1.5555555555555556,
      "grad_norm": 8.303244590759277,
      "learning_rate": 0.00014830917874396134,
      "loss": 28.2779,
      "step": 644
    },
    {
      "epoch": 1.5603864734299517,
      "grad_norm": 8.040205001831055,
      "learning_rate": 0.00014814814814814815,
      "loss": 29.3399,
      "step": 646
    },
    {
      "epoch": 1.5652173913043477,
      "grad_norm": 6.253566265106201,
      "learning_rate": 0.00014798711755233495,
      "loss": 29.2561,
      "step": 648
    },
    {
      "epoch": 1.5700483091787438,
      "grad_norm": 8.045578002929688,
      "learning_rate": 0.00014782608695652173,
      "loss": 26.8589,
      "step": 650
    },
    {
      "epoch": 1.57487922705314,
      "grad_norm": 8.851433753967285,
      "learning_rate": 0.00014766505636070853,
      "loss": 28.2428,
      "step": 652
    },
    {
      "epoch": 1.5797101449275361,
      "grad_norm": 8.400809288024902,
      "learning_rate": 0.00014750402576489533,
      "loss": 29.5603,
      "step": 654
    },
    {
      "epoch": 1.5845410628019323,
      "grad_norm": 7.533260345458984,
      "learning_rate": 0.00014734299516908214,
      "loss": 26.993,
      "step": 656
    },
    {
      "epoch": 1.5893719806763285,
      "grad_norm": 6.993838310241699,
      "learning_rate": 0.00014718196457326894,
      "loss": 28.0578,
      "step": 658
    },
    {
      "epoch": 1.5942028985507246,
      "grad_norm": 9.009007453918457,
      "learning_rate": 0.00014702093397745574,
      "loss": 27.0416,
      "step": 660
    },
    {
      "epoch": 1.5990338164251208,
      "grad_norm": 7.587328910827637,
      "learning_rate": 0.00014685990338164252,
      "loss": 27.896,
      "step": 662
    },
    {
      "epoch": 1.603864734299517,
      "grad_norm": 7.423081398010254,
      "learning_rate": 0.00014669887278582932,
      "loss": 26.6892,
      "step": 664
    },
    {
      "epoch": 1.608695652173913,
      "grad_norm": 8.408404350280762,
      "learning_rate": 0.00014653784219001613,
      "loss": 27.5191,
      "step": 666
    },
    {
      "epoch": 1.6135265700483092,
      "grad_norm": 8.044210433959961,
      "learning_rate": 0.0001463768115942029,
      "loss": 30.3552,
      "step": 668
    },
    {
      "epoch": 1.6183574879227054,
      "grad_norm": 8.7662935256958,
      "learning_rate": 0.0001462157809983897,
      "loss": 27.6742,
      "step": 670
    },
    {
      "epoch": 1.6231884057971016,
      "grad_norm": 7.504002094268799,
      "learning_rate": 0.0001460547504025765,
      "loss": 26.1561,
      "step": 672
    },
    {
      "epoch": 1.6280193236714977,
      "grad_norm": 7.4576826095581055,
      "learning_rate": 0.0001458937198067633,
      "loss": 27.9011,
      "step": 674
    },
    {
      "epoch": 1.6328502415458939,
      "grad_norm": 7.216124057769775,
      "learning_rate": 0.0001457326892109501,
      "loss": 27.3678,
      "step": 676
    },
    {
      "epoch": 1.6376811594202898,
      "grad_norm": 8.461156845092773,
      "learning_rate": 0.0001455716586151369,
      "loss": 28.0337,
      "step": 678
    },
    {
      "epoch": 1.642512077294686,
| "grad_norm": 9.682413101196289, | |
| "learning_rate": 0.00014541062801932367, | |
| "loss": 27.1031, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.6473429951690821, | |
| "grad_norm": 6.8604817390441895, | |
| "learning_rate": 0.00014524959742351047, | |
| "loss": 28.9948, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 1.6521739130434783, | |
| "grad_norm": 8.835001945495605, | |
| "learning_rate": 0.00014508856682769728, | |
| "loss": 28.2096, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 1.6570048309178744, | |
| "grad_norm": 8.947821617126465, | |
| "learning_rate": 0.00014492753623188405, | |
| "loss": 28.8512, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 1.6618357487922706, | |
| "grad_norm": 7.301581859588623, | |
| "learning_rate": 0.00014476650563607086, | |
| "loss": 28.8649, | |
| "step": 688 | |
| }, | |
| { | |
| "epoch": 1.6666666666666665, | |
| "grad_norm": 8.465940475463867, | |
| "learning_rate": 0.00014460547504025766, | |
| "loss": 28.6396, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.6714975845410627, | |
| "grad_norm": 9.281678199768066, | |
| "learning_rate": 0.00014444444444444444, | |
| "loss": 26.2167, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 1.6763285024154588, | |
| "grad_norm": 8.054730415344238, | |
| "learning_rate": 0.00014428341384863124, | |
| "loss": 30.5799, | |
| "step": 694 | |
| }, | |
| { | |
| "epoch": 1.681159420289855, | |
| "grad_norm": 9.177703857421875, | |
| "learning_rate": 0.00014412238325281804, | |
| "loss": 27.6689, | |
| "step": 696 | |
| }, | |
| { | |
| "epoch": 1.6859903381642511, | |
| "grad_norm": 7.34149169921875, | |
| "learning_rate": 0.00014396135265700482, | |
| "loss": 28.1966, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 1.6908212560386473, | |
| "grad_norm": 9.34843921661377, | |
| "learning_rate": 0.00014380032206119162, | |
| "loss": 28.2126, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.6956521739130435, | |
| "grad_norm": 8.255733489990234, | |
| "learning_rate": 0.00014363929146537843, | |
| "loss": 28.279, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 1.7004830917874396, | |
| "grad_norm": 7.138146877288818, | |
| "learning_rate": 0.0001434782608695652, | |
| "loss": 28.0335, | |
| "step": 704 | |
| }, | |
| { | |
| "epoch": 1.7053140096618358, | |
| "grad_norm": 7.608633041381836, | |
| "learning_rate": 0.000143317230273752, | |
| "loss": 27.788, | |
| "step": 706 | |
| }, | |
| { | |
| "epoch": 1.710144927536232, | |
| "grad_norm": 10.221348762512207, | |
| "learning_rate": 0.0001431561996779388, | |
| "loss": 26.4698, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 1.714975845410628, | |
| "grad_norm": 7.764200210571289, | |
| "learning_rate": 0.0001429951690821256, | |
| "loss": 27.5784, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.7198067632850242, | |
| "grad_norm": 7.295494079589844, | |
| "learning_rate": 0.0001428341384863124, | |
| "loss": 25.7921, | |
| "step": 712 | |
| }, | |
| { | |
| "epoch": 1.7246376811594204, | |
| "grad_norm": 7.534460544586182, | |
| "learning_rate": 0.0001426731078904992, | |
| "loss": 27.5532, | |
| "step": 714 | |
| }, | |
| { | |
| "epoch": 1.7294685990338166, | |
| "grad_norm": 7.3485002517700195, | |
| "learning_rate": 0.000142512077294686, | |
| "loss": 27.3069, | |
| "step": 716 | |
| }, | |
| { | |
| "epoch": 1.7342995169082127, | |
| "grad_norm": 7.3418049812316895, | |
| "learning_rate": 0.0001423510466988728, | |
| "loss": 28.4993, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 1.7391304347826086, | |
| "grad_norm": 7.740454196929932, | |
| "learning_rate": 0.0001421900161030596, | |
| "loss": 27.7353, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.7439613526570048, | |
| "grad_norm": 6.945924282073975, | |
| "learning_rate": 0.00014202898550724638, | |
| "loss": 29.0447, | |
| "step": 722 | |
| }, | |
| { | |
| "epoch": 1.748792270531401, | |
| "grad_norm": 10.651424407958984, | |
| "learning_rate": 0.00014186795491143319, | |
| "loss": 28.2643, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 1.7536231884057971, | |
| "grad_norm": 8.329526901245117, | |
| "learning_rate": 0.00014170692431562, | |
| "loss": 27.9287, | |
| "step": 726 | |
| }, | |
| { | |
| "epoch": 1.7584541062801933, | |
| "grad_norm": 9.379905700683594, | |
| "learning_rate": 0.00014154589371980677, | |
| "loss": 29.7388, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 1.7632850241545892, | |
| "grad_norm": 8.386578559875488, | |
| "learning_rate": 0.00014138486312399357, | |
| "loss": 27.136, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.7681159420289854, | |
| "grad_norm": 7.3653388023376465, | |
| "learning_rate": 0.00014122383252818037, | |
| "loss": 27.7946, | |
| "step": 732 | |
| }, | |
| { | |
| "epoch": 1.7729468599033815, | |
| "grad_norm": 8.317994117736816, | |
| "learning_rate": 0.00014106280193236715, | |
| "loss": 27.9617, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 1.7777777777777777, | |
| "grad_norm": 9.021920204162598, | |
| "learning_rate": 0.00014090177133655395, | |
| "loss": 27.6967, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 1.7826086956521738, | |
| "grad_norm": 6.490061283111572, | |
| "learning_rate": 0.00014074074074074076, | |
| "loss": 29.1245, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 1.78743961352657, | |
| "grad_norm": 8.023162841796875, | |
| "learning_rate": 0.00014057971014492753, | |
| "loss": 28.1596, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.7922705314009661, | |
| "grad_norm": 7.9419169425964355, | |
| "learning_rate": 0.00014041867954911434, | |
| "loss": 27.8295, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 1.7971014492753623, | |
| "grad_norm": 7.990035057067871, | |
| "learning_rate": 0.00014025764895330114, | |
| "loss": 26.8629, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 1.8019323671497585, | |
| "grad_norm": 8.936909675598145, | |
| "learning_rate": 0.00014009661835748792, | |
| "loss": 28.8046, | |
| "step": 746 | |
| }, | |
| { | |
| "epoch": 1.8067632850241546, | |
| "grad_norm": 8.737541198730469, | |
| "learning_rate": 0.00013993558776167472, | |
| "loss": 28.1564, | |
| "step": 748 | |
| }, | |
| { | |
| "epoch": 1.8115942028985508, | |
| "grad_norm": 6.9399518966674805, | |
| "learning_rate": 0.00013977455716586152, | |
| "loss": 28.085, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.816425120772947, | |
| "grad_norm": 7.811395645141602, | |
| "learning_rate": 0.00013961352657004833, | |
| "loss": 26.8601, | |
| "step": 752 | |
| }, | |
| { | |
| "epoch": 1.821256038647343, | |
| "grad_norm": 7.366069793701172, | |
| "learning_rate": 0.0001394524959742351, | |
| "loss": 27.7022, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 1.8260869565217392, | |
| "grad_norm": 7.216097831726074, | |
| "learning_rate": 0.0001392914653784219, | |
| "loss": 27.8814, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 1.8309178743961354, | |
| "grad_norm": 7.748776912689209, | |
| "learning_rate": 0.0001391304347826087, | |
| "loss": 26.7868, | |
| "step": 758 | |
| }, | |
| { | |
| "epoch": 1.8357487922705316, | |
| "grad_norm": 7.118618488311768, | |
| "learning_rate": 0.0001389694041867955, | |
| "loss": 27.519, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.8405797101449275, | |
| "grad_norm": 7.588200092315674, | |
| "learning_rate": 0.0001388083735909823, | |
| "loss": 27.8801, | |
| "step": 762 | |
| }, | |
| { | |
| "epoch": 1.8454106280193237, | |
| "grad_norm": 8.082246780395508, | |
| "learning_rate": 0.0001386473429951691, | |
| "loss": 27.8767, | |
| "step": 764 | |
| }, | |
| { | |
| "epoch": 1.8502415458937198, | |
| "grad_norm": 8.772019386291504, | |
| "learning_rate": 0.00013848631239935587, | |
| "loss": 27.0454, | |
| "step": 766 | |
| }, | |
| { | |
| "epoch": 1.855072463768116, | |
| "grad_norm": 11.820154190063477, | |
| "learning_rate": 0.00013832528180354267, | |
| "loss": 27.4214, | |
| "step": 768 | |
| }, | |
| { | |
| "epoch": 1.8599033816425121, | |
| "grad_norm": 7.21035623550415, | |
| "learning_rate": 0.00013816425120772948, | |
| "loss": 28.2132, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.864734299516908, | |
| "grad_norm": 7.833118438720703, | |
| "learning_rate": 0.00013800322061191625, | |
| "loss": 26.0298, | |
| "step": 772 | |
| }, | |
| { | |
| "epoch": 1.8695652173913042, | |
| "grad_norm": 9.474292755126953, | |
| "learning_rate": 0.00013784219001610306, | |
| "loss": 27.4543, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 1.8743961352657004, | |
| "grad_norm": 8.790839195251465, | |
| "learning_rate": 0.00013768115942028986, | |
| "loss": 26.6007, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 1.8792270531400965, | |
| "grad_norm": 7.7932963371276855, | |
| "learning_rate": 0.00013752012882447664, | |
| "loss": 26.3272, | |
| "step": 778 | |
| }, | |
| { | |
| "epoch": 1.8840579710144927, | |
| "grad_norm": 8.080236434936523, | |
| "learning_rate": 0.00013735909822866347, | |
| "loss": 27.8956, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.8888888888888888, | |
| "grad_norm": 8.07216739654541, | |
| "learning_rate": 0.00013719806763285024, | |
| "loss": 27.893, | |
| "step": 782 | |
| }, | |
| { | |
| "epoch": 1.893719806763285, | |
| "grad_norm": 12.139753341674805, | |
| "learning_rate": 0.00013703703703703705, | |
| "loss": 29.3664, | |
| "step": 784 | |
| }, | |
| { | |
| "epoch": 1.8985507246376812, | |
| "grad_norm": 8.131410598754883, | |
| "learning_rate": 0.00013687600644122385, | |
| "loss": 26.927, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 1.9033816425120773, | |
| "grad_norm": 7.748467922210693, | |
| "learning_rate": 0.00013671497584541066, | |
| "loss": 27.8784, | |
| "step": 788 | |
| }, | |
| { | |
| "epoch": 1.9082125603864735, | |
| "grad_norm": 7.19915771484375, | |
| "learning_rate": 0.00013655394524959743, | |
| "loss": 26.3311, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.9130434782608696, | |
| "grad_norm": 7.374076843261719, | |
| "learning_rate": 0.00013639291465378424, | |
| "loss": 27.5376, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 1.9178743961352658, | |
| "grad_norm": 9.71866512298584, | |
| "learning_rate": 0.00013623188405797104, | |
| "loss": 25.3754, | |
| "step": 794 | |
| }, | |
| { | |
| "epoch": 1.922705314009662, | |
| "grad_norm": 7.384367942810059, | |
| "learning_rate": 0.00013607085346215782, | |
| "loss": 27.993, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 1.927536231884058, | |
| "grad_norm": 8.255502700805664, | |
| "learning_rate": 0.00013590982286634462, | |
| "loss": 27.9883, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 1.9323671497584543, | |
| "grad_norm": 6.8607306480407715, | |
| "learning_rate": 0.00013574879227053142, | |
| "loss": 27.9741, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.9371980676328504, | |
| "grad_norm": 7.215616226196289, | |
| "learning_rate": 0.0001355877616747182, | |
| "loss": 28.1998, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 1.9420289855072463, | |
| "grad_norm": 7.920051574707031, | |
| "learning_rate": 0.000135426731078905, | |
| "loss": 27.6914, | |
| "step": 804 | |
| }, | |
| { | |
| "epoch": 1.9468599033816425, | |
| "grad_norm": 7.799438953399658, | |
| "learning_rate": 0.0001352657004830918, | |
| "loss": 26.2782, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 1.9516908212560387, | |
| "grad_norm": 7.846622943878174, | |
| "learning_rate": 0.00013510466988727858, | |
| "loss": 27.5173, | |
| "step": 808 | |
| }, | |
| { | |
| "epoch": 1.9565217391304348, | |
| "grad_norm": 7.30129861831665, | |
| "learning_rate": 0.00013494363929146539, | |
| "loss": 24.9119, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.961352657004831, | |
| "grad_norm": 7.13409948348999, | |
| "learning_rate": 0.0001347826086956522, | |
| "loss": 27.9657, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 1.966183574879227, | |
| "grad_norm": 9.307235717773438, | |
| "learning_rate": 0.00013462157809983897, | |
| "loss": 27.6966, | |
| "step": 814 | |
| }, | |
| { | |
| "epoch": 1.971014492753623, | |
| "grad_norm": 7.8404741287231445, | |
| "learning_rate": 0.00013446054750402577, | |
| "loss": 26.6173, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 1.9758454106280192, | |
| "grad_norm": 8.165302276611328, | |
| "learning_rate": 0.00013429951690821257, | |
| "loss": 27.9967, | |
| "step": 818 | |
| }, | |
| { | |
| "epoch": 1.9806763285024154, | |
| "grad_norm": 7.126535892486572, | |
| "learning_rate": 0.00013413848631239935, | |
| "loss": 28.0344, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.9855072463768115, | |
| "grad_norm": 9.40721321105957, | |
| "learning_rate": 0.00013397745571658615, | |
| "loss": 26.6568, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 1.9903381642512077, | |
| "grad_norm": 6.842724323272705, | |
| "learning_rate": 0.00013381642512077296, | |
| "loss": 27.3194, | |
| "step": 824 | |
| }, | |
| { | |
| "epoch": 1.9951690821256038, | |
| "grad_norm": 6.537780284881592, | |
| "learning_rate": 0.00013365539452495973, | |
| "loss": 27.7764, | |
| "step": 826 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "grad_norm": 6.422900199890137, | |
| "learning_rate": 0.00013349436392914654, | |
| "loss": 27.8163, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 2.004830917874396, | |
| "grad_norm": 7.0895466804504395, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 28.3841, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.0096618357487923, | |
| "grad_norm": 6.439542293548584, | |
| "learning_rate": 0.00013317230273752012, | |
| "loss": 28.0774, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 2.0144927536231885, | |
| "grad_norm": 7.766908645629883, | |
| "learning_rate": 0.00013301127214170692, | |
| "loss": 26.6744, | |
| "step": 834 | |
| }, | |
| { | |
| "epoch": 2.0193236714975846, | |
| "grad_norm": 9.178189277648926, | |
| "learning_rate": 0.00013285024154589372, | |
| "loss": 29.3364, | |
| "step": 836 | |
| }, | |
| { | |
| "epoch": 2.024154589371981, | |
| "grad_norm": 6.916229248046875, | |
| "learning_rate": 0.0001326892109500805, | |
| "loss": 28.0622, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 2.028985507246377, | |
| "grad_norm": 7.51179838180542, | |
| "learning_rate": 0.0001325281803542673, | |
| "loss": 26.2849, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.033816425120773, | |
| "grad_norm": 8.321070671081543, | |
| "learning_rate": 0.00013236714975845413, | |
| "loss": 27.222, | |
| "step": 842 | |
| }, | |
| { | |
| "epoch": 2.0386473429951693, | |
| "grad_norm": 6.450362205505371, | |
| "learning_rate": 0.0001322061191626409, | |
| "loss": 28.371, | |
| "step": 844 | |
| }, | |
| { | |
| "epoch": 2.0434782608695654, | |
| "grad_norm": 9.631372451782227, | |
| "learning_rate": 0.00013204508856682771, | |
| "loss": 29.1135, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 2.0483091787439616, | |
| "grad_norm": 7.727206707000732, | |
| "learning_rate": 0.00013188405797101452, | |
| "loss": 27.9027, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 2.0531400966183573, | |
| "grad_norm": 8.837319374084473, | |
| "learning_rate": 0.0001317230273752013, | |
| "loss": 27.5687, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.0579710144927534, | |
| "grad_norm": 8.151753425598145, | |
| "learning_rate": 0.0001315619967793881, | |
| "loss": 27.0424, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 2.0628019323671496, | |
| "grad_norm": 7.2588605880737305, | |
| "learning_rate": 0.0001314009661835749, | |
| "loss": 26.2731, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 2.0676328502415457, | |
| "grad_norm": 9.428071975708008, | |
| "learning_rate": 0.00013123993558776168, | |
| "loss": 27.1224, | |
| "step": 856 | |
| }, | |
| { | |
| "epoch": 2.072463768115942, | |
| "grad_norm": 8.864592552185059, | |
| "learning_rate": 0.00013107890499194848, | |
| "loss": 27.3137, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 2.077294685990338, | |
| "grad_norm": 9.21855640411377, | |
| "learning_rate": 0.00013091787439613528, | |
| "loss": 26.886, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.082125603864734, | |
| "grad_norm": 7.239558696746826, | |
| "learning_rate": 0.00013075684380032206, | |
| "loss": 28.2175, | |
| "step": 862 | |
| }, | |
| { | |
| "epoch": 2.0869565217391304, | |
| "grad_norm": 8.155842781066895, | |
| "learning_rate": 0.00013059581320450886, | |
| "loss": 27.7151, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 2.0917874396135265, | |
| "grad_norm": 7.057051658630371, | |
| "learning_rate": 0.00013043478260869567, | |
| "loss": 26.3673, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 2.0966183574879227, | |
| "grad_norm": 7.664299488067627, | |
| "learning_rate": 0.00013027375201288244, | |
| "loss": 25.7326, | |
| "step": 868 | |
| }, | |
| { | |
| "epoch": 2.101449275362319, | |
| "grad_norm": 6.310895919799805, | |
| "learning_rate": 0.00013011272141706925, | |
| "loss": 28.7024, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.106280193236715, | |
| "grad_norm": 7.707338809967041, | |
| "learning_rate": 0.00012995169082125605, | |
| "loss": 27.724, | |
| "step": 872 | |
| }, | |
| { | |
| "epoch": 2.111111111111111, | |
| "grad_norm": 7.318761825561523, | |
| "learning_rate": 0.00012979066022544283, | |
| "loss": 27.2221, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 2.1159420289855073, | |
| "grad_norm": 9.668201446533203, | |
| "learning_rate": 0.00012962962962962963, | |
| "loss": 27.0287, | |
| "step": 876 | |
| }, | |
| { | |
| "epoch": 2.1207729468599035, | |
| "grad_norm": 7.614035129547119, | |
| "learning_rate": 0.00012946859903381643, | |
| "loss": 26.1026, | |
| "step": 878 | |
| }, | |
| { | |
| "epoch": 2.1256038647342996, | |
| "grad_norm": 8.675333023071289, | |
| "learning_rate": 0.0001293075684380032, | |
| "loss": 27.6808, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.130434782608696, | |
| "grad_norm": 6.966851234436035, | |
| "learning_rate": 0.00012914653784219001, | |
| "loss": 27.6239, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 2.135265700483092, | |
| "grad_norm": 6.5391974449157715, | |
| "learning_rate": 0.00012898550724637682, | |
| "loss": 27.9896, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 2.140096618357488, | |
| "grad_norm": 8.508500099182129, | |
| "learning_rate": 0.0001288244766505636, | |
| "loss": 27.918, | |
| "step": 886 | |
| }, | |
| { | |
| "epoch": 2.1449275362318843, | |
| "grad_norm": 7.540635108947754, | |
| "learning_rate": 0.0001286634460547504, | |
| "loss": 28.694, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 2.14975845410628, | |
| "grad_norm": 8.311809539794922, | |
| "learning_rate": 0.0001285024154589372, | |
| "loss": 27.7563, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.154589371980676, | |
| "grad_norm": 7.208229064941406, | |
| "learning_rate": 0.000128341384863124, | |
| "loss": 28.0522, | |
| "step": 892 | |
| }, | |
| { | |
| "epoch": 2.1594202898550723, | |
| "grad_norm": 7.324676036834717, | |
| "learning_rate": 0.00012818035426731078, | |
| "loss": 28.7856, | |
| "step": 894 | |
| }, | |
| { | |
| "epoch": 2.1642512077294684, | |
| "grad_norm": 8.06933879852295, | |
| "learning_rate": 0.00012801932367149759, | |
| "loss": 26.2217, | |
| "step": 896 | |
| }, | |
| { | |
| "epoch": 2.1690821256038646, | |
| "grad_norm": 7.0082902908325195, | |
| "learning_rate": 0.0001278582930756844, | |
| "loss": 28.2109, | |
| "step": 898 | |
| }, | |
| { | |
| "epoch": 2.1739130434782608, | |
| "grad_norm": 6.494582176208496, | |
| "learning_rate": 0.00012769726247987117, | |
| "loss": 27.6243, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.178743961352657, | |
| "grad_norm": 6.218760967254639, | |
| "learning_rate": 0.00012753623188405797, | |
| "loss": 26.7612, | |
| "step": 902 | |
| }, | |
| { | |
| "epoch": 2.183574879227053, | |
| "grad_norm": 9.239087104797363, | |
| "learning_rate": 0.00012737520128824477, | |
| "loss": 27.9478, | |
| "step": 904 | |
| }, | |
| { | |
| "epoch": 2.1884057971014492, | |
| "grad_norm": 6.95756196975708, | |
| "learning_rate": 0.00012721417069243158, | |
| "loss": 28.2791, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 2.1932367149758454, | |
| "grad_norm": 7.1247944831848145, | |
| "learning_rate": 0.00012705314009661838, | |
| "loss": 26.1297, | |
| "step": 908 | |
| }, | |
| { | |
| "epoch": 2.1980676328502415, | |
| "grad_norm": 9.735993385314941, | |
| "learning_rate": 0.00012689210950080516, | |
| "loss": 24.9398, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.2028985507246377, | |
| "grad_norm": 10.508362770080566, | |
| "learning_rate": 0.00012673107890499196, | |
| "loss": 26.488, | |
| "step": 912 | |
| }, | |
| { | |
| "epoch": 2.207729468599034, | |
| "grad_norm": 8.847992897033691, | |
| "learning_rate": 0.00012657004830917876, | |
| "loss": 27.945, | |
| "step": 914 | |
| }, | |
| { | |
| "epoch": 2.21256038647343, | |
| "grad_norm": 6.917768478393555, | |
| "learning_rate": 0.00012640901771336554, | |
| "loss": 28.1242, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 2.217391304347826, | |
| "grad_norm": 8.339996337890625, | |
| "learning_rate": 0.00012624798711755234, | |
| "loss": 26.9138, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 2.2222222222222223, | |
| "grad_norm": 6.435300827026367, | |
| "learning_rate": 0.00012608695652173915, | |
| "loss": 27.6529, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.2270531400966185, | |
| "grad_norm": 7.194887638092041, | |
| "learning_rate": 0.00012592592592592592, | |
| "loss": 27.1809, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 2.2318840579710146, | |
| "grad_norm": 9.154160499572754, | |
| "learning_rate": 0.00012576489533011273, | |
| "loss": 27.0501, | |
| "step": 924 | |
| }, | |
| { | |
| "epoch": 2.236714975845411, | |
| "grad_norm": 7.581670761108398, | |
| "learning_rate": 0.00012560386473429953, | |
| "loss": 26.1787, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 2.241545893719807, | |
| "grad_norm": 8.077373504638672, | |
| "learning_rate": 0.00012544283413848633, | |
| "loss": 27.04, | |
| "step": 928 | |
| }, | |
| { | |
| "epoch": 2.246376811594203, | |
| "grad_norm": 7.282364845275879, | |
| "learning_rate": 0.0001252818035426731, | |
| "loss": 27.0844, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.2512077294685993, | |
| "grad_norm": 7.848824501037598, | |
| "learning_rate": 0.0001251207729468599, | |
| "loss": 29.1671, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 2.2560386473429954, | |
| "grad_norm": 7.200251579284668, | |
| "learning_rate": 0.00012495974235104672, | |
| "loss": 26.1801, | |
| "step": 934 | |
| }, | |
| { | |
| "epoch": 2.260869565217391, | |
| "grad_norm": 7.419154167175293, | |
| "learning_rate": 0.0001247987117552335, | |
| "loss": 27.1106, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 2.2657004830917873, | |
| "grad_norm": 8.16390609741211, | |
| "learning_rate": 0.0001246376811594203, | |
| "loss": 25.5017, | |
| "step": 938 | |
| }, | |
| { | |
| "epoch": 2.2705314009661834, | |
| "grad_norm": 7.58992338180542, | |
| "learning_rate": 0.0001244766505636071, | |
| "loss": 27.0191, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.2753623188405796, | |
| "grad_norm": 8.532602310180664, | |
| "learning_rate": 0.00012431561996779388, | |
| "loss": 28.0053, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 2.2801932367149758, | |
| "grad_norm": 7.449092388153076, | |
| "learning_rate": 0.00012415458937198068, | |
| "loss": 25.7749, | |
| "step": 944 | |
| }, | |
| { | |
| "epoch": 2.285024154589372, | |
| "grad_norm": 7.38059139251709, | |
| "learning_rate": 0.00012399355877616748, | |
| "loss": 28.2566, | |
| "step": 946 | |
| }, | |
| { | |
| "epoch": 2.289855072463768, | |
| "grad_norm": 6.6862874031066895, | |
| "learning_rate": 0.00012383252818035426, | |
| "loss": 28.8852, | |
| "step": 948 | |
| }, | |
| { | |
| "epoch": 2.2946859903381642, | |
| "grad_norm": 7.916528701782227, | |
| "learning_rate": 0.00012367149758454106, | |
| "loss": 27.8083, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.2995169082125604, | |
| "grad_norm": 6.143187522888184, | |
| "learning_rate": 0.00012351046698872787, | |
| "loss": 26.0691, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 2.3043478260869565, | |
| "grad_norm": 8.420724868774414, | |
| "learning_rate": 0.00012334943639291464, | |
| "loss": 28.0858, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 2.3091787439613527, | |
| "grad_norm": 7.883975505828857, | |
| "learning_rate": 0.00012318840579710145, | |
| "loss": 27.3439, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 2.314009661835749, | |
| "grad_norm": 7.242871284484863, | |
| "learning_rate": 0.00012302737520128825, | |
| "loss": 27.1341, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 2.318840579710145, | |
| "grad_norm": 7.858469009399414, | |
| "learning_rate": 0.00012286634460547503, | |
| "loss": 25.8494, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.323671497584541, | |
| "grad_norm": 7.365942478179932, | |
| "learning_rate": 0.00012270531400966183, | |
| "loss": 26.9695, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 2.3285024154589373, | |
| "grad_norm": 6.930251121520996, | |
| "learning_rate": 0.00012254428341384863, | |
| "loss": 25.7841, | |
| "step": 964 | |
| }, | |
| { | |
| "epoch": 2.3333333333333335, | |
| "grad_norm": 6.728757858276367, | |
| "learning_rate": 0.00012238325281803544, | |
| "loss": 28.053, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 2.3381642512077296, | |
| "grad_norm": 6.711808681488037, | |
| "learning_rate": 0.00012222222222222224, | |
| "loss": 27.7962, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 2.342995169082126, | |
| "grad_norm": 7.4918951988220215, | |
| "learning_rate": 0.00012206119162640903, | |
| "loss": 27.0597, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.3478260869565215, | |
| "grad_norm": 8.181355476379395, | |
| "learning_rate": 0.00012190016103059582, | |
| "loss": 28.2665, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 2.3526570048309177, | |
| "grad_norm": 7.762918949127197, | |
| "learning_rate": 0.00012173913043478263, | |
| "loss": 28.546, | |
| "step": 974 | |
| }, | |
| { | |
| "epoch": 2.357487922705314, | |
| "grad_norm": 7.8778276443481445, | |
| "learning_rate": 0.00012157809983896942, | |
| "loss": 27.1973, | |
| "step": 976 | |
| }, | |
| { | |
| "epoch": 2.36231884057971, | |
| "grad_norm": 7.002277374267578, | |
| "learning_rate": 0.0001214170692431562, | |
| "loss": 26.2418, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 2.367149758454106, | |
| "grad_norm": 7.298165321350098, | |
| "learning_rate": 0.00012125603864734301, | |
| "loss": 28.2059, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.3719806763285023, | |
| "grad_norm": 7.899686336517334, | |
| "learning_rate": 0.0001210950080515298, | |
| "loss": 26.9666, | |
| "step": 982 | |
| }, | |
| { | |
| "epoch": 2.3768115942028984, | |
| "grad_norm": 7.3516669273376465, | |
| "learning_rate": 0.0001209339774557166, | |
| "loss": 27.9299, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 2.3816425120772946, | |
| "grad_norm": 7.224858283996582, | |
| "learning_rate": 0.00012077294685990339, | |
| "loss": 25.816, | |
| "step": 986 | |
| }, | |
| { | |
| "epoch": 2.3864734299516908, | |
| "grad_norm": 7.0076494216918945, | |
| "learning_rate": 0.00012061191626409018, | |
| "loss": 24.8251, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 2.391304347826087, | |
| "grad_norm": 6.74472188949585, | |
| "learning_rate": 0.00012045088566827699, | |
| "loss": 27.6622, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.396135265700483, | |
| "grad_norm": 6.549550533294678, | |
| "learning_rate": 0.00012028985507246378, | |
| "loss": 28.284, | |
| "step": 992 | |
| }, | |
| { | |
| "epoch": 2.4009661835748792, | |
| "grad_norm": 6.806623458862305, | |
| "learning_rate": 0.00012012882447665057, | |
| "loss": 26.6694, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 2.4057971014492754, | |
| "grad_norm": 8.050207138061523, | |
| "learning_rate": 0.00011996779388083737, | |
| "loss": 28.3372, | |
| "step": 996 | |
| }, | |
| { | |
| "epoch": 2.4106280193236715, | |
| "grad_norm": 7.284823417663574, | |
| "learning_rate": 0.00011980676328502416, | |
| "loss": 26.9082, | |
| "step": 998 | |
| }, | |
| { | |
| "epoch": 2.4154589371980677, | |
| "grad_norm": 7.920591831207275, | |
| "learning_rate": 0.00011964573268921095, | |
| "loss": 29.7462, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.420289855072464, | |
| "grad_norm": 8.616438865661621, | |
| "learning_rate": 0.00011948470209339775, | |
| "loss": 26.7905, | |
| "step": 1002 | |
| }, | |
| { | |
| "epoch": 2.42512077294686, | |
| "grad_norm": 7.106829643249512, | |
| "learning_rate": 0.00011932367149758454, | |
| "loss": 27.5633, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 2.429951690821256, | |
| "grad_norm": 8.117084503173828, | |
| "learning_rate": 0.00011916264090177133, | |
| "loss": 26.9659, | |
| "step": 1006 | |
| }, | |
| { | |
| "epoch": 2.4347826086956523, | |
| "grad_norm": 7.732640743255615, | |
| "learning_rate": 0.00011900161030595814, | |
| "loss": 28.2114, | |
| "step": 1008 | |
| }, | |
| { | |
| "epoch": 2.4396135265700485, | |
| "grad_norm": 7.36362361907959, | |
| "learning_rate": 0.00011884057971014493, | |
| "loss": 26.3716, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.4444444444444446, | |
| "grad_norm": 8.114975929260254, | |
| "learning_rate": 0.00011867954911433172, | |
| "loss": 28.8353, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 2.449275362318841, | |
| "grad_norm": 7.141117095947266, | |
| "learning_rate": 0.00011851851851851852, | |
| "loss": 25.7371, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 2.454106280193237, | |
| "grad_norm": 7.491177558898926, | |
| "learning_rate": 0.00011835748792270531, | |
| "loss": 26.9146, | |
| "step": 1016 | |
| }, | |
| { | |
| "epoch": 2.4589371980676327, | |
| "grad_norm": 6.710269451141357, | |
| "learning_rate": 0.00011819645732689211, | |
| "loss": 25.5321, | |
| "step": 1018 | |
| }, | |
| { | |
| "epoch": 2.463768115942029, | |
| "grad_norm": 7.143400192260742, | |
| "learning_rate": 0.0001180354267310789, | |
| "loss": 29.9676, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.468599033816425, | |
| "grad_norm": 8.246957778930664, | |
| "learning_rate": 0.00011787439613526569, | |
| "loss": 29.2592, | |
| "step": 1022 | |
| }, | |
| { | |
| "epoch": 2.473429951690821, | |
| "grad_norm": 8.44863510131836, | |
| "learning_rate": 0.0001177133655394525, | |
| "loss": 26.0309, | |
| "step": 1024 | |
| }, | |
| { | |
| "epoch": 2.4782608695652173, | |
| "grad_norm": 7.821875095367432, | |
| "learning_rate": 0.00011755233494363929, | |
| "loss": 26.8746, | |
| "step": 1026 | |
| }, | |
| { | |
| "epoch": 2.4830917874396135, | |
| "grad_norm": 8.529960632324219, | |
| "learning_rate": 0.0001173913043478261, | |
| "loss": 27.0204, | |
| "step": 1028 | |
| }, | |
| { | |
| "epoch": 2.4879227053140096, | |
| "grad_norm": 6.8329339027404785, | |
| "learning_rate": 0.0001172302737520129, | |
| "loss": 25.2555, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.4927536231884058, | |
| "grad_norm": 6.804640769958496, | |
| "learning_rate": 0.0001170692431561997, | |
| "loss": 25.6537, | |
| "step": 1032 | |
| }, | |
| { | |
| "epoch": 2.497584541062802, | |
| "grad_norm": 7.089588642120361, | |
| "learning_rate": 0.00011690821256038649, | |
| "loss": 25.2568, | |
| "step": 1034 | |
| }, | |
| { | |
| "epoch": 2.502415458937198, | |
| "grad_norm": 11.241130828857422, | |
| "learning_rate": 0.00011674718196457328, | |
| "loss": 27.1132, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 2.5072463768115942, | |
| "grad_norm": 7.47288703918457, | |
| "learning_rate": 0.00011658615136876008, | |
| "loss": 25.9993, | |
| "step": 1038 | |
| }, | |
| { | |
| "epoch": 2.5120772946859904, | |
| "grad_norm": 8.372520446777344, | |
| "learning_rate": 0.00011642512077294687, | |
| "loss": 27.6641, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.5169082125603865, | |
| "grad_norm": 8.117879867553711, | |
| "learning_rate": 0.00011626409017713366, | |
| "loss": 26.6226, | |
| "step": 1042 | |
| }, | |
| { | |
| "epoch": 2.5217391304347827, | |
| "grad_norm": 8.319169044494629, | |
| "learning_rate": 0.00011610305958132046, | |
| "loss": 27.4311, | |
| "step": 1044 | |
| }, | |
| { | |
| "epoch": 2.526570048309179, | |
| "grad_norm": 7.18233585357666, | |
| "learning_rate": 0.00011594202898550725, | |
| "loss": 27.6304, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 2.531400966183575, | |
| "grad_norm": 7.594292640686035, | |
| "learning_rate": 0.00011578099838969404, | |
| "loss": 26.9063, | |
| "step": 1048 | |
| }, | |
| { | |
| "epoch": 2.536231884057971, | |
| "grad_norm": 8.392667770385742, | |
| "learning_rate": 0.00011561996779388085, | |
| "loss": 27.2786, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.541062801932367, | |
| "grad_norm": 6.698591709136963, | |
| "learning_rate": 0.00011545893719806764, | |
| "loss": 25.5416, | |
| "step": 1052 | |
| }, | |
| { | |
| "epoch": 2.545893719806763, | |
| "grad_norm": 6.185670375823975, | |
| "learning_rate": 0.00011529790660225444, | |
| "loss": 26.9696, | |
| "step": 1054 | |
| }, | |
| { | |
| "epoch": 2.550724637681159, | |
| "grad_norm": 7.676215648651123, | |
| "learning_rate": 0.00011513687600644123, | |
| "loss": 26.5383, | |
| "step": 1056 | |
| }, | |
| { | |
| "epoch": 2.5555555555555554, | |
| "grad_norm": 6.880972385406494, | |
| "learning_rate": 0.00011497584541062802, | |
| "loss": 26.3302, | |
| "step": 1058 | |
| }, | |
| { | |
| "epoch": 2.5603864734299515, | |
| "grad_norm": 8.553890228271484, | |
| "learning_rate": 0.00011481481481481482, | |
| "loss": 26.0391, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.5652173913043477, | |
| "grad_norm": 6.153205394744873, | |
| "learning_rate": 0.00011465378421900161, | |
| "loss": 25.729, | |
| "step": 1062 | |
| }, | |
| { | |
| "epoch": 2.570048309178744, | |
| "grad_norm": 8.465208053588867, | |
| "learning_rate": 0.0001144927536231884, | |
| "loss": 26.5018, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 2.57487922705314, | |
| "grad_norm": 8.127817153930664, | |
| "learning_rate": 0.00011433172302737521, | |
| "loss": 26.3506, | |
| "step": 1066 | |
| }, | |
| { | |
| "epoch": 2.579710144927536, | |
| "grad_norm": 9.615152359008789, | |
| "learning_rate": 0.000114170692431562, | |
| "loss": 25.9415, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 2.5845410628019323, | |
| "grad_norm": 7.294039249420166, | |
| "learning_rate": 0.00011400966183574879, | |
| "loss": 26.7507, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.5893719806763285, | |
| "grad_norm": 8.261009216308594, | |
| "learning_rate": 0.00011384863123993559, | |
| "loss": 26.7187, | |
| "step": 1072 | |
| }, | |
| { | |
| "epoch": 2.5942028985507246, | |
| "grad_norm": 6.705962181091309, | |
| "learning_rate": 0.00011368760064412238, | |
| "loss": 26.6202, | |
| "step": 1074 | |
| }, | |
| { | |
| "epoch": 2.5990338164251208, | |
| "grad_norm": 10.057275772094727, | |
| "learning_rate": 0.00011352657004830917, | |
| "loss": 26.6226, | |
| "step": 1076 | |
| }, | |
| { | |
| "epoch": 2.603864734299517, | |
| "grad_norm": 8.795845031738281, | |
| "learning_rate": 0.00011336553945249598, | |
| "loss": 28.1032, | |
| "step": 1078 | |
| }, | |
| { | |
| "epoch": 2.608695652173913, | |
| "grad_norm": 7.4816131591796875, | |
| "learning_rate": 0.00011320450885668277, | |
| "loss": 25.8255, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.6135265700483092, | |
| "grad_norm": 7.060609340667725, | |
| "learning_rate": 0.00011304347826086956, | |
| "loss": 26.9353, | |
| "step": 1082 | |
| }, | |
| { | |
| "epoch": 2.6183574879227054, | |
| "grad_norm": 7.140244960784912, | |
| "learning_rate": 0.00011288244766505636, | |
| "loss": 27.3619, | |
| "step": 1084 | |
| }, | |
| { | |
| "epoch": 2.6231884057971016, | |
| "grad_norm": 7.22598934173584, | |
| "learning_rate": 0.00011272141706924315, | |
| "loss": 25.3791, | |
| "step": 1086 | |
| }, | |
| { | |
| "epoch": 2.6280193236714977, | |
| "grad_norm": 7.098104953765869, | |
| "learning_rate": 0.00011256038647342995, | |
| "loss": 26.0269, | |
| "step": 1088 | |
| }, | |
| { | |
| "epoch": 2.632850241545894, | |
| "grad_norm": 6.918243408203125, | |
| "learning_rate": 0.00011239935587761677, | |
| "loss": 26.9077, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.63768115942029, | |
| "grad_norm": 7.557582378387451, | |
| "learning_rate": 0.00011223832528180356, | |
| "loss": 26.3413, | |
| "step": 1092 | |
| }, | |
| { | |
| "epoch": 2.642512077294686, | |
| "grad_norm": 7.406020164489746, | |
| "learning_rate": 0.00011207729468599035, | |
| "loss": 25.9181, | |
| "step": 1094 | |
| }, | |
| { | |
| "epoch": 2.6473429951690823, | |
| "grad_norm": 7.0549492835998535, | |
| "learning_rate": 0.00011191626409017715, | |
| "loss": 26.8606, | |
| "step": 1096 | |
| }, | |
| { | |
| "epoch": 2.6521739130434785, | |
| "grad_norm": 6.645535469055176, | |
| "learning_rate": 0.00011175523349436394, | |
| "loss": 27.9375, | |
| "step": 1098 | |
| }, | |
| { | |
| "epoch": 2.6570048309178746, | |
| "grad_norm": 7.90491247177124, | |
| "learning_rate": 0.00011159420289855073, | |
| "loss": 26.3062, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 2.661835748792271, | |
| "grad_norm": 6.990922927856445, | |
| "learning_rate": 0.00011143317230273754, | |
| "loss": 28.5585, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 2.6666666666666665, | |
| "grad_norm": 7.085525989532471, | |
| "learning_rate": 0.00011127214170692433, | |
| "loss": 25.2121, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 2.6714975845410627, | |
| "grad_norm": 8.292244911193848, | |
| "learning_rate": 0.00011111111111111112, | |
| "loss": 26.5729, | |
| "step": 1106 | |
| }, | |
| { | |
| "epoch": 2.676328502415459, | |
| "grad_norm": 7.650384426116943, | |
| "learning_rate": 0.00011095008051529792, | |
| "loss": 25.5093, | |
| "step": 1108 | |
| }, | |
| { | |
| "epoch": 2.681159420289855, | |
| "grad_norm": 9.83218765258789, | |
| "learning_rate": 0.00011078904991948471, | |
| "loss": 25.2708, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 2.685990338164251, | |
| "grad_norm": 6.258013725280762, | |
| "learning_rate": 0.0001106280193236715, | |
| "loss": 24.9544, | |
| "step": 1112 | |
| }, | |
| { | |
| "epoch": 2.6908212560386473, | |
| "grad_norm": 7.423259258270264, | |
| "learning_rate": 0.0001104669887278583, | |
| "loss": 27.2744, | |
| "step": 1114 | |
| }, | |
| { | |
| "epoch": 2.6956521739130435, | |
| "grad_norm": 7.9002814292907715, | |
| "learning_rate": 0.0001103059581320451, | |
| "loss": 26.9861, | |
| "step": 1116 | |
| }, | |
| { | |
| "epoch": 2.7004830917874396, | |
| "grad_norm": 7.641670227050781, | |
| "learning_rate": 0.00011014492753623188, | |
| "loss": 27.426, | |
| "step": 1118 | |
| }, | |
| { | |
| "epoch": 2.7053140096618358, | |
| "grad_norm": 7.658080577850342, | |
| "learning_rate": 0.00010998389694041869, | |
| "loss": 27.6252, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 2.710144927536232, | |
| "grad_norm": 7.938218116760254, | |
| "learning_rate": 0.00010982286634460548, | |
| "loss": 26.1781, | |
| "step": 1122 | |
| }, | |
| { | |
| "epoch": 2.714975845410628, | |
| "grad_norm": 7.96283483505249, | |
| "learning_rate": 0.00010966183574879228, | |
| "loss": 27.7596, | |
| "step": 1124 | |
| }, | |
| { | |
| "epoch": 2.7198067632850242, | |
| "grad_norm": 10.215167045593262, | |
| "learning_rate": 0.00010950080515297907, | |
| "loss": 26.9451, | |
| "step": 1126 | |
| }, | |
| { | |
| "epoch": 2.7246376811594204, | |
| "grad_norm": 7.972415924072266, | |
| "learning_rate": 0.00010933977455716586, | |
| "loss": 27.1329, | |
| "step": 1128 | |
| }, | |
| { | |
| "epoch": 2.7294685990338166, | |
| "grad_norm": 5.932509899139404, | |
| "learning_rate": 0.00010917874396135266, | |
| "loss": 28.5013, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 2.7342995169082127, | |
| "grad_norm": 8.786707878112793, | |
| "learning_rate": 0.00010901771336553945, | |
| "loss": 26.5279, | |
| "step": 1132 | |
| }, | |
| { | |
| "epoch": 2.7391304347826084, | |
| "grad_norm": 6.930019855499268, | |
| "learning_rate": 0.00010885668276972624, | |
| "loss": 27.3484, | |
| "step": 1134 | |
| }, | |
| { | |
| "epoch": 2.7439613526570046, | |
| "grad_norm": 7.4109015464782715, | |
| "learning_rate": 0.00010869565217391305, | |
| "loss": 26.4129, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 2.7487922705314007, | |
| "grad_norm": 6.286072731018066, | |
| "learning_rate": 0.00010853462157809984, | |
| "loss": 26.3836, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 2.753623188405797, | |
| "grad_norm": 8.696404457092285, | |
| "learning_rate": 0.00010837359098228663, | |
| "loss": 25.7786, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 2.758454106280193, | |
| "grad_norm": 8.277897834777832, | |
| "learning_rate": 0.00010821256038647343, | |
| "loss": 27.2492, | |
| "step": 1142 | |
| }, | |
| { | |
| "epoch": 2.763285024154589, | |
| "grad_norm": 7.653816223144531, | |
| "learning_rate": 0.00010805152979066022, | |
| "loss": 27.0198, | |
| "step": 1144 | |
| }, | |
| { | |
| "epoch": 2.7681159420289854, | |
| "grad_norm": 7.8368144035339355, | |
| "learning_rate": 0.00010789049919484701, | |
| "loss": 28.3334, | |
| "step": 1146 | |
| }, | |
| { | |
| "epoch": 2.7729468599033815, | |
| "grad_norm": 6.9786529541015625, | |
| "learning_rate": 0.00010772946859903381, | |
| "loss": 26.5917, | |
| "step": 1148 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 7.004583358764648, | |
| "learning_rate": 0.0001075684380032206, | |
| "loss": 26.4706, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 2.782608695652174, | |
| "grad_norm": 8.017105102539062, | |
| "learning_rate": 0.00010740740740740742, | |
| "loss": 28.0672, | |
| "step": 1152 | |
| }, | |
| { | |
| "epoch": 2.78743961352657, | |
| "grad_norm": 6.233907699584961, | |
| "learning_rate": 0.00010724637681159421, | |
| "loss": 27.5043, | |
| "step": 1154 | |
| }, | |
| { | |
| "epoch": 2.792270531400966, | |
| "grad_norm": 7.529089450836182, | |
| "learning_rate": 0.00010708534621578102, | |
| "loss": 25.2191, | |
| "step": 1156 | |
| }, | |
| { | |
| "epoch": 2.7971014492753623, | |
| "grad_norm": 7.839463233947754, | |
| "learning_rate": 0.0001069243156199678, | |
| "loss": 25.6082, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 2.8019323671497585, | |
| "grad_norm": 8.686691284179688, | |
| "learning_rate": 0.00010676328502415461, | |
| "loss": 27.9281, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 2.8067632850241546, | |
| "grad_norm": 6.9186930656433105, | |
| "learning_rate": 0.0001066022544283414, | |
| "loss": 26.3933, | |
| "step": 1162 | |
| }, | |
| { | |
| "epoch": 2.8115942028985508, | |
| "grad_norm": 7.170950889587402, | |
| "learning_rate": 0.00010644122383252819, | |
| "loss": 26.5526, | |
| "step": 1164 | |
| }, | |
| { | |
| "epoch": 2.816425120772947, | |
| "grad_norm": 6.971534729003906, | |
| "learning_rate": 0.00010628019323671499, | |
| "loss": 26.1706, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 2.821256038647343, | |
| "grad_norm": 7.302921295166016, | |
| "learning_rate": 0.00010611916264090178, | |
| "loss": 28.0723, | |
| "step": 1168 | |
| }, | |
| { | |
| "epoch": 2.8260869565217392, | |
| "grad_norm": 7.918272495269775, | |
| "learning_rate": 0.00010595813204508857, | |
| "loss": 25.9546, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 2.8309178743961354, | |
| "grad_norm": 8.934640884399414, | |
| "learning_rate": 0.00010579710144927538, | |
| "loss": 28.0027, | |
| "step": 1172 | |
| }, | |
| { | |
| "epoch": 2.8357487922705316, | |
| "grad_norm": 9.624857902526855, | |
| "learning_rate": 0.00010563607085346217, | |
| "loss": 27.6072, | |
| "step": 1174 | |
| }, | |
| { | |
| "epoch": 2.8405797101449277, | |
| "grad_norm": 7.182722091674805, | |
| "learning_rate": 0.00010547504025764896, | |
| "loss": 25.9444, | |
| "step": 1176 | |
| }, | |
| { | |
| "epoch": 2.845410628019324, | |
| "grad_norm": 8.560644149780273, | |
| "learning_rate": 0.00010531400966183576, | |
| "loss": 24.5426, | |
| "step": 1178 | |
| }, | |
| { | |
| "epoch": 2.85024154589372, | |
| "grad_norm": 7.0820088386535645, | |
| "learning_rate": 0.00010515297906602255, | |
| "loss": 27.1353, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 2.855072463768116, | |
| "grad_norm": 7.135811805725098, | |
| "learning_rate": 0.00010499194847020934, | |
| "loss": 25.9438, | |
| "step": 1182 | |
| }, | |
| { | |
| "epoch": 2.8599033816425123, | |
| "grad_norm": 7.968995571136475, | |
| "learning_rate": 0.00010483091787439614, | |
| "loss": 25.7914, | |
| "step": 1184 | |
| }, | |
| { | |
| "epoch": 2.864734299516908, | |
| "grad_norm": 7.4556193351745605, | |
| "learning_rate": 0.00010466988727858293, | |
| "loss": 28.4208, | |
| "step": 1186 | |
| }, | |
| { | |
| "epoch": 2.869565217391304, | |
| "grad_norm": 8.124032974243164, | |
| "learning_rate": 0.00010450885668276972, | |
| "loss": 26.6249, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 2.8743961352657004, | |
| "grad_norm": 6.682657718658447, | |
| "learning_rate": 0.00010434782608695653, | |
| "loss": 27.7629, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 2.8792270531400965, | |
| "grad_norm": 7.784018516540527, | |
| "learning_rate": 0.00010418679549114332, | |
| "loss": 26.3142, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 2.8840579710144927, | |
| "grad_norm": 6.824240207672119, | |
| "learning_rate": 0.00010402576489533012, | |
| "loss": 26.4967, | |
| "step": 1194 | |
| }, | |
| { | |
| "epoch": 2.888888888888889, | |
| "grad_norm": 6.703210353851318, | |
| "learning_rate": 0.00010386473429951691, | |
| "loss": 27.9698, | |
| "step": 1196 | |
| }, | |
| { | |
| "epoch": 2.893719806763285, | |
| "grad_norm": 7.0591840744018555, | |
| "learning_rate": 0.0001037037037037037, | |
| "loss": 26.4026, | |
| "step": 1198 | |
| }, | |
| { | |
| "epoch": 2.898550724637681, | |
| "grad_norm": 6.3246564865112305, | |
| "learning_rate": 0.0001035426731078905, | |
| "loss": 26.839, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 2.9033816425120773, | |
| "grad_norm": 8.211289405822754, | |
| "learning_rate": 0.00010338164251207729, | |
| "loss": 27.0174, | |
| "step": 1202 | |
| }, | |
| { | |
| "epoch": 2.9082125603864735, | |
| "grad_norm": 6.735382556915283, | |
| "learning_rate": 0.00010322061191626408, | |
| "loss": 26.3102, | |
| "step": 1204 | |
| }, | |
| { | |
| "epoch": 2.9130434782608696, | |
| "grad_norm": 8.0295991897583, | |
| "learning_rate": 0.00010305958132045089, | |
| "loss": 25.7761, | |
| "step": 1206 | |
| }, | |
| { | |
| "epoch": 2.917874396135266, | |
| "grad_norm": 8.097826957702637, | |
| "learning_rate": 0.00010289855072463768, | |
| "loss": 28.9129, | |
| "step": 1208 | |
| }, | |
| { | |
| "epoch": 2.922705314009662, | |
| "grad_norm": 8.124273300170898, | |
| "learning_rate": 0.00010273752012882447, | |
| "loss": 26.1519, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 2.927536231884058, | |
| "grad_norm": 8.470534324645996, | |
| "learning_rate": 0.00010257648953301127, | |
| "loss": 25.6004, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 2.9323671497584543, | |
| "grad_norm": 7.348142147064209, | |
| "learning_rate": 0.00010241545893719809, | |
| "loss": 27.1859, | |
| "step": 1214 | |
| }, | |
| { | |
| "epoch": 2.9371980676328504, | |
| "grad_norm": 8.258639335632324, | |
| "learning_rate": 0.00010225442834138488, | |
| "loss": 24.1802, | |
| "step": 1216 | |
| }, | |
| { | |
| "epoch": 2.942028985507246, | |
| "grad_norm": 8.160893440246582, | |
| "learning_rate": 0.00010209339774557167, | |
| "loss": 26.3956, | |
| "step": 1218 | |
| }, | |
| { | |
| "epoch": 2.9468599033816423, | |
| "grad_norm": 7.1116814613342285, | |
| "learning_rate": 0.00010193236714975847, | |
| "loss": 25.9712, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.9516908212560384, | |
| "grad_norm": 6.059470176696777, | |
| "learning_rate": 0.00010177133655394526, | |
| "loss": 27.1363, | |
| "step": 1222 | |
| }, | |
| { | |
| "epoch": 2.9565217391304346, | |
| "grad_norm": 7.71455192565918, | |
| "learning_rate": 0.00010161030595813205, | |
| "loss": 26.5133, | |
| "step": 1224 | |
| }, | |
| { | |
| "epoch": 2.9613526570048307, | |
| "grad_norm": 9.131839752197266, | |
| "learning_rate": 0.00010144927536231885, | |
| "loss": 27.7297, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 2.966183574879227, | |
| "grad_norm": 6.740046977996826, | |
| "learning_rate": 0.00010128824476650564, | |
| "loss": 25.8968, | |
| "step": 1228 | |
| }, | |
| { | |
| "epoch": 2.971014492753623, | |
| "grad_norm": 7.255392074584961, | |
| "learning_rate": 0.00010112721417069245, | |
| "loss": 26.343, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.975845410628019, | |
| "grad_norm": 7.241657733917236, | |
| "learning_rate": 0.00010096618357487924, | |
| "loss": 26.2671, | |
| "step": 1232 | |
| }, | |
| { | |
| "epoch": 2.9806763285024154, | |
| "grad_norm": 8.625435829162598, | |
| "learning_rate": 0.00010080515297906603, | |
| "loss": 26.2536, | |
| "step": 1234 | |
| }, | |
| { | |
| "epoch": 2.9855072463768115, | |
| "grad_norm": 7.044302940368652, | |
| "learning_rate": 0.00010064412238325283, | |
| "loss": 27.3368, | |
| "step": 1236 | |
| }, | |
| { | |
| "epoch": 2.9903381642512077, | |
| "grad_norm": 7.077991485595703, | |
| "learning_rate": 0.00010048309178743962, | |
| "loss": 28.0877, | |
| "step": 1238 | |
| }, | |
| { | |
| "epoch": 2.995169082125604, | |
| "grad_norm": 7.624186992645264, | |
| "learning_rate": 0.00010032206119162641, | |
| "loss": 27.3155, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "grad_norm": 7.315317630767822, | |
| "learning_rate": 0.00010016103059581321, | |
| "loss": 26.3495, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 3.004830917874396, | |
| "grad_norm": 6.168877124786377, | |
| "learning_rate": 0.0001, | |
| "loss": 27.1989, | |
| "step": 1244 | |
| }, | |
| { | |
| "epoch": 3.0096618357487923, | |
| "grad_norm": 7.338534832000732, | |
| "learning_rate": 9.98389694041868e-05, | |
| "loss": 26.6896, | |
| "step": 1246 | |
| }, | |
| { | |
| "epoch": 3.0144927536231885, | |
| "grad_norm": 7.950836658477783, | |
| "learning_rate": 9.96779388083736e-05, | |
| "loss": 26.1743, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 3.0193236714975846, | |
| "grad_norm": 7.836818218231201, | |
| "learning_rate": 9.951690821256039e-05, | |
| "loss": 24.6431, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 3.024154589371981, | |
| "grad_norm": 7.391972064971924, | |
| "learning_rate": 9.935587761674718e-05, | |
| "loss": 26.9987, | |
| "step": 1252 | |
| }, | |
| { | |
| "epoch": 3.028985507246377, | |
| "grad_norm": 6.927128314971924, | |
| "learning_rate": 9.919484702093398e-05, | |
| "loss": 26.3314, | |
| "step": 1254 | |
| }, | |
| { | |
| "epoch": 3.033816425120773, | |
| "grad_norm": 6.5931267738342285, | |
| "learning_rate": 9.903381642512077e-05, | |
| "loss": 28.486, | |
| "step": 1256 | |
| }, | |
| { | |
| "epoch": 3.0386473429951693, | |
| "grad_norm": 6.712624549865723, | |
| "learning_rate": 9.887278582930756e-05, | |
| "loss": 23.3735, | |
| "step": 1258 | |
| }, | |
| { | |
| "epoch": 3.0434782608695654, | |
| "grad_norm": 7.244742393493652, | |
| "learning_rate": 9.871175523349438e-05, | |
| "loss": 28.1393, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 3.0483091787439616, | |
| "grad_norm": 7.571489334106445, | |
| "learning_rate": 9.855072463768117e-05, | |
| "loss": 26.1208, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 3.0531400966183573, | |
| "grad_norm": 7.6882643699646, | |
| "learning_rate": 9.838969404186796e-05, | |
| "loss": 25.3927, | |
| "step": 1264 | |
| }, | |
| { | |
| "epoch": 3.0579710144927534, | |
| "grad_norm": 7.103066444396973, | |
| "learning_rate": 9.822866344605476e-05, | |
| "loss": 25.6778, | |
| "step": 1266 | |
| }, | |
| { | |
| "epoch": 3.0628019323671496, | |
| "grad_norm": 7.564841270446777, | |
| "learning_rate": 9.806763285024155e-05, | |
| "loss": 26.3471, | |
| "step": 1268 | |
| }, | |
| { | |
| "epoch": 3.0676328502415457, | |
| "grad_norm": 7.3738508224487305, | |
| "learning_rate": 9.790660225442834e-05, | |
| "loss": 26.4939, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 3.072463768115942, | |
| "grad_norm": 8.300433158874512, | |
| "learning_rate": 9.774557165861515e-05, | |
| "loss": 27.4497, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 3.077294685990338, | |
| "grad_norm": 6.373605251312256, | |
| "learning_rate": 9.758454106280194e-05, | |
| "loss": 27.1139, | |
| "step": 1274 | |
| }, | |
| { | |
| "epoch": 3.082125603864734, | |
| "grad_norm": 7.21131706237793, | |
| "learning_rate": 9.742351046698873e-05, | |
| "loss": 25.3131, | |
| "step": 1276 | |
| }, | |
| { | |
| "epoch": 3.0869565217391304, | |
| "grad_norm": 7.3897504806518555, | |
| "learning_rate": 9.726247987117553e-05, | |
| "loss": 24.7751, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 3.0917874396135265, | |
| "grad_norm": 6.666619777679443, | |
| "learning_rate": 9.710144927536232e-05, | |
| "loss": 25.6616, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 3.0966183574879227, | |
| "grad_norm": 6.16898250579834, | |
| "learning_rate": 9.694041867954912e-05, | |
| "loss": 23.4636, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 3.101449275362319, | |
| "grad_norm": 6.940250396728516, | |
| "learning_rate": 9.677938808373591e-05, | |
| "loss": 27.0285, | |
| "step": 1284 | |
| }, | |
| { | |
| "epoch": 3.106280193236715, | |
| "grad_norm": 8.428845405578613, | |
| "learning_rate": 9.66183574879227e-05, | |
| "loss": 26.6035, | |
| "step": 1286 | |
| }, | |
| { | |
| "epoch": 3.111111111111111, | |
| "grad_norm": 7.685654640197754, | |
| "learning_rate": 9.64573268921095e-05, | |
| "loss": 27.0342, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 3.1159420289855073, | |
| "grad_norm": 8.046797752380371, | |
| "learning_rate": 9.62962962962963e-05, | |
| "loss": 26.7352, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 3.1207729468599035, | |
| "grad_norm": 7.739950180053711, | |
| "learning_rate": 9.61352657004831e-05, | |
| "loss": 27.607, | |
| "step": 1292 | |
| }, | |
| { | |
| "epoch": 3.1256038647342996, | |
| "grad_norm": 8.301579475402832, | |
| "learning_rate": 9.597423510466989e-05, | |
| "loss": 26.7545, | |
| "step": 1294 | |
| }, | |
| { | |
| "epoch": 3.130434782608696, | |
| "grad_norm": 7.416752338409424, | |
| "learning_rate": 9.58132045088567e-05, | |
| "loss": 25.5911, | |
| "step": 1296 | |
| }, | |
| { | |
| "epoch": 3.135265700483092, | |
| "grad_norm": 7.364454746246338, | |
| "learning_rate": 9.565217391304348e-05, | |
| "loss": 25.8314, | |
| "step": 1298 | |
| }, | |
| { | |
| "epoch": 3.140096618357488, | |
| "grad_norm": 7.930257797241211, | |
| "learning_rate": 9.549114331723029e-05, | |
| "loss": 23.9096, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.1449275362318843, | |
| "grad_norm": 6.694441795349121, | |
| "learning_rate": 9.533011272141708e-05, | |
| "loss": 26.1153, | |
| "step": 1302 | |
| }, | |
| { | |
| "epoch": 3.14975845410628, | |
| "grad_norm": 6.781352996826172, | |
| "learning_rate": 9.516908212560387e-05, | |
| "loss": 26.7928, | |
| "step": 1304 | |
| }, | |
| { | |
| "epoch": 3.154589371980676, | |
| "grad_norm": 6.676225662231445, | |
| "learning_rate": 9.500805152979067e-05, | |
| "loss": 27.0461, | |
| "step": 1306 | |
| }, | |
| { | |
| "epoch": 3.1594202898550723, | |
| "grad_norm": 7.4368767738342285, | |
| "learning_rate": 9.484702093397746e-05, | |
| "loss": 26.7284, | |
| "step": 1308 | |
| }, | |
| { | |
| "epoch": 3.1642512077294684, | |
| "grad_norm": 7.008518695831299, | |
| "learning_rate": 9.468599033816425e-05, | |
| "loss": 27.4804, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 3.1690821256038646, | |
| "grad_norm": 7.6441850662231445, | |
| "learning_rate": 9.452495974235105e-05, | |
| "loss": 27.3274, | |
| "step": 1312 | |
| }, | |
| { | |
| "epoch": 3.1739130434782608, | |
| "grad_norm": 7.242411136627197, | |
| "learning_rate": 9.436392914653784e-05, | |
| "loss": 24.6985, | |
| "step": 1314 | |
| }, | |
| { | |
| "epoch": 3.178743961352657, | |
| "grad_norm": 6.712805271148682, | |
| "learning_rate": 9.420289855072463e-05, | |
| "loss": 25.8327, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 3.183574879227053, | |
| "grad_norm": 6.724958419799805, | |
| "learning_rate": 9.404186795491144e-05, | |
| "loss": 26.9393, | |
| "step": 1318 | |
| }, | |
| { | |
| "epoch": 3.1884057971014492, | |
| "grad_norm": 7.451432228088379, | |
| "learning_rate": 9.388083735909823e-05, | |
| "loss": 25.1356, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 3.1932367149758454, | |
| "grad_norm": 7.7775421142578125, | |
| "learning_rate": 9.371980676328503e-05, | |
| "loss": 26.6738, | |
| "step": 1322 | |
| }, | |
| { | |
| "epoch": 3.1980676328502415, | |
| "grad_norm": 7.692890167236328, | |
| "learning_rate": 9.355877616747183e-05, | |
| "loss": 25.1647, | |
| "step": 1324 | |
| }, | |
| { | |
| "epoch": 3.2028985507246377, | |
| "grad_norm": 7.3927812576293945, | |
| "learning_rate": 9.339774557165862e-05, | |
| "loss": 26.9764, | |
| "step": 1326 | |
| }, | |
| { | |
| "epoch": 3.207729468599034, | |
| "grad_norm": 7.326320171356201, | |
| "learning_rate": 9.323671497584541e-05, | |
| "loss": 25.9006, | |
| "step": 1328 | |
| }, | |
| { | |
| "epoch": 3.21256038647343, | |
| "grad_norm": 8.4861421585083, | |
| "learning_rate": 9.307568438003222e-05, | |
| "loss": 26.148, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 3.217391304347826, | |
| "grad_norm": 8.520912170410156, | |
| "learning_rate": 9.291465378421901e-05, | |
| "loss": 26.3554, | |
| "step": 1332 | |
| }, | |
| { | |
| "epoch": 3.2222222222222223, | |
| "grad_norm": 7.051355361938477, | |
| "learning_rate": 9.27536231884058e-05, | |
| "loss": 26.3572, | |
| "step": 1334 | |
| }, | |
| { | |
| "epoch": 3.2270531400966185, | |
| "grad_norm": 9.287524223327637, | |
| "learning_rate": 9.25925925925926e-05, | |
| "loss": 25.9726, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 3.2318840579710146, | |
| "grad_norm": 7.160129070281982, | |
| "learning_rate": 9.243156199677939e-05, | |
| "loss": 28.7179, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 3.236714975845411, | |
| "grad_norm": 7.048616886138916, | |
| "learning_rate": 9.227053140096618e-05, | |
| "loss": 25.7061, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 3.241545893719807, | |
| "grad_norm": 7.782952785491943, | |
| "learning_rate": 9.210950080515299e-05, | |
| "loss": 26.6252, | |
| "step": 1342 | |
| }, | |
| { | |
| "epoch": 3.246376811594203, | |
| "grad_norm": 8.396957397460938, | |
| "learning_rate": 9.194847020933978e-05, | |
| "loss": 25.4261, | |
| "step": 1344 | |
| }, | |
| { | |
| "epoch": 3.2512077294685993, | |
| "grad_norm": 7.221895217895508, | |
| "learning_rate": 9.178743961352657e-05, | |
| "loss": 25.6437, | |
| "step": 1346 | |
| }, | |
| { | |
| "epoch": 3.2560386473429954, | |
| "grad_norm": 7.694455146789551, | |
| "learning_rate": 9.162640901771337e-05, | |
| "loss": 26.2562, | |
| "step": 1348 | |
| }, | |
| { | |
| "epoch": 3.260869565217391, | |
| "grad_norm": 7.642673492431641, | |
| "learning_rate": 9.146537842190016e-05, | |
| "loss": 25.1317, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 3.2657004830917873, | |
| "grad_norm": 6.599581241607666, | |
| "learning_rate": 9.130434782608696e-05, | |
| "loss": 25.9692, | |
| "step": 1352 | |
| }, | |
| { | |
| "epoch": 3.2705314009661834, | |
| "grad_norm": 8.950820922851562, | |
| "learning_rate": 9.114331723027377e-05, | |
| "loss": 27.4472, | |
| "step": 1354 | |
| }, | |
| { | |
| "epoch": 3.2753623188405796, | |
| "grad_norm": 6.30159854888916, | |
| "learning_rate": 9.098228663446056e-05, | |
| "loss": 25.9316, | |
| "step": 1356 | |
| }, | |
| { | |
| "epoch": 3.2801932367149758, | |
| "grad_norm": 6.927635192871094, | |
| "learning_rate": 9.082125603864735e-05, | |
| "loss": 27.5305, | |
| "step": 1358 | |
| }, | |
| { | |
| "epoch": 3.285024154589372, | |
| "grad_norm": 6.424526214599609, | |
| "learning_rate": 9.066022544283415e-05, | |
| "loss": 26.0866, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.289855072463768, | |
| "grad_norm": 8.260842323303223, | |
| "learning_rate": 9.049919484702094e-05, | |
| "loss": 25.4734, | |
| "step": 1362 | |
| }, | |
| { | |
| "epoch": 3.2946859903381642, | |
| "grad_norm": 6.600332736968994, | |
| "learning_rate": 9.033816425120773e-05, | |
| "loss": 25.1304, | |
| "step": 1364 | |
| }, | |
| { | |
| "epoch": 3.2995169082125604, | |
| "grad_norm": 6.801137447357178, | |
| "learning_rate": 9.017713365539453e-05, | |
| "loss": 27.4591, | |
| "step": 1366 | |
| }, | |
| { | |
| "epoch": 3.3043478260869565, | |
| "grad_norm": 7.686280250549316, | |
| "learning_rate": 9.001610305958132e-05, | |
| "loss": 26.2466, | |
| "step": 1368 | |
| }, | |
| { | |
| "epoch": 3.3091787439613527, | |
| "grad_norm": 6.084709644317627, | |
| "learning_rate": 8.985507246376813e-05, | |
| "loss": 25.2827, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.314009661835749, | |
| "grad_norm": 7.699804306030273, | |
| "learning_rate": 8.969404186795492e-05, | |
| "loss": 28.068, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 3.318840579710145, | |
| "grad_norm": 8.359792709350586, | |
| "learning_rate": 8.95330112721417e-05, | |
| "loss": 28.9643, | |
| "step": 1374 | |
| }, | |
| { | |
| "epoch": 3.323671497584541, | |
| "grad_norm": 7.701099872589111, | |
| "learning_rate": 8.937198067632851e-05, | |
| "loss": 26.1439, | |
| "step": 1376 | |
| }, | |
| { | |
| "epoch": 3.3285024154589373, | |
| "grad_norm": 8.339729309082031, | |
| "learning_rate": 8.92109500805153e-05, | |
| "loss": 26.0983, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 3.3333333333333335, | |
| "grad_norm": 8.924784660339355, | |
| "learning_rate": 8.904991948470209e-05, | |
| "loss": 25.8818, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.3381642512077296, | |
| "grad_norm": 8.396602630615234, | |
| "learning_rate": 8.888888888888889e-05, | |
| "loss": 27.7536, | |
| "step": 1382 | |
| }, | |
| { | |
| "epoch": 3.342995169082126, | |
| "grad_norm": 8.177582740783691, | |
| "learning_rate": 8.87278582930757e-05, | |
| "loss": 25.7908, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 3.3478260869565215, | |
| "grad_norm": 6.711874008178711, | |
| "learning_rate": 8.856682769726249e-05, | |
| "loss": 27.9945, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 3.3526570048309177, | |
| "grad_norm": 6.735175132751465, | |
| "learning_rate": 8.840579710144929e-05, | |
| "loss": 27.7595, | |
| "step": 1388 | |
| }, | |
| { | |
| "epoch": 3.357487922705314, | |
| "grad_norm": 8.890625, | |
| "learning_rate": 8.824476650563608e-05, | |
| "loss": 25.7886, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.36231884057971, | |
| "grad_norm": 7.918723106384277, | |
| "learning_rate": 8.808373590982287e-05, | |
| "loss": 27.3296, | |
| "step": 1392 | |
| }, | |
| { | |
| "epoch": 3.367149758454106, | |
| "grad_norm": 8.405486106872559, | |
| "learning_rate": 8.792270531400967e-05, | |
| "loss": 24.8263, | |
| "step": 1394 | |
| }, | |
| { | |
| "epoch": 3.3719806763285023, | |
| "grad_norm": 7.2000837326049805, | |
| "learning_rate": 8.776167471819646e-05, | |
| "loss": 27.6412, | |
| "step": 1396 | |
| }, | |
| { | |
| "epoch": 3.3768115942028984, | |
| "grad_norm": 9.657790184020996, | |
| "learning_rate": 8.760064412238325e-05, | |
| "loss": 24.8264, | |
| "step": 1398 | |
| }, | |
| { | |
| "epoch": 3.3816425120772946, | |
| "grad_norm": 7.06240177154541, | |
| "learning_rate": 8.743961352657006e-05, | |
| "loss": 26.162, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.3864734299516908, | |
| "grad_norm": 7.3674116134643555, | |
| "learning_rate": 8.727858293075685e-05, | |
| "loss": 27.8042, | |
| "step": 1402 | |
| }, | |
| { | |
| "epoch": 3.391304347826087, | |
| "grad_norm": 7.9507737159729, | |
| "learning_rate": 8.711755233494364e-05, | |
| "loss": 26.6252, | |
| "step": 1404 | |
| }, | |
| { | |
| "epoch": 3.396135265700483, | |
| "grad_norm": 8.195547103881836, | |
| "learning_rate": 8.695652173913044e-05, | |
| "loss": 26.886, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 3.4009661835748792, | |
| "grad_norm": 7.462141513824463, | |
| "learning_rate": 8.679549114331723e-05, | |
| "loss": 27.8522, | |
| "step": 1408 | |
| }, | |
| { | |
| "epoch": 3.4057971014492754, | |
| "grad_norm": 7.903439521789551, | |
| "learning_rate": 8.663446054750402e-05, | |
| "loss": 26.1915, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.4106280193236715, | |
| "grad_norm": 7.791518211364746, | |
| "learning_rate": 8.647342995169082e-05, | |
| "loss": 27.6484, | |
| "step": 1412 | |
| }, | |
| { | |
| "epoch": 3.4154589371980677, | |
| "grad_norm": 7.624407768249512, | |
| "learning_rate": 8.631239935587761e-05, | |
| "loss": 28.3851, | |
| "step": 1414 | |
| }, | |
| { | |
| "epoch": 3.420289855072464, | |
| "grad_norm": 7.524753570556641, | |
| "learning_rate": 8.615136876006442e-05, | |
| "loss": 25.3125, | |
| "step": 1416 | |
| }, | |
| { | |
| "epoch": 3.42512077294686, | |
| "grad_norm": 8.102710723876953, | |
| "learning_rate": 8.599033816425122e-05, | |
| "loss": 24.9681, | |
| "step": 1418 | |
| }, | |
| { | |
| "epoch": 3.429951690821256, | |
| "grad_norm": 6.52889347076416, | |
| "learning_rate": 8.582930756843801e-05, | |
| "loss": 27.6317, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.4347826086956523, | |
| "grad_norm": 8.491759300231934, | |
| "learning_rate": 8.56682769726248e-05, | |
| "loss": 26.7627, | |
| "step": 1422 | |
| }, | |
| { | |
| "epoch": 3.4396135265700485, | |
| "grad_norm": 8.082484245300293, | |
| "learning_rate": 8.55072463768116e-05, | |
| "loss": 25.5842, | |
| "step": 1424 | |
| }, | |
| { | |
| "epoch": 3.4444444444444446, | |
| "grad_norm": 8.158738136291504, | |
| "learning_rate": 8.53462157809984e-05, | |
| "loss": 27.7775, | |
| "step": 1426 | |
| }, | |
| { | |
| "epoch": 3.449275362318841, | |
| "grad_norm": 6.948888778686523, | |
| "learning_rate": 8.518518518518518e-05, | |
| "loss": 24.269, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 3.454106280193237, | |
| "grad_norm": 7.217655181884766, | |
| "learning_rate": 8.502415458937199e-05, | |
| "loss": 27.2054, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.4589371980676327, | |
| "grad_norm": 7.5419440269470215, | |
| "learning_rate": 8.486312399355878e-05, | |
| "loss": 25.9815, | |
| "step": 1432 | |
| }, | |
| { | |
| "epoch": 3.463768115942029, | |
| "grad_norm": 7.58052921295166, | |
| "learning_rate": 8.470209339774557e-05, | |
| "loss": 27.4913, | |
| "step": 1434 | |
| }, | |
| { | |
| "epoch": 3.468599033816425, | |
| "grad_norm": 7.221286296844482, | |
| "learning_rate": 8.454106280193237e-05, | |
| "loss": 26.7118, | |
| "step": 1436 | |
| }, | |
| { | |
| "epoch": 3.473429951690821, | |
| "grad_norm": 7.131877899169922, | |
| "learning_rate": 8.438003220611916e-05, | |
| "loss": 27.0661, | |
| "step": 1438 | |
| }, | |
| { | |
| "epoch": 3.4782608695652173, | |
| "grad_norm": 6.600888729095459, | |
| "learning_rate": 8.421900161030597e-05, | |
| "loss": 24.237, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.4830917874396135, | |
| "grad_norm": 7.91683292388916, | |
| "learning_rate": 8.405797101449276e-05, | |
| "loss": 24.8586, | |
| "step": 1442 | |
| }, | |
| { | |
| "epoch": 3.4879227053140096, | |
| "grad_norm": 6.824517250061035, | |
| "learning_rate": 8.389694041867955e-05, | |
| "loss": 26.9819, | |
| "step": 1444 | |
| }, | |
| { | |
| "epoch": 3.4927536231884058, | |
| "grad_norm": 6.753680229187012, | |
| "learning_rate": 8.373590982286635e-05, | |
| "loss": 26.0397, | |
| "step": 1446 | |
| }, | |
| { | |
| "epoch": 3.497584541062802, | |
| "grad_norm": 7.486673831939697, | |
| "learning_rate": 8.357487922705315e-05, | |
| "loss": 26.7425, | |
| "step": 1448 | |
| }, | |
| { | |
| "epoch": 3.502415458937198, | |
| "grad_norm": 8.475358009338379, | |
| "learning_rate": 8.341384863123994e-05, | |
| "loss": 26.1292, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.5072463768115942, | |
| "grad_norm": 6.859409332275391, | |
| "learning_rate": 8.325281803542673e-05, | |
| "loss": 26.4357, | |
| "step": 1452 | |
| }, | |
| { | |
| "epoch": 3.5120772946859904, | |
| "grad_norm": 7.169741630554199, | |
| "learning_rate": 8.309178743961354e-05, | |
| "loss": 27.2822, | |
| "step": 1454 | |
| }, | |
| { | |
| "epoch": 3.5169082125603865, | |
| "grad_norm": 8.31079387664795, | |
| "learning_rate": 8.293075684380033e-05, | |
| "loss": 27.7764, | |
| "step": 1456 | |
| }, | |
| { | |
| "epoch": 3.5217391304347827, | |
| "grad_norm": 6.888429164886475, | |
| "learning_rate": 8.276972624798713e-05, | |
| "loss": 26.5406, | |
| "step": 1458 | |
| }, | |
| { | |
| "epoch": 3.526570048309179, | |
| "grad_norm": 7.568389892578125, | |
| "learning_rate": 8.260869565217392e-05, | |
| "loss": 25.9647, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.531400966183575, | |
| "grad_norm": 6.64613151550293, | |
| "learning_rate": 8.244766505636071e-05, | |
| "loss": 26.9271, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 3.536231884057971, | |
| "grad_norm": 6.534989833831787, | |
| "learning_rate": 8.228663446054751e-05, | |
| "loss": 26.2684, | |
| "step": 1464 | |
| }, | |
| { | |
| "epoch": 3.541062801932367, | |
| "grad_norm": 7.926050662994385, | |
| "learning_rate": 8.21256038647343e-05, | |
| "loss": 26.845, | |
| "step": 1466 | |
| }, | |
| { | |
| "epoch": 3.545893719806763, | |
| "grad_norm": 7.452934741973877, | |
| "learning_rate": 8.196457326892109e-05, | |
| "loss": 25.8417, | |
| "step": 1468 | |
| }, | |
| { | |
| "epoch": 3.550724637681159, | |
| "grad_norm": 7.26784086227417, | |
| "learning_rate": 8.18035426731079e-05, | |
| "loss": 26.0035, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 3.5555555555555554, | |
| "grad_norm": 7.318904399871826, | |
| "learning_rate": 8.164251207729469e-05, | |
| "loss": 27.4574, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 3.5603864734299515, | |
| "grad_norm": 6.999464511871338, | |
| "learning_rate": 8.148148148148148e-05, | |
| "loss": 25.949, | |
| "step": 1474 | |
| }, | |
| { | |
| "epoch": 3.5652173913043477, | |
| "grad_norm": 7.244204044342041, | |
| "learning_rate": 8.132045088566828e-05, | |
| "loss": 26.636, | |
| "step": 1476 | |
| }, | |
| { | |
| "epoch": 3.570048309178744, | |
| "grad_norm": 8.60554027557373, | |
| "learning_rate": 8.115942028985508e-05, | |
| "loss": 27.4116, | |
| "step": 1478 | |
| }, | |
| { | |
| "epoch": 3.57487922705314, | |
| "grad_norm": 6.701752662658691, | |
| "learning_rate": 8.099838969404187e-05, | |
| "loss": 25.0194, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 3.579710144927536, | |
| "grad_norm": 6.613931655883789, | |
| "learning_rate": 8.083735909822868e-05, | |
| "loss": 25.7569, | |
| "step": 1482 | |
| }, | |
| { | |
| "epoch": 3.5845410628019323, | |
| "grad_norm": 7.828546524047852, | |
| "learning_rate": 8.067632850241547e-05, | |
| "loss": 27.1483, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 3.5893719806763285, | |
| "grad_norm": 7.983916282653809, | |
| "learning_rate": 8.051529790660226e-05, | |
| "loss": 24.6017, | |
| "step": 1486 | |
| }, | |
| { | |
| "epoch": 3.5942028985507246, | |
| "grad_norm": 8.500826835632324, | |
| "learning_rate": 8.035426731078906e-05, | |
| "loss": 26.6575, | |
| "step": 1488 | |
| }, | |
| { | |
| "epoch": 3.5990338164251208, | |
| "grad_norm": 8.88049030303955, | |
| "learning_rate": 8.019323671497585e-05, | |
| "loss": 23.7421, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 3.603864734299517, | |
| "grad_norm": 7.034642696380615, | |
| "learning_rate": 8.003220611916264e-05, | |
| "loss": 27.8291, | |
| "step": 1492 | |
| }, | |
| { | |
| "epoch": 3.608695652173913, | |
| "grad_norm": 7.1023077964782715, | |
| "learning_rate": 7.987117552334944e-05, | |
| "loss": 26.7066, | |
| "step": 1494 | |
| }, | |
| { | |
| "epoch": 3.6135265700483092, | |
| "grad_norm": 8.332448959350586, | |
| "learning_rate": 7.971014492753623e-05, | |
| "loss": 25.7769, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 3.6183574879227054, | |
| "grad_norm": 7.105356693267822, | |
| "learning_rate": 7.954911433172302e-05, | |
| "loss": 25.7133, | |
| "step": 1498 | |
| }, | |
| { | |
| "epoch": 3.6231884057971016, | |
| "grad_norm": 7.028257369995117, | |
| "learning_rate": 7.938808373590983e-05, | |
| "loss": 25.0051, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 3.6280193236714977, | |
| "grad_norm": 7.71824312210083, | |
| "learning_rate": 7.922705314009662e-05, | |
| "loss": 25.9737, | |
| "step": 1502 | |
| }, | |
| { | |
| "epoch": 3.632850241545894, | |
| "grad_norm": 5.497483253479004, | |
| "learning_rate": 7.906602254428341e-05, | |
| "loss": 27.4592, | |
| "step": 1504 | |
| }, | |
| { | |
| "epoch": 3.63768115942029, | |
| "grad_norm": 8.458606719970703, | |
| "learning_rate": 7.890499194847021e-05, | |
| "loss": 24.0378, | |
| "step": 1506 | |
| }, | |
| { | |
| "epoch": 3.642512077294686, | |
| "grad_norm": 8.406185150146484, | |
| "learning_rate": 7.874396135265701e-05, | |
| "loss": 26.3229, | |
| "step": 1508 | |
| }, | |
| { | |
| "epoch": 3.6473429951690823, | |
| "grad_norm": 7.685035228729248, | |
| "learning_rate": 7.85829307568438e-05, | |
| "loss": 25.97, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 3.6521739130434785, | |
| "grad_norm": 8.686131477355957, | |
| "learning_rate": 7.842190016103061e-05, | |
| "loss": 26.591, | |
| "step": 1512 | |
| }, | |
| { | |
| "epoch": 3.6570048309178746, | |
| "grad_norm": 6.984585285186768, | |
| "learning_rate": 7.82608695652174e-05, | |
| "loss": 25.8358, | |
| "step": 1514 | |
| }, | |
| { | |
| "epoch": 3.661835748792271, | |
| "grad_norm": 5.834330081939697, | |
| "learning_rate": 7.809983896940419e-05, | |
| "loss": 26.1456, | |
| "step": 1516 | |
| }, | |
| { | |
| "epoch": 3.6666666666666665, | |
| "grad_norm": 6.367677688598633, | |
| "learning_rate": 7.793880837359099e-05, | |
| "loss": 26.5751, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 3.6714975845410627, | |
| "grad_norm": 6.723855018615723, | |
| "learning_rate": 7.777777777777778e-05, | |
| "loss": 26.5181, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 3.676328502415459, | |
| "grad_norm": 6.305589199066162, | |
| "learning_rate": 7.761674718196457e-05, | |
| "loss": 25.6111, | |
| "step": 1522 | |
| }, | |
| { | |
| "epoch": 3.681159420289855, | |
| "grad_norm": 6.444118976593018, | |
| "learning_rate": 7.745571658615138e-05, | |
| "loss": 25.0445, | |
| "step": 1524 | |
| }, | |
| { | |
| "epoch": 3.685990338164251, | |
| "grad_norm": 7.176147937774658, | |
| "learning_rate": 7.729468599033817e-05, | |
| "loss": 25.2998, | |
| "step": 1526 | |
| }, | |
| { | |
| "epoch": 3.6908212560386473, | |
| "grad_norm": 8.422863006591797, | |
| "learning_rate": 7.713365539452497e-05, | |
| "loss": 27.3324, | |
| "step": 1528 | |
| }, | |
| { | |
| "epoch": 3.6956521739130435, | |
| "grad_norm": 6.9630913734436035, | |
| "learning_rate": 7.697262479871176e-05, | |
| "loss": 25.7144, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 3.7004830917874396, | |
| "grad_norm": 5.846348285675049, | |
| "learning_rate": 7.681159420289855e-05, | |
| "loss": 27.7589, | |
| "step": 1532 | |
| }, | |
| { | |
| "epoch": 3.7053140096618358, | |
| "grad_norm": 7.343765735626221, | |
| "learning_rate": 7.665056360708535e-05, | |
| "loss": 25.8322, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 3.710144927536232, | |
| "grad_norm": 6.997490882873535, | |
| "learning_rate": 7.648953301127214e-05, | |
| "loss": 28.2401, | |
| "step": 1536 | |
| }, | |
| { | |
| "epoch": 3.714975845410628, | |
| "grad_norm": 6.830377101898193, | |
| "learning_rate": 7.632850241545893e-05, | |
| "loss": 25.1853, | |
| "step": 1538 | |
| }, | |
| { | |
| "epoch": 3.7198067632850242, | |
| "grad_norm": 7.353569030761719, | |
| "learning_rate": 7.616747181964574e-05, | |
| "loss": 27.8896, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 3.7246376811594204, | |
| "grad_norm": 6.923029899597168, | |
| "learning_rate": 7.600644122383254e-05, | |
| "loss": 27.466, | |
| "step": 1542 | |
| }, | |
| { | |
| "epoch": 3.7294685990338166, | |
| "grad_norm": 7.982540607452393, | |
| "learning_rate": 7.584541062801933e-05, | |
| "loss": 26.5827, | |
| "step": 1544 | |
| }, | |
| { | |
| "epoch": 3.7342995169082127, | |
| "grad_norm": 7.8132758140563965, | |
| "learning_rate": 7.568438003220612e-05, | |
| "loss": 25.5102, | |
| "step": 1546 | |
| }, | |
| { | |
| "epoch": 3.7391304347826084, | |
| "grad_norm": 6.979062557220459, | |
| "learning_rate": 7.552334943639292e-05, | |
| "loss": 26.6007, | |
| "step": 1548 | |
| }, | |
| { | |
| "epoch": 3.7439613526570046, | |
| "grad_norm": 6.988529682159424, | |
| "learning_rate": 7.536231884057971e-05, | |
| "loss": 27.2199, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 3.7487922705314007, | |
| "grad_norm": 6.884960174560547, | |
| "learning_rate": 7.520128824476652e-05, | |
| "loss": 28.3951, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 3.753623188405797, | |
| "grad_norm": 7.593159198760986, | |
| "learning_rate": 7.50402576489533e-05, | |
| "loss": 26.4899, | |
| "step": 1554 | |
| }, | |
| { | |
| "epoch": 3.758454106280193, | |
| "grad_norm": 7.603058815002441, | |
| "learning_rate": 7.48792270531401e-05, | |
| "loss": 25.2797, | |
| "step": 1556 | |
| }, | |
| { | |
| "epoch": 3.763285024154589, | |
| "grad_norm": 8.542155265808105, | |
| "learning_rate": 7.47181964573269e-05, | |
| "loss": 25.7991, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 3.7681159420289854, | |
| "grad_norm": 7.652464389801025, | |
| "learning_rate": 7.455716586151369e-05, | |
| "loss": 26.4845, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 3.7729468599033815, | |
| "grad_norm": 8.047564506530762, | |
| "learning_rate": 7.439613526570048e-05, | |
| "loss": 26.0127, | |
| "step": 1562 | |
| }, | |
| { | |
| "epoch": 3.7777777777777777, | |
| "grad_norm": 6.38883113861084, | |
| "learning_rate": 7.423510466988728e-05, | |
| "loss": 27.0549, | |
| "step": 1564 | |
| }, | |
| { | |
| "epoch": 3.782608695652174, | |
| "grad_norm": 6.353972434997559, | |
| "learning_rate": 7.407407407407407e-05, | |
| "loss": 25.2923, | |
| "step": 1566 | |
| }, | |
| { | |
| "epoch": 3.78743961352657, | |
| "grad_norm": 6.962271690368652, | |
| "learning_rate": 7.391304347826086e-05, | |
| "loss": 27.8826, | |
| "step": 1568 | |
| }, | |
| { | |
| "epoch": 3.792270531400966, | |
| "grad_norm": 6.521156311035156, | |
| "learning_rate": 7.375201288244767e-05, | |
| "loss": 28.1107, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 3.7971014492753623, | |
| "grad_norm": 8.195451736450195, | |
| "learning_rate": 7.359098228663447e-05, | |
| "loss": 26.5253, | |
| "step": 1572 | |
| }, | |
| { | |
| "epoch": 3.8019323671497585, | |
| "grad_norm": 6.806168556213379, | |
| "learning_rate": 7.342995169082126e-05, | |
| "loss": 27.1728, | |
| "step": 1574 | |
| }, | |
| { | |
| "epoch": 3.8067632850241546, | |
| "grad_norm": 7.229825973510742, | |
| "learning_rate": 7.326892109500806e-05, | |
| "loss": 25.333, | |
| "step": 1576 | |
| }, | |
| { | |
| "epoch": 3.8115942028985508, | |
| "grad_norm": 6.635615825653076, | |
| "learning_rate": 7.310789049919485e-05, | |
| "loss": 27.0733, | |
| "step": 1578 | |
| }, | |
| { | |
| "epoch": 3.816425120772947, | |
| "grad_norm": 6.50180721282959, | |
| "learning_rate": 7.294685990338164e-05, | |
| "loss": 27.4529, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 3.821256038647343, | |
| "grad_norm": 7.335048675537109, | |
| "learning_rate": 7.278582930756845e-05, | |
| "loss": 25.6855, | |
| "step": 1582 | |
| }, | |
| { | |
| "epoch": 3.8260869565217392, | |
| "grad_norm": 6.961329460144043, | |
| "learning_rate": 7.262479871175524e-05, | |
| "loss": 26.0103, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 3.8309178743961354, | |
| "grad_norm": 6.842545986175537, | |
| "learning_rate": 7.246376811594203e-05, | |
| "loss": 27.9322, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 3.8357487922705316, | |
| "grad_norm": 6.83944845199585, | |
| "learning_rate": 7.230273752012883e-05, | |
| "loss": 27.5723, | |
| "step": 1588 | |
| }, | |
| { | |
| "epoch": 3.8405797101449277, | |
| "grad_norm": 7.0610127449035645, | |
| "learning_rate": 7.214170692431562e-05, | |
| "loss": 26.011, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 3.845410628019324, | |
| "grad_norm": 7.726437568664551, | |
| "learning_rate": 7.198067632850241e-05, | |
| "loss": 26.6807, | |
| "step": 1592 | |
| }, | |
| { | |
| "epoch": 3.85024154589372, | |
| "grad_norm": 9.280223846435547, | |
| "learning_rate": 7.181964573268921e-05, | |
| "loss": 26.9886, | |
| "step": 1594 | |
| }, | |
| { | |
| "epoch": 3.855072463768116, | |
| "grad_norm": 6.995485782623291, | |
| "learning_rate": 7.1658615136876e-05, | |
| "loss": 27.2315, | |
| "step": 1596 | |
| }, | |
| { | |
| "epoch": 3.8599033816425123, | |
| "grad_norm": 7.200146198272705, | |
| "learning_rate": 7.14975845410628e-05, | |
| "loss": 25.7971, | |
| "step": 1598 | |
| }, | |
| { | |
| "epoch": 3.864734299516908, | |
| "grad_norm": 7.404515743255615, | |
| "learning_rate": 7.13365539452496e-05, | |
| "loss": 25.3168, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 3.869565217391304, | |
| "grad_norm": 7.142045497894287, | |
| "learning_rate": 7.11755233494364e-05, | |
| "loss": 24.6409, | |
| "step": 1602 | |
| }, | |
| { | |
| "epoch": 3.8743961352657004, | |
| "grad_norm": 7.02120304107666, | |
| "learning_rate": 7.101449275362319e-05, | |
| "loss": 27.3518, | |
| "step": 1604 | |
| }, | |
| { | |
| "epoch": 3.8792270531400965, | |
| "grad_norm": 7.604321002960205, | |
| "learning_rate": 7.085346215781e-05, | |
| "loss": 26.8926, | |
| "step": 1606 | |
| }, | |
| { | |
| "epoch": 3.8840579710144927, | |
| "grad_norm": 7.089973449707031, | |
| "learning_rate": 7.069243156199678e-05, | |
| "loss": 24.8074, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 3.888888888888889, | |
| "grad_norm": 8.049272537231445, | |
| "learning_rate": 7.053140096618357e-05, | |
| "loss": 25.8524, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 3.893719806763285, | |
| "grad_norm": 7.1630144119262695, | |
| "learning_rate": 7.037037037037038e-05, | |
| "loss": 26.9071, | |
| "step": 1612 | |
| }, | |
| { | |
| "epoch": 3.898550724637681, | |
| "grad_norm": 6.2005510330200195, | |
| "learning_rate": 7.020933977455717e-05, | |
| "loss": 27.107, | |
| "step": 1614 | |
| }, | |
| { | |
| "epoch": 3.9033816425120773, | |
| "grad_norm": 8.320915222167969, | |
| "learning_rate": 7.004830917874396e-05, | |
| "loss": 27.0582, | |
| "step": 1616 | |
| }, | |
| { | |
| "epoch": 3.9082125603864735, | |
| "grad_norm": 6.952855110168457, | |
| "learning_rate": 6.988727858293076e-05, | |
| "loss": 26.4762, | |
| "step": 1618 | |
| }, | |
| { | |
| "epoch": 3.9130434782608696, | |
| "grad_norm": 7.927274227142334, | |
| "learning_rate": 6.972624798711755e-05, | |
| "loss": 24.2706, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 3.917874396135266, | |
| "grad_norm": 7.922103404998779, | |
| "learning_rate": 6.956521739130436e-05, | |
| "loss": 25.8205, | |
| "step": 1622 | |
| }, | |
| { | |
| "epoch": 3.922705314009662, | |
| "grad_norm": 7.824489116668701, | |
| "learning_rate": 6.940418679549115e-05, | |
| "loss": 26.4827, | |
| "step": 1624 | |
| }, | |
| { | |
| "epoch": 3.927536231884058, | |
| "grad_norm": 6.419587135314941, | |
| "learning_rate": 6.924315619967794e-05, | |
| "loss": 28.2682, | |
| "step": 1626 | |
| }, | |
| { | |
| "epoch": 3.9323671497584543, | |
| "grad_norm": 8.104780197143555, | |
| "learning_rate": 6.908212560386474e-05, | |
| "loss": 25.7621, | |
| "step": 1628 | |
| }, | |
| { | |
| "epoch": 3.9371980676328504, | |
| "grad_norm": 7.307147979736328, | |
| "learning_rate": 6.892109500805153e-05, | |
| "loss": 27.1352, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 3.942028985507246, | |
| "grad_norm": 7.069173812866211, | |
| "learning_rate": 6.876006441223832e-05, | |
| "loss": 25.538, | |
| "step": 1632 | |
| }, | |
| { | |
| "epoch": 3.9468599033816423, | |
| "grad_norm": 7.971487522125244, | |
| "learning_rate": 6.859903381642512e-05, | |
| "loss": 26.754, | |
| "step": 1634 | |
| }, | |
| { | |
| "epoch": 3.9516908212560384, | |
| "grad_norm": 7.200797080993652, | |
| "learning_rate": 6.843800322061193e-05, | |
| "loss": 25.5438, | |
| "step": 1636 | |
| }, | |
| { | |
| "epoch": 3.9565217391304346, | |
| "grad_norm": 8.00469970703125, | |
| "learning_rate": 6.827697262479872e-05, | |
| "loss": 26.6568, | |
| "step": 1638 | |
| }, | |
| { | |
| "epoch": 3.9613526570048307, | |
| "grad_norm": 6.9250359535217285, | |
| "learning_rate": 6.811594202898552e-05, | |
| "loss": 25.4743, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 3.966183574879227, | |
| "grad_norm": 6.473790168762207, | |
| "learning_rate": 6.795491143317231e-05, | |
| "loss": 25.9443, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 3.971014492753623, | |
| "grad_norm": 8.05759048461914, | |
| "learning_rate": 6.77938808373591e-05, | |
| "loss": 25.5339, | |
| "step": 1644 | |
| }, | |
| { | |
| "epoch": 3.975845410628019, | |
| "grad_norm": 7.342809200286865, | |
| "learning_rate": 6.76328502415459e-05, | |
| "loss": 24.9969, | |
| "step": 1646 | |
| }, | |
| { | |
| "epoch": 3.9806763285024154, | |
| "grad_norm": 7.265125274658203, | |
| "learning_rate": 6.747181964573269e-05, | |
| "loss": 27.3797, | |
| "step": 1648 | |
| }, | |
| { | |
| "epoch": 3.9855072463768115, | |
| "grad_norm": 7.021026134490967, | |
| "learning_rate": 6.731078904991948e-05, | |
| "loss": 26.1091, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 3.9903381642512077, | |
| "grad_norm": 7.2072529792785645, | |
| "learning_rate": 6.714975845410629e-05, | |
| "loss": 27.5467, | |
| "step": 1652 | |
| }, | |
| { | |
| "epoch": 3.995169082125604, | |
| "grad_norm": 7.393160820007324, | |
| "learning_rate": 6.698872785829308e-05, | |
| "loss": 26.9415, | |
| "step": 1654 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "grad_norm": 7.511723518371582, | |
| "learning_rate": 6.682769726247987e-05, | |
| "loss": 26.0607, | |
| "step": 1656 | |
| }, | |
| { | |
| "epoch": 4.004830917874396, | |
| "grad_norm": 8.766012191772461, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 27.5218, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 4.009661835748792, | |
| "grad_norm": 6.866961479187012, | |
| "learning_rate": 6.650563607085346e-05, | |
| "loss": 26.818, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 4.0144927536231885, | |
| "grad_norm": 7.680884838104248, | |
| "learning_rate": 6.634460547504025e-05, | |
| "loss": 26.8022, | |
| "step": 1662 | |
| }, | |
| { | |
| "epoch": 4.019323671497585, | |
| "grad_norm": 7.392796039581299, | |
| "learning_rate": 6.618357487922707e-05, | |
| "loss": 26.8426, | |
| "step": 1664 | |
| }, | |
| { | |
| "epoch": 4.024154589371981, | |
| "grad_norm": 7.595928192138672, | |
| "learning_rate": 6.602254428341386e-05, | |
| "loss": 24.3986, | |
| "step": 1666 | |
| }, | |
| { | |
| "epoch": 4.028985507246377, | |
| "grad_norm": 7.379922866821289, | |
| "learning_rate": 6.586151368760065e-05, | |
| "loss": 28.0114, | |
| "step": 1668 | |
| }, | |
| { | |
| "epoch": 4.033816425120773, | |
| "grad_norm": 7.208115100860596, | |
| "learning_rate": 6.570048309178745e-05, | |
| "loss": 27.5758, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 4.038647342995169, | |
| "grad_norm": 7.357963562011719, | |
| "learning_rate": 6.553945249597424e-05, | |
| "loss": 24.8787, | |
| "step": 1672 | |
| }, | |
| { | |
| "epoch": 4.043478260869565, | |
| "grad_norm": 7.291189670562744, | |
| "learning_rate": 6.537842190016103e-05, | |
| "loss": 26.2749, | |
| "step": 1674 | |
| }, | |
| { | |
| "epoch": 4.048309178743962, | |
| "grad_norm": 7.44353723526001, | |
| "learning_rate": 6.521739130434783e-05, | |
| "loss": 25.121, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 4.053140096618358, | |
| "grad_norm": 6.338862419128418, | |
| "learning_rate": 6.505636070853462e-05, | |
| "loss": 25.7155, | |
| "step": 1678 | |
| }, | |
| { | |
| "epoch": 4.057971014492754, | |
| "grad_norm": 6.6159162521362305, | |
| "learning_rate": 6.489533011272141e-05, | |
| "loss": 24.0727, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 4.06280193236715, | |
| "grad_norm": 6.825524806976318, | |
| "learning_rate": 6.473429951690822e-05, | |
| "loss": 24.1144, | |
| "step": 1682 | |
| }, | |
| { | |
| "epoch": 4.067632850241546, | |
| "grad_norm": 6.563850402832031, | |
| "learning_rate": 6.457326892109501e-05, | |
| "loss": 25.211, | |
| "step": 1684 | |
| }, | |
| { | |
| "epoch": 4.072463768115942, | |
| "grad_norm": 6.340920925140381, | |
| "learning_rate": 6.44122383252818e-05, | |
| "loss": 25.9026, | |
| "step": 1686 | |
| }, | |
| { | |
| "epoch": 4.0772946859903385, | |
| "grad_norm": 6.728626251220703, | |
| "learning_rate": 6.42512077294686e-05, | |
| "loss": 27.3648, | |
| "step": 1688 | |
| }, | |
| { | |
| "epoch": 4.082125603864735, | |
| "grad_norm": 6.788083553314209, | |
| "learning_rate": 6.409017713365539e-05, | |
| "loss": 26.6329, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 4.086956521739131, | |
| "grad_norm": 7.323519706726074, | |
| "learning_rate": 6.39291465378422e-05, | |
| "loss": 25.6293, | |
| "step": 1692 | |
| }, | |
| { | |
| "epoch": 4.091787439613527, | |
| "grad_norm": 6.454324245452881, | |
| "learning_rate": 6.376811594202898e-05, | |
| "loss": 26.1033, | |
| "step": 1694 | |
| }, | |
| { | |
| "epoch": 4.096618357487923, | |
| "grad_norm": 6.53643798828125, | |
| "learning_rate": 6.360708534621579e-05, | |
| "loss": 28.4655, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 4.101449275362318, | |
| "grad_norm": 8.033370971679688, | |
| "learning_rate": 6.344605475040258e-05, | |
| "loss": 26.4287, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 4.106280193236715, | |
| "grad_norm": 6.196560382843018, | |
| "learning_rate": 6.328502415458938e-05, | |
| "loss": 26.5747, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 4.111111111111111, | |
| "grad_norm": 6.994458198547363, | |
| "learning_rate": 6.312399355877617e-05, | |
| "loss": 25.3307, | |
| "step": 1702 | |
| }, | |
| { | |
| "epoch": 4.115942028985507, | |
| "grad_norm": 7.29825496673584, | |
| "learning_rate": 6.296296296296296e-05, | |
| "loss": 25.2931, | |
| "step": 1704 | |
| }, | |
| { | |
| "epoch": 4.120772946859903, | |
| "grad_norm": 7.366706371307373, | |
| "learning_rate": 6.280193236714976e-05, | |
| "loss": 25.1327, | |
| "step": 1706 | |
| }, | |
| { | |
| "epoch": 4.125603864734299, | |
| "grad_norm": 7.066011428833008, | |
| "learning_rate": 6.264090177133655e-05, | |
| "loss": 27.8359, | |
| "step": 1708 | |
| }, | |
| { | |
| "epoch": 4.130434782608695, | |
| "grad_norm": 7.165285587310791, | |
| "learning_rate": 6.247987117552336e-05, | |
| "loss": 26.1166, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 4.1352657004830915, | |
| "grad_norm": 6.823864936828613, | |
| "learning_rate": 6.231884057971015e-05, | |
| "loss": 27.5943, | |
| "step": 1712 | |
| }, | |
| { | |
| "epoch": 4.140096618357488, | |
| "grad_norm": 7.767164707183838, | |
| "learning_rate": 6.215780998389694e-05, | |
| "loss": 24.9854, | |
| "step": 1714 | |
| }, | |
| { | |
| "epoch": 4.144927536231884, | |
| "grad_norm": 6.458461284637451, | |
| "learning_rate": 6.199677938808374e-05, | |
| "loss": 26.8271, | |
| "step": 1716 | |
| }, | |
| { | |
| "epoch": 4.14975845410628, | |
| "grad_norm": 7.082225322723389, | |
| "learning_rate": 6.183574879227053e-05, | |
| "loss": 25.678, | |
| "step": 1718 | |
| }, | |
| { | |
| "epoch": 4.154589371980676, | |
| "grad_norm": 7.867661476135254, | |
| "learning_rate": 6.167471819645732e-05, | |
| "loss": 26.7575, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 4.159420289855072, | |
| "grad_norm": 7.803908824920654, | |
| "learning_rate": 6.151368760064413e-05, | |
| "loss": 27.785, | |
| "step": 1722 | |
| }, | |
| { | |
| "epoch": 4.164251207729468, | |
| "grad_norm": 7.704416751861572, | |
| "learning_rate": 6.135265700483092e-05, | |
| "loss": 26.4086, | |
| "step": 1724 | |
| }, | |
| { | |
| "epoch": 4.169082125603865, | |
| "grad_norm": 7.166048049926758, | |
| "learning_rate": 6.119162640901772e-05, | |
| "loss": 25.6944, | |
| "step": 1726 | |
| }, | |
| { | |
| "epoch": 4.173913043478261, | |
| "grad_norm": 7.665358066558838, | |
| "learning_rate": 6.1030595813204516e-05, | |
| "loss": 25.7421, | |
| "step": 1728 | |
| }, | |
| { | |
| "epoch": 4.178743961352657, | |
| "grad_norm": 6.582197666168213, | |
| "learning_rate": 6.086956521739131e-05, | |
| "loss": 24.2085, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 4.183574879227053, | |
| "grad_norm": 6.641133785247803, | |
| "learning_rate": 6.07085346215781e-05, | |
| "loss": 26.5714, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 4.188405797101449, | |
| "grad_norm": 8.203088760375977, | |
| "learning_rate": 6.05475040257649e-05, | |
| "loss": 24.042, | |
| "step": 1734 | |
| }, | |
| { | |
| "epoch": 4.193236714975845, | |
| "grad_norm": 7.593963146209717, | |
| "learning_rate": 6.0386473429951696e-05, | |
| "loss": 26.1024, | |
| "step": 1736 | |
| }, | |
| { | |
| "epoch": 4.1980676328502415, | |
| "grad_norm": 6.2828450202941895, | |
| "learning_rate": 6.022544283413849e-05, | |
| "loss": 25.6751, | |
| "step": 1738 | |
| }, | |
| { | |
| "epoch": 4.202898550724638, | |
| "grad_norm": 7.936067581176758, | |
| "learning_rate": 6.006441223832528e-05, | |
| "loss": 26.5624, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 4.207729468599034, | |
| "grad_norm": 7.069867134094238, | |
| "learning_rate": 5.990338164251208e-05, | |
| "loss": 26.0663, | |
| "step": 1742 | |
| }, | |
| { | |
| "epoch": 4.21256038647343, | |
| "grad_norm": 7.237870693206787, | |
| "learning_rate": 5.9742351046698876e-05, | |
| "loss": 26.6947, | |
| "step": 1744 | |
| }, | |
| { | |
| "epoch": 4.217391304347826, | |
| "grad_norm": 6.671788692474365, | |
| "learning_rate": 5.9581320450885666e-05, | |
| "loss": 26.2133, | |
| "step": 1746 | |
| }, | |
| { | |
| "epoch": 4.222222222222222, | |
| "grad_norm": 6.456491947174072, | |
| "learning_rate": 5.942028985507246e-05, | |
| "loss": 25.4374, | |
| "step": 1748 | |
| }, | |
| { | |
| "epoch": 4.2270531400966185, | |
| "grad_norm": 6.428054332733154, | |
| "learning_rate": 5.925925925925926e-05, | |
| "loss": 27.421, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 4.231884057971015, | |
| "grad_norm": 6.948849678039551, | |
| "learning_rate": 5.9098228663446057e-05, | |
| "loss": 24.7258, | |
| "step": 1752 | |
| }, | |
| { | |
| "epoch": 4.236714975845411, | |
| "grad_norm": 7.914185047149658, | |
| "learning_rate": 5.8937198067632847e-05, | |
| "loss": 26.731, | |
| "step": 1754 | |
| }, | |
| { | |
| "epoch": 4.241545893719807, | |
| "grad_norm": 6.79870080947876, | |
| "learning_rate": 5.877616747181964e-05, | |
| "loss": 27.7706, | |
| "step": 1756 | |
| }, | |
| { | |
| "epoch": 4.246376811594203, | |
| "grad_norm": 7.053183078765869, | |
| "learning_rate": 5.861513687600645e-05, | |
| "loss": 25.8897, | |
| "step": 1758 | |
| }, | |
| { | |
| "epoch": 4.251207729468599, | |
| "grad_norm": 7.341165065765381, | |
| "learning_rate": 5.8454106280193244e-05, | |
| "loss": 25.7238, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 4.256038647342995, | |
| "grad_norm": 6.499047756195068, | |
| "learning_rate": 5.829307568438004e-05, | |
| "loss": 26.5338, | |
| "step": 1762 | |
| }, | |
| { | |
| "epoch": 4.260869565217392, | |
| "grad_norm": 6.891699314117432, | |
| "learning_rate": 5.813204508856683e-05, | |
| "loss": 26.3659, | |
| "step": 1764 | |
| }, | |
| { | |
| "epoch": 4.265700483091788, | |
| "grad_norm": 6.726503849029541, | |
| "learning_rate": 5.797101449275363e-05, | |
| "loss": 24.1357, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 4.270531400966184, | |
| "grad_norm": 7.38776159286499, | |
| "learning_rate": 5.7809983896940424e-05, | |
| "loss": 25.8197, | |
| "step": 1768 | |
| }, | |
| { | |
| "epoch": 4.27536231884058, | |
| "grad_norm": 6.880035400390625, | |
| "learning_rate": 5.764895330112722e-05, | |
| "loss": 26.0566, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 4.280193236714976, | |
| "grad_norm": 6.925288677215576, | |
| "learning_rate": 5.748792270531401e-05, | |
| "loss": 27.931, | |
| "step": 1772 | |
| }, | |
| { | |
| "epoch": 4.285024154589372, | |
| "grad_norm": 8.501145362854004, | |
| "learning_rate": 5.732689210950081e-05, | |
| "loss": 24.3897, | |
| "step": 1774 | |
| }, | |
| { | |
| "epoch": 4.2898550724637685, | |
| "grad_norm": 7.33554744720459, | |
| "learning_rate": 5.7165861513687604e-05, | |
| "loss": 26.0568, | |
| "step": 1776 | |
| }, | |
| { | |
| "epoch": 4.294685990338165, | |
| "grad_norm": 6.757916450500488, | |
| "learning_rate": 5.7004830917874394e-05, | |
| "loss": 26.4327, | |
| "step": 1778 | |
| }, | |
| { | |
| "epoch": 4.29951690821256, | |
| "grad_norm": 7.093183517456055, | |
| "learning_rate": 5.684380032206119e-05, | |
| "loss": 25.8601, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 4.304347826086957, | |
| "grad_norm": 5.872477054595947, | |
| "learning_rate": 5.668276972624799e-05, | |
| "loss": 26.353, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 4.309178743961352, | |
| "grad_norm": 5.949990272521973, | |
| "learning_rate": 5.652173913043478e-05, | |
| "loss": 27.0481, | |
| "step": 1784 | |
| }, | |
| { | |
| "epoch": 4.314009661835748, | |
| "grad_norm": 6.953137397766113, | |
| "learning_rate": 5.6360708534621574e-05, | |
| "loss": 26.569, | |
| "step": 1786 | |
| }, | |
| { | |
| "epoch": 4.318840579710145, | |
| "grad_norm": 7.875227928161621, | |
| "learning_rate": 5.6199677938808385e-05, | |
| "loss": 23.7947, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 4.323671497584541, | |
| "grad_norm": 6.187444686889648, | |
| "learning_rate": 5.6038647342995175e-05, | |
| "loss": 25.1237, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 4.328502415458937, | |
| "grad_norm": 6.970160961151123, | |
| "learning_rate": 5.587761674718197e-05, | |
| "loss": 25.7053, | |
| "step": 1792 | |
| }, | |
| { | |
| "epoch": 4.333333333333333, | |
| "grad_norm": 6.903000831604004, | |
| "learning_rate": 5.571658615136877e-05, | |
| "loss": 26.7737, | |
| "step": 1794 | |
| }, | |
| { | |
| "epoch": 4.338164251207729, | |
| "grad_norm": 7.370026111602783, | |
| "learning_rate": 5.555555555555556e-05, | |
| "loss": 25.3098, | |
| "step": 1796 | |
| }, | |
| { | |
| "epoch": 4.342995169082125, | |
| "grad_norm": 6.926233768463135, | |
| "learning_rate": 5.5394524959742355e-05, | |
| "loss": 24.9499, | |
| "step": 1798 | |
| }, | |
| { | |
| "epoch": 4.3478260869565215, | |
| "grad_norm": 6.8403544425964355, | |
| "learning_rate": 5.523349436392915e-05, | |
| "loss": 25.6363, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.352657004830918, | |
| "grad_norm": 7.1537089347839355, | |
| "learning_rate": 5.507246376811594e-05, | |
| "loss": 24.911, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 4.357487922705314, | |
| "grad_norm": 6.798279285430908, | |
| "learning_rate": 5.491143317230274e-05, | |
| "loss": 27.4748, | |
| "step": 1804 | |
| }, | |
| { | |
| "epoch": 4.36231884057971, | |
| "grad_norm": 5.993078231811523, | |
| "learning_rate": 5.4750402576489535e-05, | |
| "loss": 27.1362, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 4.367149758454106, | |
| "grad_norm": 7.4096574783325195, | |
| "learning_rate": 5.458937198067633e-05, | |
| "loss": 26.0591, | |
| "step": 1808 | |
| }, | |
| { | |
| "epoch": 4.371980676328502, | |
| "grad_norm": 6.903232574462891, | |
| "learning_rate": 5.442834138486312e-05, | |
| "loss": 26.8211, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 4.3768115942028984, | |
| "grad_norm": 7.838393211364746, | |
| "learning_rate": 5.426731078904992e-05, | |
| "loss": 26.2384, | |
| "step": 1812 | |
| }, | |
| { | |
| "epoch": 4.381642512077295, | |
| "grad_norm": 7.33106803894043, | |
| "learning_rate": 5.4106280193236716e-05, | |
| "loss": 26.5385, | |
| "step": 1814 | |
| }, | |
| { | |
| "epoch": 4.386473429951691, | |
| "grad_norm": 6.619305610656738, | |
| "learning_rate": 5.3945249597423505e-05, | |
| "loss": 22.8647, | |
| "step": 1816 | |
| }, | |
| { | |
| "epoch": 4.391304347826087, | |
| "grad_norm": 7.007352352142334, | |
| "learning_rate": 5.37842190016103e-05, | |
| "loss": 28.2365, | |
| "step": 1818 | |
| }, | |
| { | |
| "epoch": 4.396135265700483, | |
| "grad_norm": 7.026554584503174, | |
| "learning_rate": 5.3623188405797106e-05, | |
| "loss": 25.9467, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 4.400966183574879, | |
| "grad_norm": 8.395278930664062, | |
| "learning_rate": 5.34621578099839e-05, | |
| "loss": 24.3139, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 4.405797101449275, | |
| "grad_norm": 6.9680495262146, | |
| "learning_rate": 5.33011272141707e-05, | |
| "loss": 24.9039, | |
| "step": 1824 | |
| }, | |
| { | |
| "epoch": 4.4106280193236715, | |
| "grad_norm": 7.212375164031982, | |
| "learning_rate": 5.3140096618357496e-05, | |
| "loss": 25.4465, | |
| "step": 1826 | |
| }, | |
| { | |
| "epoch": 4.415458937198068, | |
| "grad_norm": 6.966728210449219, | |
| "learning_rate": 5.2979066022544286e-05, | |
| "loss": 25.7792, | |
| "step": 1828 | |
| }, | |
| { | |
| "epoch": 4.420289855072464, | |
| "grad_norm": 6.4454522132873535, | |
| "learning_rate": 5.281803542673108e-05, | |
| "loss": 25.9821, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 4.42512077294686, | |
| "grad_norm": 7.032574653625488, | |
| "learning_rate": 5.265700483091788e-05, | |
| "loss": 26.4527, | |
| "step": 1832 | |
| }, | |
| { | |
| "epoch": 4.429951690821256, | |
| "grad_norm": 7.715813159942627, | |
| "learning_rate": 5.249597423510467e-05, | |
| "loss": 25.1597, | |
| "step": 1834 | |
| }, | |
| { | |
| "epoch": 4.434782608695652, | |
| "grad_norm": 8.366538047790527, | |
| "learning_rate": 5.2334943639291466e-05, | |
| "loss": 26.6993, | |
| "step": 1836 | |
| }, | |
| { | |
| "epoch": 4.4396135265700485, | |
| "grad_norm": 6.7702484130859375, | |
| "learning_rate": 5.217391304347826e-05, | |
| "loss": 25.7026, | |
| "step": 1838 | |
| }, | |
| { | |
| "epoch": 4.444444444444445, | |
| "grad_norm": 7.936936378479004, | |
| "learning_rate": 5.201288244766506e-05, | |
| "loss": 26.0788, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 4.449275362318841, | |
| "grad_norm": 9.027806282043457, | |
| "learning_rate": 5.185185185185185e-05, | |
| "loss": 26.8397, | |
| "step": 1842 | |
| }, | |
| { | |
| "epoch": 4.454106280193237, | |
| "grad_norm": 7.541802406311035, | |
| "learning_rate": 5.1690821256038647e-05, | |
| "loss": 25.2463, | |
| "step": 1844 | |
| }, | |
| { | |
| "epoch": 4.458937198067633, | |
| "grad_norm": 6.402732849121094, | |
| "learning_rate": 5.152979066022544e-05, | |
| "loss": 26.2031, | |
| "step": 1846 | |
| }, | |
| { | |
| "epoch": 4.463768115942029, | |
| "grad_norm": 9.23645305633545, | |
| "learning_rate": 5.136876006441223e-05, | |
| "loss": 25.5027, | |
| "step": 1848 | |
| }, | |
| { | |
| "epoch": 4.468599033816425, | |
| "grad_norm": 7.548840045928955, | |
| "learning_rate": 5.1207729468599044e-05, | |
| "loss": 25.3803, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 4.473429951690822, | |
| "grad_norm": 6.839424133300781, | |
| "learning_rate": 5.1046698872785834e-05, | |
| "loss": 25.4142, | |
| "step": 1852 | |
| }, | |
| { | |
| "epoch": 4.478260869565218, | |
| "grad_norm": 6.8843512535095215, | |
| "learning_rate": 5.088566827697263e-05, | |
| "loss": 25.0163, | |
| "step": 1854 | |
| }, | |
| { | |
| "epoch": 4.483091787439614, | |
| "grad_norm": 6.359217643737793, | |
| "learning_rate": 5.072463768115943e-05, | |
| "loss": 26.7153, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 4.48792270531401, | |
| "grad_norm": 7.048843860626221, | |
| "learning_rate": 5.0563607085346224e-05, | |
| "loss": 25.6665, | |
| "step": 1858 | |
| }, | |
| { | |
| "epoch": 4.492753623188406, | |
| "grad_norm": 7.086437702178955, | |
| "learning_rate": 5.0402576489533014e-05, | |
| "loss": 26.86, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 4.4975845410628015, | |
| "grad_norm": 6.8362507820129395, | |
| "learning_rate": 5.024154589371981e-05, | |
| "loss": 26.5117, | |
| "step": 1862 | |
| }, | |
| { | |
| "epoch": 4.5024154589371985, | |
| "grad_norm": 6.434200763702393, | |
| "learning_rate": 5.008051529790661e-05, | |
| "loss": 25.2265, | |
| "step": 1864 | |
| }, | |
| { | |
| "epoch": 4.507246376811594, | |
| "grad_norm": 8.030712127685547, | |
| "learning_rate": 4.99194847020934e-05, | |
| "loss": 24.4644, | |
| "step": 1866 | |
| }, | |
| { | |
| "epoch": 4.512077294685991, | |
| "grad_norm": 7.7696051597595215, | |
| "learning_rate": 4.9758454106280194e-05, | |
| "loss": 27.1793, | |
| "step": 1868 | |
| }, | |
| { | |
| "epoch": 4.516908212560386, | |
| "grad_norm": 6.404499530792236, | |
| "learning_rate": 4.959742351046699e-05, | |
| "loss": 26.4276, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 4.521739130434782, | |
| "grad_norm": 7.412373065948486, | |
| "learning_rate": 4.943639291465378e-05, | |
| "loss": 25.6971, | |
| "step": 1872 | |
| }, | |
| { | |
| "epoch": 4.526570048309178, | |
| "grad_norm": 7.425329685211182, | |
| "learning_rate": 4.9275362318840584e-05, | |
| "loss": 23.4725, | |
| "step": 1874 | |
| }, | |
| { | |
| "epoch": 4.531400966183575, | |
| "grad_norm": 6.722659587860107, | |
| "learning_rate": 4.911433172302738e-05, | |
| "loss": 26.158, | |
| "step": 1876 | |
| }, | |
| { | |
| "epoch": 4.536231884057971, | |
| "grad_norm": 7.206009387969971, | |
| "learning_rate": 4.895330112721417e-05, | |
| "loss": 26.8682, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 4.541062801932367, | |
| "grad_norm": 7.180261135101318, | |
| "learning_rate": 4.879227053140097e-05, | |
| "loss": 26.191, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 4.545893719806763, | |
| "grad_norm": 7.371028900146484, | |
| "learning_rate": 4.8631239935587765e-05, | |
| "loss": 23.7948, | |
| "step": 1882 | |
| }, | |
| { | |
| "epoch": 4.550724637681159, | |
| "grad_norm": 6.874049663543701, | |
| "learning_rate": 4.847020933977456e-05, | |
| "loss": 26.638, | |
| "step": 1884 | |
| }, | |
| { | |
| "epoch": 4.555555555555555, | |
| "grad_norm": 7.5235795974731445, | |
| "learning_rate": 4.830917874396135e-05, | |
| "loss": 26.9283, | |
| "step": 1886 | |
| }, | |
| { | |
| "epoch": 4.5603864734299515, | |
| "grad_norm": 7.371413707733154, | |
| "learning_rate": 4.814814814814815e-05, | |
| "loss": 26.5723, | |
| "step": 1888 | |
| }, | |
| { | |
| "epoch": 4.565217391304348, | |
| "grad_norm": 6.487553119659424, | |
| "learning_rate": 4.7987117552334945e-05, | |
| "loss": 25.9711, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 4.570048309178744, | |
| "grad_norm": 6.800736427307129, | |
| "learning_rate": 4.782608695652174e-05, | |
| "loss": 23.376, | |
| "step": 1892 | |
| }, | |
| { | |
| "epoch": 4.57487922705314, | |
| "grad_norm": 7.149484634399414, | |
| "learning_rate": 4.766505636070854e-05, | |
| "loss": 26.0393, | |
| "step": 1894 | |
| }, | |
| { | |
| "epoch": 4.579710144927536, | |
| "grad_norm": 7.532267093658447, | |
| "learning_rate": 4.7504025764895335e-05, | |
| "loss": 26.1861, | |
| "step": 1896 | |
| }, | |
| { | |
| "epoch": 4.584541062801932, | |
| "grad_norm": 7.492485046386719, | |
| "learning_rate": 4.7342995169082125e-05, | |
| "loss": 26.4485, | |
| "step": 1898 | |
| }, | |
| { | |
| "epoch": 4.5893719806763285, | |
| "grad_norm": 6.885655879974365, | |
| "learning_rate": 4.718196457326892e-05, | |
| "loss": 25.1794, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 4.594202898550725, | |
| "grad_norm": 6.430235862731934, | |
| "learning_rate": 4.702093397745572e-05, | |
| "loss": 25.6459, | |
| "step": 1902 | |
| }, | |
| { | |
| "epoch": 4.599033816425121, | |
| "grad_norm": 6.470332145690918, | |
| "learning_rate": 4.6859903381642516e-05, | |
| "loss": 26.0056, | |
| "step": 1904 | |
| }, | |
| { | |
| "epoch": 4.603864734299517, | |
| "grad_norm": 6.93711519241333, | |
| "learning_rate": 4.669887278582931e-05, | |
| "loss": 26.8588, | |
| "step": 1906 | |
| }, | |
| { | |
| "epoch": 4.608695652173913, | |
| "grad_norm": 7.658902168273926, | |
| "learning_rate": 4.653784219001611e-05, | |
| "loss": 25.5478, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 4.613526570048309, | |
| "grad_norm": 7.67640495300293, | |
| "learning_rate": 4.63768115942029e-05, | |
| "loss": 26.2871, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 4.618357487922705, | |
| "grad_norm": 7.06746244430542, | |
| "learning_rate": 4.6215780998389696e-05, | |
| "loss": 25.8891, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 4.6231884057971016, | |
| "grad_norm": 7.047806739807129, | |
| "learning_rate": 4.605475040257649e-05, | |
| "loss": 27.2715, | |
| "step": 1914 | |
| }, | |
| { | |
| "epoch": 4.628019323671498, | |
| "grad_norm": 7.097225189208984, | |
| "learning_rate": 4.589371980676328e-05, | |
| "loss": 24.1438, | |
| "step": 1916 | |
| }, | |
| { | |
| "epoch": 4.632850241545894, | |
| "grad_norm": 7.487665176391602, | |
| "learning_rate": 4.573268921095008e-05, | |
| "loss": 25.5925, | |
| "step": 1918 | |
| }, | |
| { | |
| "epoch": 4.63768115942029, | |
| "grad_norm": 6.561511516571045, | |
| "learning_rate": 4.557165861513688e-05, | |
| "loss": 27.0304, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 4.642512077294686, | |
| "grad_norm": 7.644463539123535, | |
| "learning_rate": 4.541062801932367e-05, | |
| "loss": 26.007, | |
| "step": 1922 | |
| }, | |
| { | |
| "epoch": 4.647342995169082, | |
| "grad_norm": 7.329721927642822, | |
| "learning_rate": 4.524959742351047e-05, | |
| "loss": 23.3239, | |
| "step": 1924 | |
| }, | |
| { | |
| "epoch": 4.6521739130434785, | |
| "grad_norm": 6.725891590118408, | |
| "learning_rate": 4.5088566827697266e-05, | |
| "loss": 25.8835, | |
| "step": 1926 | |
| }, | |
| { | |
| "epoch": 4.657004830917875, | |
| "grad_norm": 7.27399206161499, | |
| "learning_rate": 4.492753623188406e-05, | |
| "loss": 26.4273, | |
| "step": 1928 | |
| }, | |
| { | |
| "epoch": 4.661835748792271, | |
| "grad_norm": 6.614084720611572, | |
| "learning_rate": 4.476650563607085e-05, | |
| "loss": 25.8323, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 4.666666666666667, | |
| "grad_norm": 6.703570365905762, | |
| "learning_rate": 4.460547504025765e-05, | |
| "loss": 25.0444, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 4.671497584541063, | |
| "grad_norm": 7.8840556144714355, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 26.0548, | |
| "step": 1934 | |
| }, | |
| { | |
| "epoch": 4.676328502415459, | |
| "grad_norm": 6.566593170166016, | |
| "learning_rate": 4.428341384863124e-05, | |
| "loss": 25.8758, | |
| "step": 1936 | |
| }, | |
| { | |
| "epoch": 4.681159420289855, | |
| "grad_norm": 6.961997985839844, | |
| "learning_rate": 4.412238325281804e-05, | |
| "loss": 26.1125, | |
| "step": 1938 | |
| }, | |
| { | |
| "epoch": 4.685990338164252, | |
| "grad_norm": 8.170991897583008, | |
| "learning_rate": 4.396135265700484e-05, | |
| "loss": 27.3513, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 4.690821256038648, | |
| "grad_norm": 6.823581218719482, | |
| "learning_rate": 4.380032206119163e-05, | |
| "loss": 25.9433, | |
| "step": 1942 | |
| }, | |
| { | |
| "epoch": 4.695652173913043, | |
| "grad_norm": 7.356668949127197, | |
| "learning_rate": 4.3639291465378424e-05, | |
| "loss": 27.2403, | |
| "step": 1944 | |
| }, | |
| { | |
| "epoch": 4.70048309178744, | |
| "grad_norm": 7.08234977722168, | |
| "learning_rate": 4.347826086956522e-05, | |
| "loss": 25.7369, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 4.705314009661835, | |
| "grad_norm": 6.981078147888184, | |
| "learning_rate": 4.331723027375201e-05, | |
| "loss": 25.5263, | |
| "step": 1948 | |
| }, | |
| { | |
| "epoch": 4.710144927536232, | |
| "grad_norm": 6.724111080169678, | |
| "learning_rate": 4.315619967793881e-05, | |
| "loss": 25.9395, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 4.714975845410628, | |
| "grad_norm": 6.105647563934326, | |
| "learning_rate": 4.299516908212561e-05, | |
| "loss": 26.302, | |
| "step": 1952 | |
| }, | |
| { | |
| "epoch": 4.719806763285024, | |
| "grad_norm": 7.321731090545654, | |
| "learning_rate": 4.28341384863124e-05, | |
| "loss": 25.4126, | |
| "step": 1954 | |
| }, | |
| { | |
| "epoch": 4.72463768115942, | |
| "grad_norm": 6.488819599151611, | |
| "learning_rate": 4.26731078904992e-05, | |
| "loss": 26.1355, | |
| "step": 1956 | |
| }, | |
| { | |
| "epoch": 4.729468599033816, | |
| "grad_norm": 6.578047752380371, | |
| "learning_rate": 4.2512077294685994e-05, | |
| "loss": 27.0345, | |
| "step": 1958 | |
| }, | |
| { | |
| "epoch": 4.734299516908212, | |
| "grad_norm": 6.070748805999756, | |
| "learning_rate": 4.2351046698872784e-05, | |
| "loss": 24.2139, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 4.739130434782608, | |
| "grad_norm": 6.960094451904297, | |
| "learning_rate": 4.219001610305958e-05, | |
| "loss": 26.6307, | |
| "step": 1962 | |
| }, | |
| { | |
| "epoch": 4.743961352657005, | |
| "grad_norm": 6.557458877563477, | |
| "learning_rate": 4.202898550724638e-05, | |
| "loss": 24.7468, | |
| "step": 1964 | |
| }, | |
| { | |
| "epoch": 4.748792270531401, | |
| "grad_norm": 7.3893656730651855, | |
| "learning_rate": 4.1867954911433174e-05, | |
| "loss": 25.1312, | |
| "step": 1966 | |
| }, | |
| { | |
| "epoch": 4.753623188405797, | |
| "grad_norm": 7.08898401260376, | |
| "learning_rate": 4.170692431561997e-05, | |
| "loss": 26.4543, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 4.758454106280193, | |
| "grad_norm": 7.590085029602051, | |
| "learning_rate": 4.154589371980677e-05, | |
| "loss": 26.8427, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 4.763285024154589, | |
| "grad_norm": 6.841743469238281, | |
| "learning_rate": 4.1384863123993565e-05, | |
| "loss": 25.9606, | |
| "step": 1972 | |
| }, | |
| { | |
| "epoch": 4.768115942028985, | |
| "grad_norm": 7.612220764160156, | |
| "learning_rate": 4.1223832528180355e-05, | |
| "loss": 27.338, | |
| "step": 1974 | |
| }, | |
| { | |
| "epoch": 4.7729468599033815, | |
| "grad_norm": 6.759093761444092, | |
| "learning_rate": 4.106280193236715e-05, | |
| "loss": 26.6189, | |
| "step": 1976 | |
| }, | |
| { | |
| "epoch": 4.777777777777778, | |
| "grad_norm": 7.5177226066589355, | |
| "learning_rate": 4.090177133655395e-05, | |
| "loss": 26.6014, | |
| "step": 1978 | |
| }, | |
| { | |
| "epoch": 4.782608695652174, | |
| "grad_norm": 6.755998611450195, | |
| "learning_rate": 4.074074074074074e-05, | |
| "loss": 24.8074, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 4.78743961352657, | |
| "grad_norm": 7.969665050506592, | |
| "learning_rate": 4.057971014492754e-05, | |
| "loss": 26.8168, | |
| "step": 1982 | |
| }, | |
| { | |
| "epoch": 4.792270531400966, | |
| "grad_norm": 6.537661552429199, | |
| "learning_rate": 4.041867954911434e-05, | |
| "loss": 26.8878, | |
| "step": 1984 | |
| }, | |
| { | |
| "epoch": 4.797101449275362, | |
| "grad_norm": 7.462778091430664, | |
| "learning_rate": 4.025764895330113e-05, | |
| "loss": 26.6395, | |
| "step": 1986 | |
| }, | |
| { | |
| "epoch": 4.8019323671497585, | |
| "grad_norm": 7.199199199676514, | |
| "learning_rate": 4.0096618357487925e-05, | |
| "loss": 26.6467, | |
| "step": 1988 | |
| }, | |
| { | |
| "epoch": 4.806763285024155, | |
| "grad_norm": 6.970396995544434, | |
| "learning_rate": 3.993558776167472e-05, | |
| "loss": 27.3238, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 4.811594202898551, | |
| "grad_norm": 6.526374340057373, | |
| "learning_rate": 3.977455716586151e-05, | |
| "loss": 26.7327, | |
| "step": 1992 | |
| }, | |
| { | |
| "epoch": 4.816425120772947, | |
| "grad_norm": 7.019384384155273, | |
| "learning_rate": 3.961352657004831e-05, | |
| "loss": 24.6199, | |
| "step": 1994 | |
| }, | |
| { | |
| "epoch": 4.821256038647343, | |
| "grad_norm": 7.474978923797607, | |
| "learning_rate": 3.9452495974235105e-05, | |
| "loss": 26.8535, | |
| "step": 1996 | |
| }, | |
| { | |
| "epoch": 4.826086956521739, | |
| "grad_norm": 7.651355266571045, | |
| "learning_rate": 3.92914653784219e-05, | |
| "loss": 25.2036, | |
| "step": 1998 | |
| }, | |
| { | |
| "epoch": 4.830917874396135, | |
| "grad_norm": 6.540372848510742, | |
| "learning_rate": 3.91304347826087e-05, | |
| "loss": 26.1222, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 4.835748792270532, | |
| "grad_norm": 7.769553184509277, | |
| "learning_rate": 3.8969404186795496e-05, | |
| "loss": 25.3271, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 4.840579710144928, | |
| "grad_norm": 7.059219837188721, | |
| "learning_rate": 3.8808373590982286e-05, | |
| "loss": 27.5878, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 4.845410628019324, | |
| "grad_norm": 7.040493011474609, | |
| "learning_rate": 3.864734299516908e-05, | |
| "loss": 24.8298, | |
| "step": 2006 | |
| }, | |
| { | |
| "epoch": 4.85024154589372, | |
| "grad_norm": 6.8158111572265625, | |
| "learning_rate": 3.848631239935588e-05, | |
| "loss": 25.9933, | |
| "step": 2008 | |
| }, | |
| { | |
| "epoch": 4.855072463768116, | |
| "grad_norm": 6.576706886291504, | |
| "learning_rate": 3.8325281803542676e-05, | |
| "loss": 25.7341, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 4.859903381642512, | |
| "grad_norm": 6.51364803314209, | |
| "learning_rate": 3.8164251207729466e-05, | |
| "loss": 27.156, | |
| "step": 2012 | |
| }, | |
| { | |
| "epoch": 4.8647342995169085, | |
| "grad_norm": 7.035210609436035, | |
| "learning_rate": 3.800322061191627e-05, | |
| "loss": 25.8662, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 4.869565217391305, | |
| "grad_norm": 8.57784366607666, | |
| "learning_rate": 3.784219001610306e-05, | |
| "loss": 27.2607, | |
| "step": 2016 | |
| }, | |
| { | |
| "epoch": 4.874396135265701, | |
| "grad_norm": 7.060666084289551, | |
| "learning_rate": 3.7681159420289856e-05, | |
| "loss": 25.5058, | |
| "step": 2018 | |
| }, | |
| { | |
| "epoch": 4.879227053140097, | |
| "grad_norm": 6.544167995452881, | |
| "learning_rate": 3.752012882447665e-05, | |
| "loss": 27.5042, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 4.884057971014493, | |
| "grad_norm": 7.82602071762085, | |
| "learning_rate": 3.735909822866345e-05, | |
| "loss": 25.1871, | |
| "step": 2022 | |
| }, | |
| { | |
| "epoch": 4.888888888888889, | |
| "grad_norm": 6.692302227020264, | |
| "learning_rate": 3.719806763285024e-05, | |
| "loss": 24.8878, | |
| "step": 2024 | |
| }, | |
| { | |
| "epoch": 4.8937198067632846, | |
| "grad_norm": 6.907380104064941, | |
| "learning_rate": 3.7037037037037037e-05, | |
| "loss": 26.2569, | |
| "step": 2026 | |
| }, | |
| { | |
| "epoch": 4.898550724637682, | |
| "grad_norm": 6.529886245727539, | |
| "learning_rate": 3.687600644122383e-05, | |
| "loss": 25.2621, | |
| "step": 2028 | |
| }, | |
| { | |
| "epoch": 4.903381642512077, | |
| "grad_norm": 8.162117958068848, | |
| "learning_rate": 3.671497584541063e-05, | |
| "loss": 25.3648, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 4.908212560386474, | |
| "grad_norm": 7.2825422286987305, | |
| "learning_rate": 3.655394524959743e-05, | |
| "loss": 25.0396, | |
| "step": 2032 | |
| }, | |
| { | |
| "epoch": 4.913043478260869, | |
| "grad_norm": 7.4677886962890625, | |
| "learning_rate": 3.6392914653784224e-05, | |
| "loss": 26.3367, | |
| "step": 2034 | |
| }, | |
| { | |
| "epoch": 4.917874396135265, | |
| "grad_norm": 6.709794521331787, | |
| "learning_rate": 3.6231884057971014e-05, | |
| "loss": 25.5507, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 4.9227053140096615, | |
| "grad_norm": 6.555368423461914, | |
| "learning_rate": 3.607085346215781e-05, | |
| "loss": 24.5421, | |
| "step": 2038 | |
| }, | |
| { | |
| "epoch": 4.927536231884058, | |
| "grad_norm": 6.405154705047607, | |
| "learning_rate": 3.590982286634461e-05, | |
| "loss": 25.699, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 4.932367149758454, | |
| "grad_norm": 7.2418012619018555, | |
| "learning_rate": 3.57487922705314e-05, | |
| "loss": 25.3032, | |
| "step": 2042 | |
| }, | |
| { | |
| "epoch": 4.93719806763285, | |
| "grad_norm": 7.165282726287842, | |
| "learning_rate": 3.55877616747182e-05, | |
| "loss": 27.57, | |
| "step": 2044 | |
| }, | |
| { | |
| "epoch": 4.942028985507246, | |
| "grad_norm": 8.555087089538574, | |
| "learning_rate": 3.5426731078905e-05, | |
| "loss": 25.6545, | |
| "step": 2046 | |
| }, | |
| { | |
| "epoch": 4.946859903381642, | |
| "grad_norm": 7.7885613441467285, | |
| "learning_rate": 3.526570048309179e-05, | |
| "loss": 25.3195, | |
| "step": 2048 | |
| }, | |
| { | |
| "epoch": 4.951690821256038, | |
| "grad_norm": 6.383197784423828, | |
| "learning_rate": 3.5104669887278584e-05, | |
| "loss": 27.5458, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 4.956521739130435, | |
| "grad_norm": 7.210457801818848, | |
| "learning_rate": 3.494363929146538e-05, | |
| "loss": 26.0986, | |
| "step": 2052 | |
| }, | |
| { | |
| "epoch": 4.961352657004831, | |
| "grad_norm": 6.477179050445557, | |
| "learning_rate": 3.478260869565218e-05, | |
| "loss": 26.3113, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 4.966183574879227, | |
| "grad_norm": 6.750316619873047, | |
| "learning_rate": 3.462157809983897e-05, | |
| "loss": 26.5696, | |
| "step": 2056 | |
| }, | |
| { | |
| "epoch": 4.971014492753623, | |
| "grad_norm": 6.577611923217773, | |
| "learning_rate": 3.4460547504025764e-05, | |
| "loss": 26.4256, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 4.975845410628019, | |
| "grad_norm": 7.024559020996094, | |
| "learning_rate": 3.429951690821256e-05, | |
| "loss": 24.9867, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 4.980676328502415, | |
| "grad_norm": 7.051502704620361, | |
| "learning_rate": 3.413848631239936e-05, | |
| "loss": 25.3915, | |
| "step": 2062 | |
| }, | |
| { | |
| "epoch": 4.9855072463768115, | |
| "grad_norm": 7.6836838722229, | |
| "learning_rate": 3.3977455716586155e-05, | |
| "loss": 24.8861, | |
| "step": 2064 | |
| }, | |
| { | |
| "epoch": 4.990338164251208, | |
| "grad_norm": 7.69392204284668, | |
| "learning_rate": 3.381642512077295e-05, | |
| "loss": 26.2732, | |
| "step": 2066 | |
| }, | |
| { | |
| "epoch": 4.995169082125604, | |
| "grad_norm": 7.139024257659912, | |
| "learning_rate": 3.365539452495974e-05, | |
| "loss": 24.6846, | |
| "step": 2068 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "grad_norm": 6.70409631729126, | |
| "learning_rate": 3.349436392914654e-05, | |
| "loss": 27.4742, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 5.004830917874396, | |
| "grad_norm": 6.803808212280273, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 24.7629, | |
| "step": 2072 | |
| }, | |
| { | |
| "epoch": 5.009661835748792, | |
| "grad_norm": 6.341485977172852, | |
| "learning_rate": 3.3172302737520125e-05, | |
| "loss": 27.7422, | |
| "step": 2074 | |
| }, | |
| { | |
| "epoch": 5.0144927536231885, | |
| "grad_norm": 6.5449066162109375, | |
| "learning_rate": 3.301127214170693e-05, | |
| "loss": 26.3897, | |
| "step": 2076 | |
| }, | |
| { | |
| "epoch": 5.019323671497585, | |
| "grad_norm": 6.326546669006348, | |
| "learning_rate": 3.2850241545893725e-05, | |
| "loss": 26.7782, | |
| "step": 2078 | |
| }, | |
| { | |
| "epoch": 5.024154589371981, | |
| "grad_norm": 7.492796897888184, | |
| "learning_rate": 3.2689210950080515e-05, | |
| "loss": 25.2565, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 5.028985507246377, | |
| "grad_norm": 7.679995536804199, | |
| "learning_rate": 3.252818035426731e-05, | |
| "loss": 25.2879, | |
| "step": 2082 | |
| }, | |
| { | |
| "epoch": 5.033816425120773, | |
| "grad_norm": 6.634117126464844, | |
| "learning_rate": 3.236714975845411e-05, | |
| "loss": 27.5415, | |
| "step": 2084 | |
| }, | |
| { | |
| "epoch": 5.038647342995169, | |
| "grad_norm": 6.707841873168945, | |
| "learning_rate": 3.22061191626409e-05, | |
| "loss": 26.2413, | |
| "step": 2086 | |
| }, | |
| { | |
| "epoch": 5.043478260869565, | |
| "grad_norm": 7.303376197814941, | |
| "learning_rate": 3.2045088566827695e-05, | |
| "loss": 22.9713, | |
| "step": 2088 | |
| }, | |
| { | |
| "epoch": 5.048309178743962, | |
| "grad_norm": 5.641716957092285, | |
| "learning_rate": 3.188405797101449e-05, | |
| "loss": 23.7231, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 5.053140096618358, | |
| "grad_norm": 7.5472636222839355, | |
| "learning_rate": 3.172302737520129e-05, | |
| "loss": 26.3035, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 5.057971014492754, | |
| "grad_norm": 6.629962921142578, | |
| "learning_rate": 3.1561996779388086e-05, | |
| "loss": 27.2519, | |
| "step": 2094 | |
| }, | |
| { | |
| "epoch": 5.06280193236715, | |
| "grad_norm": 6.610307216644287, | |
| "learning_rate": 3.140096618357488e-05, | |
| "loss": 24.3596, | |
| "step": 2096 | |
| }, | |
| { | |
| "epoch": 5.067632850241546, | |
| "grad_norm": 8.222330093383789, | |
| "learning_rate": 3.123993558776168e-05, | |
| "loss": 26.812, | |
| "step": 2098 | |
| }, | |
| { | |
| "epoch": 5.072463768115942, | |
| "grad_norm": 7.391679763793945, | |
| "learning_rate": 3.107890499194847e-05, | |
| "loss": 23.9302, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 5.0772946859903385, | |
| "grad_norm": 7.474515914916992, | |
| "learning_rate": 3.0917874396135266e-05, | |
| "loss": 26.0697, | |
| "step": 2102 | |
| }, | |
| { | |
| "epoch": 5.082125603864735, | |
| "grad_norm": 6.373252868652344, | |
| "learning_rate": 3.075684380032206e-05, | |
| "loss": 26.3179, | |
| "step": 2104 | |
| }, | |
| { | |
| "epoch": 5.086956521739131, | |
| "grad_norm": 7.464061260223389, | |
| "learning_rate": 3.059581320450886e-05, | |
| "loss": 25.6336, | |
| "step": 2106 | |
| }, | |
| { | |
| "epoch": 5.091787439613527, | |
| "grad_norm": 6.995118618011475, | |
| "learning_rate": 3.0434782608695656e-05, | |
| "loss": 26.2471, | |
| "step": 2108 | |
| }, | |
| { | |
| "epoch": 5.096618357487923, | |
| "grad_norm": 7.116311550140381, | |
| "learning_rate": 3.027375201288245e-05, | |
| "loss": 26.614, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 5.101449275362318, | |
| "grad_norm": 6.943987846374512, | |
| "learning_rate": 3.0112721417069246e-05, | |
| "loss": 25.0339, | |
| "step": 2112 | |
| }, | |
| { | |
| "epoch": 5.106280193236715, | |
| "grad_norm": 7.350955009460449, | |
| "learning_rate": 2.995169082125604e-05, | |
| "loss": 26.2694, | |
| "step": 2114 | |
| }, | |
| { | |
| "epoch": 5.111111111111111, | |
| "grad_norm": 6.849686622619629, | |
| "learning_rate": 2.9790660225442833e-05, | |
| "loss": 27.1826, | |
| "step": 2116 | |
| }, | |
| { | |
| "epoch": 5.115942028985507, | |
| "grad_norm": 7.7651567459106445, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 24.8541, | |
| "step": 2118 | |
| }, | |
| { | |
| "epoch": 5.120772946859903, | |
| "grad_norm": 5.836477279663086, | |
| "learning_rate": 2.9468599033816423e-05, | |
| "loss": 25.0215, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 5.125603864734299, | |
| "grad_norm": 6.189184665679932, | |
| "learning_rate": 2.9307568438003223e-05, | |
| "loss": 26.9194, | |
| "step": 2122 | |
| }, | |
| { | |
| "epoch": 5.130434782608695, | |
| "grad_norm": 6.857696533203125, | |
| "learning_rate": 2.914653784219002e-05, | |
| "loss": 26.1156, | |
| "step": 2124 | |
| }, | |
| { | |
| "epoch": 5.1352657004830915, | |
| "grad_norm": 6.773160934448242, | |
| "learning_rate": 2.8985507246376814e-05, | |
| "loss": 25.4986, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 5.140096618357488, | |
| "grad_norm": 8.016234397888184, | |
| "learning_rate": 2.882447665056361e-05, | |
| "loss": 26.8887, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 5.144927536231884, | |
| "grad_norm": 7.765948295593262, | |
| "learning_rate": 2.8663446054750404e-05, | |
| "loss": 24.2822, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 5.14975845410628, | |
| "grad_norm": 7.044548511505127, | |
| "learning_rate": 2.8502415458937197e-05, | |
| "loss": 25.6158, | |
| "step": 2132 | |
| }, | |
| { | |
| "epoch": 5.154589371980676, | |
| "grad_norm": 6.452057361602783, | |
| "learning_rate": 2.8341384863123994e-05, | |
| "loss": 24.7033, | |
| "step": 2134 | |
| }, | |
| { | |
| "epoch": 5.159420289855072, | |
| "grad_norm": 6.443338394165039, | |
| "learning_rate": 2.8180354267310787e-05, | |
| "loss": 25.911, | |
| "step": 2136 | |
| }, | |
| { | |
| "epoch": 5.164251207729468, | |
| "grad_norm": 7.172874450683594, | |
| "learning_rate": 2.8019323671497587e-05, | |
| "loss": 25.0313, | |
| "step": 2138 | |
| }, | |
| { | |
| "epoch": 5.169082125603865, | |
| "grad_norm": 7.001052379608154, | |
| "learning_rate": 2.7858293075684384e-05, | |
| "loss": 27.4582, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 5.173913043478261, | |
| "grad_norm": 6.618391513824463, | |
| "learning_rate": 2.7697262479871177e-05, | |
| "loss": 25.3195, | |
| "step": 2142 | |
| }, | |
| { | |
| "epoch": 5.178743961352657, | |
| "grad_norm": 7.667540073394775, | |
| "learning_rate": 2.753623188405797e-05, | |
| "loss": 24.9384, | |
| "step": 2144 | |
| }, | |
| { | |
| "epoch": 5.183574879227053, | |
| "grad_norm": 7.570556163787842, | |
| "learning_rate": 2.7375201288244768e-05, | |
| "loss": 25.1559, | |
| "step": 2146 | |
| }, | |
| { | |
| "epoch": 5.188405797101449, | |
| "grad_norm": 8.569737434387207, | |
| "learning_rate": 2.721417069243156e-05, | |
| "loss": 24.5403, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 5.193236714975845, | |
| "grad_norm": 6.5838623046875, | |
| "learning_rate": 2.7053140096618358e-05, | |
| "loss": 25.5745, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 5.1980676328502415, | |
| "grad_norm": 6.626333713531494, | |
| "learning_rate": 2.689210950080515e-05, | |
| "loss": 25.6172, | |
| "step": 2152 | |
| }, | |
| { | |
| "epoch": 5.202898550724638, | |
| "grad_norm": 7.9010186195373535, | |
| "learning_rate": 2.673107890499195e-05, | |
| "loss": 25.8519, | |
| "step": 2154 | |
| }, | |
| { | |
| "epoch": 5.207729468599034, | |
| "grad_norm": 6.161978244781494, | |
| "learning_rate": 2.6570048309178748e-05, | |
| "loss": 24.2309, | |
| "step": 2156 | |
| }, | |
| { | |
| "epoch": 5.21256038647343, | |
| "grad_norm": 6.870685577392578, | |
| "learning_rate": 2.640901771336554e-05, | |
| "loss": 27.1665, | |
| "step": 2158 | |
| }, | |
| { | |
| "epoch": 5.217391304347826, | |
| "grad_norm": 7.303822040557861, | |
| "learning_rate": 2.6247987117552335e-05, | |
| "loss": 25.556, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 5.222222222222222, | |
| "grad_norm": 6.584065914154053, | |
| "learning_rate": 2.608695652173913e-05, | |
| "loss": 25.468, | |
| "step": 2162 | |
| }, | |
| { | |
| "epoch": 5.2270531400966185, | |
| "grad_norm": 7.221360683441162, | |
| "learning_rate": 2.5925925925925925e-05, | |
| "loss": 25.8624, | |
| "step": 2164 | |
| }, | |
| { | |
| "epoch": 5.231884057971015, | |
| "grad_norm": 7.08326530456543, | |
| "learning_rate": 2.576489533011272e-05, | |
| "loss": 26.5428, | |
| "step": 2166 | |
| }, | |
| { | |
| "epoch": 5.236714975845411, | |
| "grad_norm": 6.360510349273682, | |
| "learning_rate": 2.5603864734299522e-05, | |
| "loss": 26.695, | |
| "step": 2168 | |
| }, | |
| { | |
| "epoch": 5.241545893719807, | |
| "grad_norm": 7.52411413192749, | |
| "learning_rate": 2.5442834138486315e-05, | |
| "loss": 25.9067, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 5.246376811594203, | |
| "grad_norm": 6.968140602111816, | |
| "learning_rate": 2.5281803542673112e-05, | |
| "loss": 25.0371, | |
| "step": 2172 | |
| }, | |
| { | |
| "epoch": 5.251207729468599, | |
| "grad_norm": 7.372687339782715, | |
| "learning_rate": 2.5120772946859905e-05, | |
| "loss": 26.3727, | |
| "step": 2174 | |
| }, | |
| { | |
| "epoch": 5.256038647342995, | |
| "grad_norm": 7.292659759521484, | |
| "learning_rate": 2.49597423510467e-05, | |
| "loss": 26.8115, | |
| "step": 2176 | |
| }, | |
| { | |
| "epoch": 5.260869565217392, | |
| "grad_norm": 6.425929546356201, | |
| "learning_rate": 2.4798711755233495e-05, | |
| "loss": 27.7444, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 5.265700483091788, | |
| "grad_norm": 7.451976776123047, | |
| "learning_rate": 2.4637681159420292e-05, | |
| "loss": 26.5838, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 5.270531400966184, | |
| "grad_norm": 7.282567024230957, | |
| "learning_rate": 2.4476650563607086e-05, | |
| "loss": 25.9177, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 5.27536231884058, | |
| "grad_norm": 7.04587459564209, | |
| "learning_rate": 2.4315619967793882e-05, | |
| "loss": 25.8643, | |
| "step": 2184 | |
| }, | |
| { | |
| "epoch": 5.280193236714976, | |
| "grad_norm": 7.137731075286865, | |
| "learning_rate": 2.4154589371980676e-05, | |
| "loss": 26.6203, | |
| "step": 2186 | |
| }, | |
| { | |
| "epoch": 5.285024154589372, | |
| "grad_norm": 6.674662113189697, | |
| "learning_rate": 2.3993558776167472e-05, | |
| "loss": 25.3759, | |
| "step": 2188 | |
| }, | |
| { | |
| "epoch": 5.2898550724637685, | |
| "grad_norm": 6.6438164710998535, | |
| "learning_rate": 2.383252818035427e-05, | |
| "loss": 24.2837, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 5.294685990338165, | |
| "grad_norm": 7.651294708251953, | |
| "learning_rate": 2.3671497584541063e-05, | |
| "loss": 26.9551, | |
| "step": 2192 | |
| }, | |
| { | |
| "epoch": 5.29951690821256, | |
| "grad_norm": 6.606574058532715, | |
| "learning_rate": 2.351046698872786e-05, | |
| "loss": 24.8967, | |
| "step": 2194 | |
| }, | |
| { | |
| "epoch": 5.304347826086957, | |
| "grad_norm": 6.956263065338135, | |
| "learning_rate": 2.3349436392914656e-05, | |
| "loss": 26.3236, | |
| "step": 2196 | |
| }, | |
| { | |
| "epoch": 5.309178743961352, | |
| "grad_norm": 7.141554832458496, | |
| "learning_rate": 2.318840579710145e-05, | |
| "loss": 26.4252, | |
| "step": 2198 | |
| }, | |
| { | |
| "epoch": 5.314009661835748, | |
| "grad_norm": 6.030832290649414, | |
| "learning_rate": 2.3027375201288246e-05, | |
| "loss": 28.0596, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 5.318840579710145, | |
| "grad_norm": 6.431146621704102, | |
| "learning_rate": 2.286634460547504e-05, | |
| "loss": 26.1359, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 5.323671497584541, | |
| "grad_norm": 7.26776647567749, | |
| "learning_rate": 2.2705314009661836e-05, | |
| "loss": 23.1691, | |
| "step": 2204 | |
| }, | |
| { | |
| "epoch": 5.328502415458937, | |
| "grad_norm": 7.198235988616943, | |
| "learning_rate": 2.2544283413848633e-05, | |
| "loss": 24.2558, | |
| "step": 2206 | |
| }, | |
| { | |
| "epoch": 5.333333333333333, | |
| "grad_norm": 7.205248832702637, | |
| "learning_rate": 2.2383252818035427e-05, | |
| "loss": 26.1497, | |
| "step": 2208 | |
| }, | |
| { | |
| "epoch": 5.338164251207729, | |
| "grad_norm": 6.834975242614746, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 25.5315, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 5.342995169082125, | |
| "grad_norm": 6.981115341186523, | |
| "learning_rate": 2.206119162640902e-05, | |
| "loss": 25.0263, | |
| "step": 2212 | |
| }, | |
| { | |
| "epoch": 5.3478260869565215, | |
| "grad_norm": 6.798349380493164, | |
| "learning_rate": 2.1900161030595813e-05, | |
| "loss": 27.7364, | |
| "step": 2214 | |
| }, | |
| { | |
| "epoch": 5.352657004830918, | |
| "grad_norm": 7.136117458343506, | |
| "learning_rate": 2.173913043478261e-05, | |
| "loss": 26.0488, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 5.357487922705314, | |
| "grad_norm": 6.846739768981934, | |
| "learning_rate": 2.1578099838969404e-05, | |
| "loss": 24.9772, | |
| "step": 2218 | |
| }, | |
| { | |
| "epoch": 5.36231884057971, | |
| "grad_norm": 7.294228553771973, | |
| "learning_rate": 2.14170692431562e-05, | |
| "loss": 27.0872, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 5.367149758454106, | |
| "grad_norm": 7.222455978393555, | |
| "learning_rate": 2.1256038647342997e-05, | |
| "loss": 25.2418, | |
| "step": 2222 | |
| }, | |
| { | |
| "epoch": 5.371980676328502, | |
| "grad_norm": 6.867911338806152, | |
| "learning_rate": 2.109500805152979e-05, | |
| "loss": 26.6485, | |
| "step": 2224 | |
| }, | |
| { | |
| "epoch": 5.3768115942028984, | |
| "grad_norm": 7.119537353515625, | |
| "learning_rate": 2.0933977455716587e-05, | |
| "loss": 24.7069, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 5.381642512077295, | |
| "grad_norm": 6.486376762390137, | |
| "learning_rate": 2.0772946859903384e-05, | |
| "loss": 25.5981, | |
| "step": 2228 | |
| }, | |
| { | |
| "epoch": 5.386473429951691, | |
| "grad_norm": 6.030795097351074, | |
| "learning_rate": 2.0611916264090177e-05, | |
| "loss": 26.4169, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 5.391304347826087, | |
| "grad_norm": 6.1018171310424805, | |
| "learning_rate": 2.0450885668276974e-05, | |
| "loss": 25.8114, | |
| "step": 2232 | |
| }, | |
| { | |
| "epoch": 5.396135265700483, | |
| "grad_norm": 6.3123860359191895, | |
| "learning_rate": 2.028985507246377e-05, | |
| "loss": 27.2772, | |
| "step": 2234 | |
| }, | |
| { | |
| "epoch": 5.400966183574879, | |
| "grad_norm": 7.111965179443359, | |
| "learning_rate": 2.0128824476650564e-05, | |
| "loss": 22.8083, | |
| "step": 2236 | |
| }, | |
| { | |
| "epoch": 5.405797101449275, | |
| "grad_norm": 6.663313865661621, | |
| "learning_rate": 1.996779388083736e-05, | |
| "loss": 26.1006, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 5.4106280193236715, | |
| "grad_norm": 7.1827287673950195, | |
| "learning_rate": 1.9806763285024154e-05, | |
| "loss": 25.9993, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 5.415458937198068, | |
| "grad_norm": 6.989486217498779, | |
| "learning_rate": 1.964573268921095e-05, | |
| "loss": 27.1008, | |
| "step": 2242 | |
| }, | |
| { | |
| "epoch": 5.420289855072464, | |
| "grad_norm": 7.407745361328125, | |
| "learning_rate": 1.9484702093397748e-05, | |
| "loss": 25.4639, | |
| "step": 2244 | |
| }, | |
| { | |
| "epoch": 5.42512077294686, | |
| "grad_norm": 6.708901405334473, | |
| "learning_rate": 1.932367149758454e-05, | |
| "loss": 26.7837, | |
| "step": 2246 | |
| }, | |
| { | |
| "epoch": 5.429951690821256, | |
| "grad_norm": 6.670323848724365, | |
| "learning_rate": 1.9162640901771338e-05, | |
| "loss": 25.7905, | |
| "step": 2248 | |
| }, | |
| { | |
| "epoch": 5.434782608695652, | |
| "grad_norm": 7.481121063232422, | |
| "learning_rate": 1.9001610305958135e-05, | |
| "loss": 25.6084, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 5.4396135265700485, | |
| "grad_norm": 7.1586480140686035, | |
| "learning_rate": 1.8840579710144928e-05, | |
| "loss": 25.4086, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 5.444444444444445, | |
| "grad_norm": 6.693662166595459, | |
| "learning_rate": 1.8679549114331725e-05, | |
| "loss": 25.3542, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 5.449275362318841, | |
| "grad_norm": 6.597439289093018, | |
| "learning_rate": 1.8518518518518518e-05, | |
| "loss": 24.7352, | |
| "step": 2256 | |
| }, | |
| { | |
| "epoch": 5.454106280193237, | |
| "grad_norm": 6.5035400390625, | |
| "learning_rate": 1.8357487922705315e-05, | |
| "loss": 25.9761, | |
| "step": 2258 | |
| }, | |
| { | |
| "epoch": 5.458937198067633, | |
| "grad_norm": 6.170787811279297, | |
| "learning_rate": 1.8196457326892112e-05, | |
| "loss": 26.8993, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 5.463768115942029, | |
| "grad_norm": 6.216879367828369, | |
| "learning_rate": 1.8035426731078905e-05, | |
| "loss": 23.9469, | |
| "step": 2262 | |
| }, | |
| { | |
| "epoch": 5.468599033816425, | |
| "grad_norm": 6.804856777191162, | |
| "learning_rate": 1.78743961352657e-05, | |
| "loss": 27.1758, | |
| "step": 2264 | |
| }, | |
| { | |
| "epoch": 5.473429951690822, | |
| "grad_norm": 7.740478038787842, | |
| "learning_rate": 1.77133655394525e-05, | |
| "loss": 26.7605, | |
| "step": 2266 | |
| }, | |
| { | |
| "epoch": 5.478260869565218, | |
| "grad_norm": 6.862391471862793, | |
| "learning_rate": 1.7552334943639292e-05, | |
| "loss": 26.0278, | |
| "step": 2268 | |
| }, | |
| { | |
| "epoch": 5.483091787439614, | |
| "grad_norm": 6.675685882568359, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 24.291, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 5.48792270531401, | |
| "grad_norm": 7.202348232269287, | |
| "learning_rate": 1.7230273752012882e-05, | |
| "loss": 25.3346, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 5.492753623188406, | |
| "grad_norm": 7.335130214691162, | |
| "learning_rate": 1.706924315619968e-05, | |
| "loss": 24.2558, | |
| "step": 2274 | |
| }, | |
| { | |
| "epoch": 5.4975845410628015, | |
| "grad_norm": 6.820517539978027, | |
| "learning_rate": 1.6908212560386476e-05, | |
| "loss": 24.8161, | |
| "step": 2276 | |
| }, | |
| { | |
| "epoch": 5.5024154589371985, | |
| "grad_norm": 6.23611307144165, | |
| "learning_rate": 1.674718196457327e-05, | |
| "loss": 25.4897, | |
| "step": 2278 | |
| }, | |
| { | |
| "epoch": 5.507246376811594, | |
| "grad_norm": 6.273251056671143, | |
| "learning_rate": 1.6586151368760062e-05, | |
| "loss": 25.4466, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 5.512077294685991, | |
| "grad_norm": 6.126486301422119, | |
| "learning_rate": 1.6425120772946863e-05, | |
| "loss": 26.4718, | |
| "step": 2282 | |
| }, | |
| { | |
| "epoch": 5.516908212560386, | |
| "grad_norm": 6.196963787078857, | |
| "learning_rate": 1.6264090177133656e-05, | |
| "loss": 25.5031, | |
| "step": 2284 | |
| }, | |
| { | |
| "epoch": 5.521739130434782, | |
| "grad_norm": 6.553043842315674, | |
| "learning_rate": 1.610305958132045e-05, | |
| "loss": 26.3876, | |
| "step": 2286 | |
| }, | |
| { | |
| "epoch": 5.526570048309178, | |
| "grad_norm": 6.308940887451172, | |
| "learning_rate": 1.5942028985507246e-05, | |
| "loss": 25.3395, | |
| "step": 2288 | |
| }, | |
| { | |
| "epoch": 5.531400966183575, | |
| "grad_norm": 5.9868059158325195, | |
| "learning_rate": 1.5780998389694043e-05, | |
| "loss": 26.1367, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 5.536231884057971, | |
| "grad_norm": 5.966738224029541, | |
| "learning_rate": 1.561996779388084e-05, | |
| "loss": 24.9832, | |
| "step": 2292 | |
| }, | |
| { | |
| "epoch": 5.541062801932367, | |
| "grad_norm": 6.130259990692139, | |
| "learning_rate": 1.5458937198067633e-05, | |
| "loss": 26.0377, | |
| "step": 2294 | |
| }, | |
| { | |
| "epoch": 5.545893719806763, | |
| "grad_norm": 6.351025104522705, | |
| "learning_rate": 1.529790660225443e-05, | |
| "loss": 25.387, | |
| "step": 2296 | |
| }, | |
| { | |
| "epoch": 5.550724637681159, | |
| "grad_norm": 7.592315673828125, | |
| "learning_rate": 1.5136876006441225e-05, | |
| "loss": 25.8822, | |
| "step": 2298 | |
| }, | |
| { | |
| "epoch": 5.555555555555555, | |
| "grad_norm": 7.366810321807861, | |
| "learning_rate": 1.497584541062802e-05, | |
| "loss": 25.716, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 5.5603864734299515, | |
| "grad_norm": 6.494503974914551, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 24.903, | |
| "step": 2302 | |
| }, | |
| { | |
| "epoch": 5.565217391304348, | |
| "grad_norm": 6.354084491729736, | |
| "learning_rate": 1.4653784219001612e-05, | |
| "loss": 24.19, | |
| "step": 2304 | |
| }, | |
| { | |
| "epoch": 5.570048309178744, | |
| "grad_norm": 6.83246374130249, | |
| "learning_rate": 1.4492753623188407e-05, | |
| "loss": 25.3202, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 5.57487922705314, | |
| "grad_norm": 7.3366379737854, | |
| "learning_rate": 1.4331723027375202e-05, | |
| "loss": 26.5993, | |
| "step": 2308 | |
| }, | |
| { | |
| "epoch": 5.579710144927536, | |
| "grad_norm": 6.854272842407227, | |
| "learning_rate": 1.4170692431561997e-05, | |
| "loss": 27.5142, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 5.584541062801932, | |
| "grad_norm": 7.033668041229248, | |
| "learning_rate": 1.4009661835748794e-05, | |
| "loss": 24.9908, | |
| "step": 2312 | |
| }, | |
| { | |
| "epoch": 5.5893719806763285, | |
| "grad_norm": 5.725836277008057, | |
| "learning_rate": 1.3848631239935589e-05, | |
| "loss": 26.9088, | |
| "step": 2314 | |
| }, | |
| { | |
| "epoch": 5.594202898550725, | |
| "grad_norm": 6.002683162689209, | |
| "learning_rate": 1.3687600644122384e-05, | |
| "loss": 26.1845, | |
| "step": 2316 | |
| }, | |
| { | |
| "epoch": 5.599033816425121, | |
| "grad_norm": 6.32890510559082, | |
| "learning_rate": 1.3526570048309179e-05, | |
| "loss": 24.4862, | |
| "step": 2318 | |
| }, | |
| { | |
| "epoch": 5.603864734299517, | |
| "grad_norm": 6.316839694976807, | |
| "learning_rate": 1.3365539452495976e-05, | |
| "loss": 25.2277, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 5.608695652173913, | |
| "grad_norm": 6.241401672363281, | |
| "learning_rate": 1.320450885668277e-05, | |
| "loss": 26.0008, | |
| "step": 2322 | |
| }, | |
| { | |
| "epoch": 5.613526570048309, | |
| "grad_norm": 6.929868221282959, | |
| "learning_rate": 1.3043478260869566e-05, | |
| "loss": 24.5377, | |
| "step": 2324 | |
| }, | |
| { | |
| "epoch": 5.618357487922705, | |
| "grad_norm": 6.343822956085205, | |
| "learning_rate": 1.288244766505636e-05, | |
| "loss": 24.4674, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 5.6231884057971016, | |
| "grad_norm": 7.933018684387207, | |
| "learning_rate": 1.2721417069243158e-05, | |
| "loss": 25.4539, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 5.628019323671498, | |
| "grad_norm": 6.561947345733643, | |
| "learning_rate": 1.2560386473429953e-05, | |
| "loss": 25.2955, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 5.632850241545894, | |
| "grad_norm": 7.06411075592041, | |
| "learning_rate": 1.2399355877616748e-05, | |
| "loss": 26.2145, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 5.63768115942029, | |
| "grad_norm": 8.267963409423828, | |
| "learning_rate": 1.2238325281803543e-05, | |
| "loss": 24.2516, | |
| "step": 2334 | |
| }, | |
| { | |
| "epoch": 5.642512077294686, | |
| "grad_norm": 7.202125072479248, | |
| "learning_rate": 1.2077294685990338e-05, | |
| "loss": 27.0705, | |
| "step": 2336 | |
| }, | |
| { | |
| "epoch": 5.647342995169082, | |
| "grad_norm": 6.419391632080078, | |
| "learning_rate": 1.1916264090177135e-05, | |
| "loss": 23.7174, | |
| "step": 2338 | |
| }, | |
| { | |
| "epoch": 5.6521739130434785, | |
| "grad_norm": 6.510631561279297, | |
| "learning_rate": 1.175523349436393e-05, | |
| "loss": 26.1881, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 5.657004830917875, | |
| "grad_norm": 7.408875465393066, | |
| "learning_rate": 1.1594202898550725e-05, | |
| "loss": 24.7877, | |
| "step": 2342 | |
| }, | |
| { | |
| "epoch": 5.661835748792271, | |
| "grad_norm": 6.503478050231934, | |
| "learning_rate": 1.143317230273752e-05, | |
| "loss": 24.4056, | |
| "step": 2344 | |
| }, | |
| { | |
| "epoch": 5.666666666666667, | |
| "grad_norm": 6.382200241088867, | |
| "learning_rate": 1.1272141706924317e-05, | |
| "loss": 25.4992, | |
| "step": 2346 | |
| }, | |
| { | |
| "epoch": 5.671497584541063, | |
| "grad_norm": 6.437609672546387, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 26.6456, | |
| "step": 2348 | |
| }, | |
| { | |
| "epoch": 5.676328502415459, | |
| "grad_norm": 6.871528625488281, | |
| "learning_rate": 1.0950080515297907e-05, | |
| "loss": 25.0839, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 5.681159420289855, | |
| "grad_norm": 8.16054630279541, | |
| "learning_rate": 1.0789049919484702e-05, | |
| "loss": 26.1491, | |
| "step": 2352 | |
| }, | |
| { | |
| "epoch": 5.685990338164252, | |
| "grad_norm": 6.024045467376709, | |
| "learning_rate": 1.0628019323671499e-05, | |
| "loss": 25.0689, | |
| "step": 2354 | |
| }, | |
| { | |
| "epoch": 5.690821256038648, | |
| "grad_norm": 7.976418972015381, | |
| "learning_rate": 1.0466988727858294e-05, | |
| "loss": 26.3663, | |
| "step": 2356 | |
| }, | |
| { | |
| "epoch": 5.695652173913043, | |
| "grad_norm": 5.949817657470703, | |
| "learning_rate": 1.0305958132045089e-05, | |
| "loss": 27.0973, | |
| "step": 2358 | |
| }, | |
| { | |
| "epoch": 5.70048309178744, | |
| "grad_norm": 6.103696823120117, | |
| "learning_rate": 1.0144927536231885e-05, | |
| "loss": 27.1756, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 5.705314009661835, | |
| "grad_norm": 6.458801746368408, | |
| "learning_rate": 9.98389694041868e-06, | |
| "loss": 25.2463, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 5.710144927536232, | |
| "grad_norm": 7.07081413269043, | |
| "learning_rate": 9.822866344605476e-06, | |
| "loss": 26.1127, | |
| "step": 2364 | |
| }, | |
| { | |
| "epoch": 5.714975845410628, | |
| "grad_norm": 8.160017967224121, | |
| "learning_rate": 9.66183574879227e-06, | |
| "loss": 26.3955, | |
| "step": 2366 | |
| }, | |
| { | |
| "epoch": 5.719806763285024, | |
| "grad_norm": 6.197200775146484, | |
| "learning_rate": 9.500805152979067e-06, | |
| "loss": 26.8253, | |
| "step": 2368 | |
| }, | |
| { | |
| "epoch": 5.72463768115942, | |
| "grad_norm": 7.202108860015869, | |
| "learning_rate": 9.339774557165862e-06, | |
| "loss": 26.4504, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 5.729468599033816, | |
| "grad_norm": 6.539680480957031, | |
| "learning_rate": 9.178743961352658e-06, | |
| "loss": 25.6419, | |
| "step": 2372 | |
| }, | |
| { | |
| "epoch": 5.734299516908212, | |
| "grad_norm": 7.3082756996154785, | |
| "learning_rate": 9.017713365539453e-06, | |
| "loss": 23.5667, | |
| "step": 2374 | |
| }, | |
| { | |
| "epoch": 5.739130434782608, | |
| "grad_norm": 6.585788726806641, | |
| "learning_rate": 8.85668276972625e-06, | |
| "loss": 27.0034, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 5.743961352657005, | |
| "grad_norm": 8.16417121887207, | |
| "learning_rate": 8.695652173913044e-06, | |
| "loss": 25.4119, | |
| "step": 2378 | |
| }, | |
| { | |
| "epoch": 5.748792270531401, | |
| "grad_norm": 6.153932571411133, | |
| "learning_rate": 8.53462157809984e-06, | |
| "loss": 26.3388, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 5.753623188405797, | |
| "grad_norm": 7.043217182159424, | |
| "learning_rate": 8.373590982286635e-06, | |
| "loss": 27.3709, | |
| "step": 2382 | |
| }, | |
| { | |
| "epoch": 5.758454106280193, | |
| "grad_norm": 6.4633588790893555, | |
| "learning_rate": 8.212560386473431e-06, | |
| "loss": 28.7053, | |
| "step": 2384 | |
| }, | |
| { | |
| "epoch": 5.763285024154589, | |
| "grad_norm": 7.188209056854248, | |
| "learning_rate": 8.051529790660225e-06, | |
| "loss": 26.8206, | |
| "step": 2386 | |
| }, | |
| { | |
| "epoch": 5.768115942028985, | |
| "grad_norm": 6.451449394226074, | |
| "learning_rate": 7.890499194847021e-06, | |
| "loss": 24.2479, | |
| "step": 2388 | |
| }, | |
| { | |
| "epoch": 5.7729468599033815, | |
| "grad_norm": 6.818403720855713, | |
| "learning_rate": 7.729468599033817e-06, | |
| "loss": 25.5073, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 5.777777777777778, | |
| "grad_norm": 7.128567218780518, | |
| "learning_rate": 7.568438003220612e-06, | |
| "loss": 25.6976, | |
| "step": 2392 | |
| }, | |
| { | |
| "epoch": 5.782608695652174, | |
| "grad_norm": 7.445803165435791, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 25.5917, | |
| "step": 2394 | |
| }, | |
| { | |
| "epoch": 5.78743961352657, | |
| "grad_norm": 6.30618953704834, | |
| "learning_rate": 7.246376811594203e-06, | |
| "loss": 24.4957, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 5.792270531400966, | |
| "grad_norm": 6.549522399902344, | |
| "learning_rate": 7.0853462157809985e-06, | |
| "loss": 24.7214, | |
| "step": 2398 | |
| }, | |
| { | |
| "epoch": 5.797101449275362, | |
| "grad_norm": 7.38835334777832, | |
| "learning_rate": 6.924315619967794e-06, | |
| "loss": 25.5254, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 5.8019323671497585, | |
| "grad_norm": 5.928407669067383, | |
| "learning_rate": 6.7632850241545894e-06, | |
| "loss": 26.4047, | |
| "step": 2402 | |
| }, | |
| { | |
| "epoch": 5.806763285024155, | |
| "grad_norm": 6.4094014167785645, | |
| "learning_rate": 6.602254428341385e-06, | |
| "loss": 27.6215, | |
| "step": 2404 | |
| }, | |
| { | |
| "epoch": 5.811594202898551, | |
| "grad_norm": 6.558480739593506, | |
| "learning_rate": 6.44122383252818e-06, | |
| "loss": 26.5965, | |
| "step": 2406 | |
| }, | |
| { | |
| "epoch": 5.816425120772947, | |
| "grad_norm": 6.696255207061768, | |
| "learning_rate": 6.280193236714976e-06, | |
| "loss": 26.2955, | |
| "step": 2408 | |
| }, | |
| { | |
| "epoch": 5.821256038647343, | |
| "grad_norm": 6.232416152954102, | |
| "learning_rate": 6.119162640901771e-06, | |
| "loss": 27.1097, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 5.826086956521739, | |
| "grad_norm": 6.8521199226379395, | |
| "learning_rate": 5.958132045088567e-06, | |
| "loss": 22.8195, | |
| "step": 2412 | |
| }, | |
| { | |
| "epoch": 5.830917874396135, | |
| "grad_norm": 6.833296298980713, | |
| "learning_rate": 5.797101449275362e-06, | |
| "loss": 26.155, | |
| "step": 2414 | |
| }, | |
| { | |
| "epoch": 5.835748792270532, | |
| "grad_norm": 7.534513473510742, | |
| "learning_rate": 5.636070853462158e-06, | |
| "loss": 25.9633, | |
| "step": 2416 | |
| }, | |
| { | |
| "epoch": 5.840579710144928, | |
| "grad_norm": 7.544939041137695, | |
| "learning_rate": 5.475040257648953e-06, | |
| "loss": 26.5301, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 5.845410628019324, | |
| "grad_norm": 6.818538188934326, | |
| "learning_rate": 5.314009661835749e-06, | |
| "loss": 27.4522, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 5.85024154589372, | |
| "grad_norm": 6.0586395263671875, | |
| "learning_rate": 5.152979066022544e-06, | |
| "loss": 24.9713, | |
| "step": 2422 | |
| }, | |
| { | |
| "epoch": 5.855072463768116, | |
| "grad_norm": 6.871267318725586, | |
| "learning_rate": 4.99194847020934e-06, | |
| "loss": 24.1245, | |
| "step": 2424 | |
| }, | |
| { | |
| "epoch": 5.859903381642512, | |
| "grad_norm": 6.431079387664795, | |
| "learning_rate": 4.830917874396135e-06, | |
| "loss": 26.3168, | |
| "step": 2426 | |
| }, | |
| { | |
| "epoch": 5.8647342995169085, | |
| "grad_norm": 6.309189319610596, | |
| "learning_rate": 4.669887278582931e-06, | |
| "loss": 25.1819, | |
| "step": 2428 | |
| }, | |
| { | |
| "epoch": 5.869565217391305, | |
| "grad_norm": 7.3601250648498535, | |
| "learning_rate": 4.508856682769726e-06, | |
| "loss": 25.487, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 5.874396135265701, | |
| "grad_norm": 6.830559730529785, | |
| "learning_rate": 4.347826086956522e-06, | |
| "loss": 24.2465, | |
| "step": 2432 | |
| }, | |
| { | |
| "epoch": 5.879227053140097, | |
| "grad_norm": 6.231956481933594, | |
| "learning_rate": 4.186795491143317e-06, | |
| "loss": 25.4524, | |
| "step": 2434 | |
| }, | |
| { | |
| "epoch": 5.884057971014493, | |
| "grad_norm": 7.170751094818115, | |
| "learning_rate": 4.025764895330112e-06, | |
| "loss": 25.4575, | |
| "step": 2436 | |
| }, | |
| { | |
| "epoch": 5.888888888888889, | |
| "grad_norm": 6.459787845611572, | |
| "learning_rate": 3.864734299516908e-06, | |
| "loss": 25.8052, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 5.8937198067632846, | |
| "grad_norm": 7.013184070587158, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 25.0231, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 5.898550724637682, | |
| "grad_norm": 6.488290786743164, | |
| "learning_rate": 3.5426731078904992e-06, | |
| "loss": 26.5961, | |
| "step": 2442 | |
| }, | |
| { | |
| "epoch": 5.903381642512077, | |
| "grad_norm": 6.819639205932617, | |
| "learning_rate": 3.3816425120772947e-06, | |
| "loss": 24.8935, | |
| "step": 2444 | |
| }, | |
| { | |
| "epoch": 5.908212560386474, | |
| "grad_norm": 6.606305122375488, | |
| "learning_rate": 3.22061191626409e-06, | |
| "loss": 25.7707, | |
| "step": 2446 | |
| }, | |
| { | |
| "epoch": 5.913043478260869, | |
| "grad_norm": 6.314495086669922, | |
| "learning_rate": 3.0595813204508857e-06, | |
| "loss": 25.5029, | |
| "step": 2448 | |
| }, | |
| { | |
| "epoch": 5.917874396135265, | |
| "grad_norm": 6.191902160644531, | |
| "learning_rate": 2.898550724637681e-06, | |
| "loss": 24.0935, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 5.9227053140096615, | |
| "grad_norm": 7.267618179321289, | |
| "learning_rate": 2.7375201288244767e-06, | |
| "loss": 26.7584, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 5.927536231884058, | |
| "grad_norm": 6.361123561859131, | |
| "learning_rate": 2.576489533011272e-06, | |
| "loss": 26.5634, | |
| "step": 2454 | |
| }, | |
| { | |
| "epoch": 5.932367149758454, | |
| "grad_norm": 6.8421173095703125, | |
| "learning_rate": 2.4154589371980677e-06, | |
| "loss": 25.2036, | |
| "step": 2456 | |
| }, | |
| { | |
| "epoch": 5.93719806763285, | |
| "grad_norm": 6.87398099899292, | |
| "learning_rate": 2.254428341384863e-06, | |
| "loss": 26.766, | |
| "step": 2458 | |
| }, | |
| { | |
| "epoch": 5.942028985507246, | |
| "grad_norm": 10.025320053100586, | |
| "learning_rate": 2.0933977455716586e-06, | |
| "loss": 24.7975, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 5.946859903381642, | |
| "grad_norm": 6.717752933502197, | |
| "learning_rate": 1.932367149758454e-06, | |
| "loss": 26.9315, | |
| "step": 2462 | |
| }, | |
| { | |
| "epoch": 5.951690821256038, | |
| "grad_norm": 6.499180793762207, | |
| "learning_rate": 1.7713365539452496e-06, | |
| "loss": 25.7196, | |
| "step": 2464 | |
| }, | |
| { | |
| "epoch": 5.956521739130435, | |
| "grad_norm": 6.772797107696533, | |
| "learning_rate": 1.610305958132045e-06, | |
| "loss": 23.9813, | |
| "step": 2466 | |
| }, | |
| { | |
| "epoch": 5.961352657004831, | |
| "grad_norm": 6.387327671051025, | |
| "learning_rate": 1.4492753623188406e-06, | |
| "loss": 24.631, | |
| "step": 2468 | |
| }, | |
| { | |
| "epoch": 5.966183574879227, | |
| "grad_norm": 6.289485931396484, | |
| "learning_rate": 1.288244766505636e-06, | |
| "loss": 26.311, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 5.971014492753623, | |
| "grad_norm": 6.260473251342773, | |
| "learning_rate": 1.1272141706924316e-06, | |
| "loss": 25.3955, | |
| "step": 2472 | |
| }, | |
| { | |
| "epoch": 5.975845410628019, | |
| "grad_norm": 6.831587791442871, | |
| "learning_rate": 9.66183574879227e-07, | |
| "loss": 26.9048, | |
| "step": 2474 | |
| }, | |
| { | |
| "epoch": 5.980676328502415, | |
| "grad_norm": 7.09013032913208, | |
| "learning_rate": 8.051529790660226e-07, | |
| "loss": 26.662, | |
| "step": 2476 | |
| }, | |
| { | |
| "epoch": 5.9855072463768115, | |
| "grad_norm": 6.909030914306641, | |
| "learning_rate": 6.44122383252818e-07, | |
| "loss": 24.5214, | |
| "step": 2478 | |
| }, | |
| { | |
| "epoch": 5.990338164251208, | |
| "grad_norm": 6.548914432525635, | |
| "learning_rate": 4.830917874396135e-07, | |
| "loss": 26.6767, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 5.995169082125604, | |
| "grad_norm": 6.59926176071167, | |
| "learning_rate": 3.22061191626409e-07, | |
| "loss": 27.5884, | |
| "step": 2482 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "grad_norm": 6.204819679260254, | |
| "learning_rate": 1.610305958132045e-07, | |
| "loss": 25.388, | |
| "step": 2484 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "step": 2484, | |
| "total_flos": 629711046062928.0, | |
| "train_loss": 27.644595153857736, | |
| "train_runtime": 6156.0959, | |
| "train_samples_per_second": 6.456, | |
| "train_steps_per_second": 0.404 | |
| } | |
| ], | |
| "logging_steps": 2, | |
| "max_steps": 2484, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 6, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 629711046062928.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |
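
The block above is the tail of a Hugging Face `Trainer` state log (`trainer_state.json`): per-step entries with `epoch`, `grad_norm`, `learning_rate`, and `loss` recorded every 2 optimizer steps (`logging_steps: 2`), followed by a final summary entry and the trainer metadata. As a minimal sketch of how such a state file can be consumed (the file path and the check tolerance are assumptions, not part of the log), using only the Python standard library:

```python
import json

# Load the trainer state; Hugging Face's Trainer writes this file as
# trainer_state.json inside each checkpoint directory (path assumed here).
with open("trainer_state.json") as f:
    state = json.load(f)  # json.load tolerates the NaN grad_norm entries

# Per-step entries carry a learning_rate; the final summary entry does not.
logs = [e for e in state["log_history"] if "learning_rate" in e]
summary = state["log_history"][-1]

print(f"steps logged : {len(logs)}")
print(f"last loss    : {logs[-1]['loss']:.4f} at step {logs[-1]['step']}")
print(f"mean loss    : {summary['train_loss']:.4f} over {summary['train_runtime']:.0f}s")

# Consistency check: reported steps/sec should match max_steps / runtime
# (2484 / 6156.0959 s is ~0.404, matching the logged value).
assert abs(state["max_steps"] / summary["train_runtime"]
           - summary["train_steps_per_second"]) < 1e-2
```

Run against this log, the check passes: 2484 steps over 6156.1 s is ~0.404 steps/s, and with `train_samples_per_second` of 6.456 that implies ~16 samples per optimizer step, i.e. the logged `train_batch_size` of 2 combined with gradient accumulation and/or multiple devices (the exact split is not recorded in this file).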