{ "best_metric": 0.809640924741761, "best_model_checkpoint": "fr-FR/wav2vec2-large-xlsr-53-french/checkpoint-6240", "epoch": 30.0, "eval_steps": 100, "global_step": 7200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.20833333333333334, "grad_norm": 0.9218843579292297, "learning_rate": 6.805555555555556e-06, "loss": 4.0814, "step": 50 }, { "epoch": 0.4166666666666667, "grad_norm": 1.4515026807785034, "learning_rate": 1.3750000000000002e-05, "loss": 4.0295, "step": 100 }, { "epoch": 0.625, "grad_norm": 1.471714973449707, "learning_rate": 2.0416666666666667e-05, "loss": 3.9372, "step": 150 }, { "epoch": 0.8333333333333334, "grad_norm": 1.8082315921783447, "learning_rate": 2.7361111111111114e-05, "loss": 3.8405, "step": 200 }, { "epoch": 1.0, "eval_accuracy": 0.11067388096409247, "eval_f1": 0.00751275114222314, "eval_loss": 3.695161819458008, "eval_runtime": 20.4138, "eval_samples_per_second": 99.589, "eval_steps_per_second": 2.106, "step": 240 }, { "epoch": 1.0416666666666667, "grad_norm": 1.8785988092422485, "learning_rate": 3.430555555555556e-05, "loss": 3.7489, "step": 250 }, { "epoch": 1.25, "grad_norm": 3.867368221282959, "learning_rate": 4.0833333333333334e-05, "loss": 3.6437, "step": 300 }, { "epoch": 1.4583333333333333, "grad_norm": 4.055644512176514, "learning_rate": 4.7777777777777784e-05, "loss": 3.5614, "step": 350 }, { "epoch": 1.6666666666666665, "grad_norm": 17.874731063842773, "learning_rate": 5.472222222222223e-05, "loss": 3.4364, "step": 400 }, { "epoch": 1.875, "grad_norm": 4.906755447387695, "learning_rate": 6.166666666666667e-05, "loss": 3.1946, "step": 450 }, { "epoch": 2.0, "eval_accuracy": 0.24889326119035907, "eval_f1": 0.05205393334077855, "eval_loss": 2.838548183441162, "eval_runtime": 20.4558, "eval_samples_per_second": 99.385, "eval_steps_per_second": 2.102, "step": 480 }, { "epoch": 2.0833333333333335, "grad_norm": 8.265469551086426, "learning_rate": 6.861111111111111e-05, "loss": 2.9581, "step": 500 }, { "epoch": 2.2916666666666665, "grad_norm": 3.2242982387542725, "learning_rate": 7.555555555555556e-05, "loss": 2.7144, "step": 550 }, { "epoch": 2.5, "grad_norm": 4.992274761199951, "learning_rate": 8.25e-05, "loss": 2.4869, "step": 600 }, { "epoch": 2.7083333333333335, "grad_norm": 3.4717137813568115, "learning_rate": 8.944444444444446e-05, "loss": 2.3462, "step": 650 }, { "epoch": 2.9166666666666665, "grad_norm": 6.193606853485107, "learning_rate": 9.63888888888889e-05, "loss": 2.1909, "step": 700 }, { "epoch": 3.0, "eval_accuracy": 0.4746679783571077, "eval_f1": 0.2319910941132076, "eval_loss": 1.9486836194992065, "eval_runtime": 20.4255, "eval_samples_per_second": 99.532, "eval_steps_per_second": 2.105, "step": 720 }, { "epoch": 3.125, "grad_norm": 4.327600002288818, "learning_rate": 9.962962962962963e-05, "loss": 1.9321, "step": 750 }, { "epoch": 3.3333333333333335, "grad_norm": 5.130202293395996, "learning_rate": 9.885802469135803e-05, "loss": 1.7724, "step": 800 }, { "epoch": 3.5416666666666665, "grad_norm": 5.533684253692627, "learning_rate": 9.808641975308642e-05, "loss": 1.6968, "step": 850 }, { "epoch": 3.75, "grad_norm": 5.2867841720581055, "learning_rate": 9.731481481481482e-05, "loss": 1.5704, "step": 900 }, { "epoch": 3.9583333333333335, "grad_norm": 7.265196800231934, "learning_rate": 9.654320987654321e-05, "loss": 1.4959, "step": 950 }, { "epoch": 4.0, "eval_accuracy": 0.6640432857845548, "eval_f1": 0.46395598256027126, "eval_loss": 1.4248884916305542, "eval_runtime": 20.479, "eval_samples_per_second": 99.272, "eval_steps_per_second": 2.1, "step": 960 }, { "epoch": 4.166666666666667, "grad_norm": 3.4572534561157227, "learning_rate": 9.577160493827161e-05, "loss": 1.2484, "step": 1000 }, { "epoch": 4.375, "grad_norm": 4.301657199859619, "learning_rate": 9.5e-05, "loss": 1.2147, "step": 1050 }, { "epoch": 4.583333333333333, "grad_norm": 3.839873790740967, "learning_rate": 9.42283950617284e-05, "loss": 1.1932, "step": 1100 }, { "epoch": 4.791666666666667, "grad_norm": 5.490840911865234, "learning_rate": 9.34567901234568e-05, "loss": 1.0912, "step": 1150 }, { "epoch": 5.0, "grad_norm": 6.172304153442383, "learning_rate": 9.268518518518519e-05, "loss": 1.0914, "step": 1200 }, { "epoch": 5.0, "eval_accuracy": 0.7009345794392523, "eval_f1": 0.5684208573594706, "eval_loss": 1.2621203660964966, "eval_runtime": 20.705, "eval_samples_per_second": 98.189, "eval_steps_per_second": 2.077, "step": 1200 }, { "epoch": 5.208333333333333, "grad_norm": 6.997086524963379, "learning_rate": 9.191358024691359e-05, "loss": 0.8676, "step": 1250 }, { "epoch": 5.416666666666667, "grad_norm": 5.355091094970703, "learning_rate": 9.114197530864198e-05, "loss": 0.8161, "step": 1300 }, { "epoch": 5.625, "grad_norm": 4.8548126220703125, "learning_rate": 9.037037037037038e-05, "loss": 0.8341, "step": 1350 }, { "epoch": 5.833333333333333, "grad_norm": 5.332413673400879, "learning_rate": 8.959876543209877e-05, "loss": 0.7733, "step": 1400 }, { "epoch": 6.0, "eval_accuracy": 0.7584849975405804, "eval_f1": 0.6334292278351681, "eval_loss": 1.077911138534546, "eval_runtime": 20.457, "eval_samples_per_second": 99.379, "eval_steps_per_second": 2.102, "step": 1440 }, { "epoch": 6.041666666666667, "grad_norm": 6.060237407684326, "learning_rate": 8.882716049382717e-05, "loss": 0.7172, "step": 1450 }, { "epoch": 6.25, "grad_norm": 2.757173776626587, "learning_rate": 8.805555555555556e-05, "loss": 0.5509, "step": 1500 }, { "epoch": 6.458333333333333, "grad_norm": 4.485584259033203, "learning_rate": 8.728395061728394e-05, "loss": 0.5957, "step": 1550 }, { "epoch": 6.666666666666667, "grad_norm": 6.723128795623779, "learning_rate": 8.651234567901235e-05, "loss": 0.6174, "step": 1600 }, { "epoch": 6.875, "grad_norm": 3.555032253265381, "learning_rate": 8.574074074074075e-05, "loss": 0.6108, "step": 1650 }, { "epoch": 7.0, "eval_accuracy": 0.7560255779636006, "eval_f1": 0.6705683771131548, "eval_loss": 1.0672900676727295, "eval_runtime": 20.4072, "eval_samples_per_second": 99.622, "eval_steps_per_second": 2.107, "step": 1680 }, { "epoch": 7.083333333333333, "grad_norm": 3.6226491928100586, "learning_rate": 8.496913580246913e-05, "loss": 0.5246, "step": 1700 }, { "epoch": 7.291666666666667, "grad_norm": 3.4016356468200684, "learning_rate": 8.419753086419754e-05, "loss": 0.4496, "step": 1750 }, { "epoch": 7.5, "grad_norm": 4.2375264167785645, "learning_rate": 8.342592592592593e-05, "loss": 0.4367, "step": 1800 }, { "epoch": 7.708333333333333, "grad_norm": 2.5992074012756348, "learning_rate": 8.265432098765432e-05, "loss": 0.5159, "step": 1850 }, { "epoch": 7.916666666666667, "grad_norm": 7.560590744018555, "learning_rate": 8.188271604938272e-05, "loss": 0.4458, "step": 1900 }, { "epoch": 8.0, "eval_accuracy": 0.7776684702410231, "eval_f1": 0.7048604982995362, "eval_loss": 1.1228270530700684, "eval_runtime": 20.455, "eval_samples_per_second": 99.389, "eval_steps_per_second": 2.102, "step": 1920 }, { "epoch": 8.125, "grad_norm": 9.330732345581055, "learning_rate": 8.111111111111112e-05, "loss": 0.4197, "step": 1950 }, { "epoch": 8.333333333333334, "grad_norm": 7.718836784362793, "learning_rate": 8.03395061728395e-05, "loss": 0.3656, "step": 2000 }, { "epoch": 8.541666666666666, "grad_norm": 7.252094745635986, "learning_rate": 7.956790123456791e-05, "loss": 0.338, "step": 2050 }, { "epoch": 8.75, "grad_norm": 9.499176979064941, "learning_rate": 7.87962962962963e-05, "loss": 0.3823, "step": 2100 }, { "epoch": 8.958333333333334, "grad_norm": 7.217687606811523, "learning_rate": 7.802469135802469e-05, "loss": 0.3287, "step": 2150 }, { "epoch": 9.0, "eval_accuracy": 0.7717658632562715, "eval_f1": 0.7050420908730758, "eval_loss": 1.202376127243042, "eval_runtime": 20.4574, "eval_samples_per_second": 99.377, "eval_steps_per_second": 2.102, "step": 2160 }, { "epoch": 9.166666666666666, "grad_norm": 4.578555107116699, "learning_rate": 7.725308641975308e-05, "loss": 0.2838, "step": 2200 }, { "epoch": 9.375, "grad_norm": 2.4536349773406982, "learning_rate": 7.648148148148149e-05, "loss": 0.2313, "step": 2250 }, { "epoch": 9.583333333333334, "grad_norm": 4.319491386413574, "learning_rate": 7.570987654320989e-05, "loss": 0.2829, "step": 2300 }, { "epoch": 9.791666666666666, "grad_norm": 3.8762989044189453, "learning_rate": 7.493827160493827e-05, "loss": 0.2982, "step": 2350 }, { "epoch": 10.0, "grad_norm": 9.137697219848633, "learning_rate": 7.416666666666668e-05, "loss": 0.2679, "step": 2400 }, { "epoch": 10.0, "eval_accuracy": 0.7722577471716675, "eval_f1": 0.7063199437483717, "eval_loss": 1.1973373889923096, "eval_runtime": 20.4416, "eval_samples_per_second": 99.454, "eval_steps_per_second": 2.104, "step": 2400 }, { "epoch": 10.208333333333334, "grad_norm": 12.147871971130371, "learning_rate": 7.339506172839507e-05, "loss": 0.1733, "step": 2450 }, { "epoch": 10.416666666666666, "grad_norm": 3.568844795227051, "learning_rate": 7.262345679012345e-05, "loss": 0.2035, "step": 2500 }, { "epoch": 10.625, "grad_norm": 2.700684070587158, "learning_rate": 7.185185185185186e-05, "loss": 0.2191, "step": 2550 }, { "epoch": 10.833333333333334, "grad_norm": 5.541505813598633, "learning_rate": 7.108024691358026e-05, "loss": 0.1939, "step": 2600 }, { "epoch": 11.0, "eval_accuracy": 0.7835710772257747, "eval_f1": 0.720977116582221, "eval_loss": 1.2967644929885864, "eval_runtime": 20.4516, "eval_samples_per_second": 99.405, "eval_steps_per_second": 2.103, "step": 2640 }, { "epoch": 11.041666666666666, "grad_norm": 5.511509418487549, "learning_rate": 7.030864197530864e-05, "loss": 0.1774, "step": 2650 }, { "epoch": 11.25, "grad_norm": 7.002375602722168, "learning_rate": 6.953703703703705e-05, "loss": 0.227, "step": 2700 }, { "epoch": 11.458333333333334, "grad_norm": 9.876754760742188, "learning_rate": 6.876543209876544e-05, "loss": 0.1608, "step": 2750 }, { "epoch": 11.666666666666666, "grad_norm": 4.028517246246338, "learning_rate": 6.799382716049382e-05, "loss": 0.1539, "step": 2800 }, { "epoch": 11.875, "grad_norm": 15.034867286682129, "learning_rate": 6.722222222222223e-05, "loss": 0.1285, "step": 2850 }, { "epoch": 12.0, "eval_accuracy": 0.778652238071815, "eval_f1": 0.7160733882558296, "eval_loss": 1.355954647064209, "eval_runtime": 20.4496, "eval_samples_per_second": 99.415, "eval_steps_per_second": 2.103, "step": 2880 }, { "epoch": 12.083333333333334, "grad_norm": 7.225032329559326, "learning_rate": 6.645061728395063e-05, "loss": 0.2065, "step": 2900 }, { "epoch": 12.291666666666666, "grad_norm": 4.554244518280029, "learning_rate": 6.567901234567901e-05, "loss": 0.1276, "step": 2950 }, { "epoch": 12.5, "grad_norm": 2.6531524658203125, "learning_rate": 6.49074074074074e-05, "loss": 0.1232, "step": 3000 }, { "epoch": 12.708333333333334, "grad_norm": 5.957273483276367, "learning_rate": 6.413580246913581e-05, "loss": 0.1509, "step": 3050 }, { "epoch": 12.916666666666666, "grad_norm": 4.0668230056762695, "learning_rate": 6.33641975308642e-05, "loss": 0.1121, "step": 3100 }, { "epoch": 13.0, "eval_accuracy": 0.7865223807181505, "eval_f1": 0.7434181644630903, "eval_loss": 1.4407403469085693, "eval_runtime": 20.4749, "eval_samples_per_second": 99.292, "eval_steps_per_second": 2.1, "step": 3120 }, { "epoch": 13.125, "grad_norm": 11.889551162719727, "learning_rate": 6.259259259259259e-05, "loss": 0.1363, "step": 3150 }, { "epoch": 13.333333333333334, "grad_norm": 1.546557903289795, "learning_rate": 6.1820987654321e-05, "loss": 0.1152, "step": 3200 }, { "epoch": 13.541666666666666, "grad_norm": 1.3274074792861938, "learning_rate": 6.104938271604938e-05, "loss": 0.119, "step": 3250 }, { "epoch": 13.75, "grad_norm": 0.8454407453536987, "learning_rate": 6.0277777777777776e-05, "loss": 0.1359, "step": 3300 }, { "epoch": 13.958333333333334, "grad_norm": 4.895136833190918, "learning_rate": 5.950617283950618e-05, "loss": 0.1104, "step": 3350 }, { "epoch": 14.0, "eval_accuracy": 0.794884407279882, "eval_f1": 0.728210488213408, "eval_loss": 1.4156461954116821, "eval_runtime": 20.4772, "eval_samples_per_second": 99.281, "eval_steps_per_second": 2.1, "step": 3360 }, { "epoch": 14.166666666666666, "grad_norm": 0.6925948858261108, "learning_rate": 5.8734567901234573e-05, "loss": 0.1333, "step": 3400 }, { "epoch": 14.375, "grad_norm": 1.822946548461914, "learning_rate": 5.796296296296296e-05, "loss": 0.1256, "step": 3450 }, { "epoch": 14.583333333333334, "grad_norm": 0.7578862309455872, "learning_rate": 5.7191358024691364e-05, "loss": 0.1011, "step": 3500 }, { "epoch": 14.791666666666666, "grad_norm": 3.3160698413848877, "learning_rate": 5.641975308641976e-05, "loss": 0.1211, "step": 3550 }, { "epoch": 15.0, "grad_norm": 1.1490178108215332, "learning_rate": 5.564814814814815e-05, "loss": 0.0796, "step": 3600 }, { "epoch": 15.0, "eval_accuracy": 0.7968519429414658, "eval_f1": 0.7394952163587775, "eval_loss": 1.4435786008834839, "eval_runtime": 20.3922, "eval_samples_per_second": 99.695, "eval_steps_per_second": 2.109, "step": 3600 }, { "epoch": 15.208333333333334, "grad_norm": 6.289586544036865, "learning_rate": 5.487654320987654e-05, "loss": 0.0945, "step": 3650 }, { "epoch": 15.416666666666666, "grad_norm": 10.208250045776367, "learning_rate": 5.4104938271604945e-05, "loss": 0.0861, "step": 3700 }, { "epoch": 15.625, "grad_norm": 1.136026382446289, "learning_rate": 5.333333333333333e-05, "loss": 0.0864, "step": 3750 }, { "epoch": 15.833333333333334, "grad_norm": 0.8334448933601379, "learning_rate": 5.256172839506173e-05, "loss": 0.0905, "step": 3800 }, { "epoch": 16.0, "eval_accuracy": 0.7963600590260699, "eval_f1": 0.7325823030589725, "eval_loss": 1.438016653060913, "eval_runtime": 20.3702, "eval_samples_per_second": 99.802, "eval_steps_per_second": 2.111, "step": 3840 }, { "epoch": 16.041666666666668, "grad_norm": 0.7158970236778259, "learning_rate": 5.179012345679013e-05, "loss": 0.1214, "step": 3850 }, { "epoch": 16.25, "grad_norm": 1.5812829732894897, "learning_rate": 5.101851851851852e-05, "loss": 0.1095, "step": 3900 }, { "epoch": 16.458333333333332, "grad_norm": 2.2587368488311768, "learning_rate": 5.0246913580246914e-05, "loss": 0.0408, "step": 3950 }, { "epoch": 16.666666666666668, "grad_norm": 0.5447946786880493, "learning_rate": 4.947530864197531e-05, "loss": 0.0702, "step": 4000 }, { "epoch": 16.875, "grad_norm": 3.9040842056274414, "learning_rate": 4.8703703703703704e-05, "loss": 0.0923, "step": 4050 }, { "epoch": 17.0, "eval_accuracy": 0.794392523364486, "eval_f1": 0.7298191210606124, "eval_loss": 1.507544994354248, "eval_runtime": 20.3938, "eval_samples_per_second": 99.687, "eval_steps_per_second": 2.108, "step": 4080 }, { "epoch": 17.083333333333332, "grad_norm": 3.518707513809204, "learning_rate": 4.79320987654321e-05, "loss": 0.0735, "step": 4100 }, { "epoch": 17.291666666666668, "grad_norm": 0.4796667993068695, "learning_rate": 4.7160493827160495e-05, "loss": 0.0507, "step": 4150 }, { "epoch": 17.5, "grad_norm": 3.0742526054382324, "learning_rate": 4.638888888888889e-05, "loss": 0.0672, "step": 4200 }, { "epoch": 17.708333333333332, "grad_norm": 1.2230010032653809, "learning_rate": 4.5617283950617285e-05, "loss": 0.0514, "step": 4250 }, { "epoch": 17.916666666666668, "grad_norm": 0.14751353859901428, "learning_rate": 4.484567901234568e-05, "loss": 0.0824, "step": 4300 }, { "epoch": 18.0, "eval_accuracy": 0.8061977373339891, "eval_f1": 0.749870901060161, "eval_loss": 1.4922131299972534, "eval_runtime": 20.3644, "eval_samples_per_second": 99.831, "eval_steps_per_second": 2.112, "step": 4320 }, { "epoch": 18.125, "grad_norm": 0.12471245229244232, "learning_rate": 4.4074074074074076e-05, "loss": 0.0599, "step": 4350 }, { "epoch": 18.333333333333332, "grad_norm": 1.4798372983932495, "learning_rate": 4.330246913580247e-05, "loss": 0.0587, "step": 4400 }, { "epoch": 18.541666666666668, "grad_norm": 0.8141723275184631, "learning_rate": 4.2530864197530866e-05, "loss": 0.0467, "step": 4450 }, { "epoch": 18.75, "grad_norm": 0.13890697062015533, "learning_rate": 4.175925925925926e-05, "loss": 0.0892, "step": 4500 }, { "epoch": 18.958333333333332, "grad_norm": 0.3230196535587311, "learning_rate": 4.0987654320987657e-05, "loss": 0.058, "step": 4550 }, { "epoch": 19.0, "eval_accuracy": 0.8042302016724053, "eval_f1": 0.7482023120526493, "eval_loss": 1.4970366954803467, "eval_runtime": 20.3755, "eval_samples_per_second": 99.777, "eval_steps_per_second": 2.11, "step": 4560 }, { "epoch": 19.166666666666668, "grad_norm": 1.9698463678359985, "learning_rate": 4.021604938271605e-05, "loss": 0.045, "step": 4600 }, { "epoch": 19.375, "grad_norm": 0.1649627834558487, "learning_rate": 3.944444444444445e-05, "loss": 0.0405, "step": 4650 }, { "epoch": 19.583333333333332, "grad_norm": 3.7299489974975586, "learning_rate": 3.867283950617284e-05, "loss": 0.0399, "step": 4700 }, { "epoch": 19.791666666666668, "grad_norm": 5.240474224090576, "learning_rate": 3.790123456790123e-05, "loss": 0.0572, "step": 4750 }, { "epoch": 20.0, "grad_norm": 0.2733432352542877, "learning_rate": 3.712962962962963e-05, "loss": 0.0458, "step": 4800 }, { "epoch": 20.0, "eval_accuracy": 0.8047220855878012, "eval_f1": 0.759032642851733, "eval_loss": 1.4953453540802002, "eval_runtime": 20.389, "eval_samples_per_second": 99.711, "eval_steps_per_second": 2.109, "step": 4800 }, { "epoch": 20.208333333333332, "grad_norm": 0.14918775856494904, "learning_rate": 3.635802469135803e-05, "loss": 0.0744, "step": 4850 }, { "epoch": 20.416666666666668, "grad_norm": 2.150629997253418, "learning_rate": 3.5586419753086416e-05, "loss": 0.0449, "step": 4900 }, { "epoch": 20.625, "grad_norm": 4.421195030212402, "learning_rate": 3.481481481481482e-05, "loss": 0.0447, "step": 4950 }, { "epoch": 20.833333333333332, "grad_norm": 1.4415723085403442, "learning_rate": 3.4043209876543214e-05, "loss": 0.0559, "step": 5000 }, { "epoch": 21.0, "eval_accuracy": 0.8042302016724053, "eval_f1": 0.7419279898223464, "eval_loss": 1.488375186920166, "eval_runtime": 20.3567, "eval_samples_per_second": 99.869, "eval_steps_per_second": 2.112, "step": 5040 }, { "epoch": 21.041666666666668, "grad_norm": 2.6467976570129395, "learning_rate": 3.327160493827161e-05, "loss": 0.0469, "step": 5050 }, { "epoch": 21.25, "grad_norm": 2.713979721069336, "learning_rate": 3.2500000000000004e-05, "loss": 0.0619, "step": 5100 }, { "epoch": 21.458333333333332, "grad_norm": 0.12520253658294678, "learning_rate": 3.172839506172839e-05, "loss": 0.0484, "step": 5150 }, { "epoch": 21.666666666666668, "grad_norm": 0.26928290724754333, "learning_rate": 3.0956790123456794e-05, "loss": 0.048, "step": 5200 }, { "epoch": 21.875, "grad_norm": 1.4954543113708496, "learning_rate": 3.018518518518519e-05, "loss": 0.0492, "step": 5250 }, { "epoch": 22.0, "eval_accuracy": 0.8071815051647812, "eval_f1": 0.755515388463863, "eval_loss": 1.4774829149246216, "eval_runtime": 20.3932, "eval_samples_per_second": 99.69, "eval_steps_per_second": 2.109, "step": 5280 }, { "epoch": 22.083333333333332, "grad_norm": 0.3210670053958893, "learning_rate": 2.941358024691358e-05, "loss": 0.0692, "step": 5300 }, { "epoch": 22.291666666666668, "grad_norm": 0.19762489199638367, "learning_rate": 2.8641975308641977e-05, "loss": 0.0577, "step": 5350 }, { "epoch": 22.5, "grad_norm": 4.574896812438965, "learning_rate": 2.7870370370370375e-05, "loss": 0.0642, "step": 5400 }, { "epoch": 22.708333333333332, "grad_norm": 1.4412972927093506, "learning_rate": 2.7098765432098767e-05, "loss": 0.0369, "step": 5450 }, { "epoch": 22.916666666666668, "grad_norm": 2.7273476123809814, "learning_rate": 2.6327160493827162e-05, "loss": 0.0434, "step": 5500 }, { "epoch": 23.0, "eval_accuracy": 0.8017707820954255, "eval_f1": 0.7429558126046805, "eval_loss": 1.537632703781128, "eval_runtime": 20.3829, "eval_samples_per_second": 99.741, "eval_steps_per_second": 2.11, "step": 5520 }, { "epoch": 23.125, "grad_norm": 0.2368297278881073, "learning_rate": 2.5555555555555554e-05, "loss": 0.0527, "step": 5550 }, { "epoch": 23.333333333333332, "grad_norm": 4.433579444885254, "learning_rate": 2.4783950617283953e-05, "loss": 0.0377, "step": 5600 }, { "epoch": 23.541666666666668, "grad_norm": 2.415238857269287, "learning_rate": 2.4012345679012348e-05, "loss": 0.0368, "step": 5650 }, { "epoch": 23.75, "grad_norm": 3.2707390785217285, "learning_rate": 2.324074074074074e-05, "loss": 0.026, "step": 5700 }, { "epoch": 23.958333333333332, "grad_norm": 4.392247676849365, "learning_rate": 2.246913580246914e-05, "loss": 0.0602, "step": 5750 }, { "epoch": 24.0, "eval_accuracy": 0.807673389080177, "eval_f1": 0.7653388681022892, "eval_loss": 1.521716833114624, "eval_runtime": 20.37, "eval_samples_per_second": 99.804, "eval_steps_per_second": 2.111, "step": 5760 }, { "epoch": 24.166666666666668, "grad_norm": 0.05092393606901169, "learning_rate": 2.1697530864197534e-05, "loss": 0.0416, "step": 5800 }, { "epoch": 24.375, "grad_norm": 3.7013747692108154, "learning_rate": 2.0925925925925925e-05, "loss": 0.0418, "step": 5850 }, { "epoch": 24.583333333333332, "grad_norm": 0.25866004824638367, "learning_rate": 2.015432098765432e-05, "loss": 0.0389, "step": 5900 }, { "epoch": 24.791666666666668, "grad_norm": 0.5216749310493469, "learning_rate": 1.938271604938272e-05, "loss": 0.0613, "step": 5950 }, { "epoch": 25.0, "grad_norm": 0.40118977427482605, "learning_rate": 1.861111111111111e-05, "loss": 0.0305, "step": 6000 }, { "epoch": 25.0, "eval_accuracy": 0.809149040826365, "eval_f1": 0.7530485406738984, "eval_loss": 1.5094499588012695, "eval_runtime": 20.3828, "eval_samples_per_second": 99.741, "eval_steps_per_second": 2.11, "step": 6000 }, { "epoch": 25.208333333333332, "grad_norm": 5.624143123626709, "learning_rate": 1.7839506172839506e-05, "loss": 0.0235, "step": 6050 }, { "epoch": 25.416666666666668, "grad_norm": 0.1935749650001526, "learning_rate": 1.70679012345679e-05, "loss": 0.0186, "step": 6100 }, { "epoch": 25.625, "grad_norm": 1.6896755695343018, "learning_rate": 1.62962962962963e-05, "loss": 0.0282, "step": 6150 }, { "epoch": 25.833333333333332, "grad_norm": 0.05880526453256607, "learning_rate": 1.5524691358024692e-05, "loss": 0.0438, "step": 6200 }, { "epoch": 26.0, "eval_accuracy": 0.809640924741761, "eval_f1": 0.757063786404648, "eval_loss": 1.5237007141113281, "eval_runtime": 20.188, "eval_samples_per_second": 100.703, "eval_steps_per_second": 2.13, "step": 6240 }, { "epoch": 26.041666666666668, "grad_norm": 0.083707794547081, "learning_rate": 1.4753086419753087e-05, "loss": 0.018, "step": 6250 }, { "epoch": 26.25, "grad_norm": 0.07946202903985977, "learning_rate": 1.3981481481481482e-05, "loss": 0.0187, "step": 6300 }, { "epoch": 26.458333333333332, "grad_norm": 0.83076411485672, "learning_rate": 1.3209876543209876e-05, "loss": 0.0267, "step": 6350 }, { "epoch": 26.666666666666668, "grad_norm": 0.31989285349845886, "learning_rate": 1.2438271604938271e-05, "loss": 0.018, "step": 6400 }, { "epoch": 26.875, "grad_norm": 0.5059137344360352, "learning_rate": 1.1666666666666668e-05, "loss": 0.0227, "step": 6450 }, { "epoch": 27.0, "eval_accuracy": 0.809640924741761, "eval_f1": 0.7535283554310079, "eval_loss": 1.527543306350708, "eval_runtime": 20.389, "eval_samples_per_second": 99.711, "eval_steps_per_second": 2.109, "step": 6480 }, { "epoch": 27.083333333333332, "grad_norm": 2.3440067768096924, "learning_rate": 1.0895061728395061e-05, "loss": 0.0136, "step": 6500 }, { "epoch": 27.291666666666668, "grad_norm": 0.06165444850921631, "learning_rate": 1.0123456790123458e-05, "loss": 0.0621, "step": 6550 }, { "epoch": 27.5, "grad_norm": 0.1119479387998581, "learning_rate": 9.351851851851852e-06, "loss": 0.0513, "step": 6600 }, { "epoch": 27.708333333333332, "grad_norm": 0.18408450484275818, "learning_rate": 8.580246913580247e-06, "loss": 0.0132, "step": 6650 }, { "epoch": 27.916666666666668, "grad_norm": 2.2639083862304688, "learning_rate": 7.824074074074074e-06, "loss": 0.0233, "step": 6700 }, { "epoch": 28.0, "eval_accuracy": 0.8052139695031972, "eval_f1": 0.7480853178453498, "eval_loss": 1.5534404516220093, "eval_runtime": 20.3325, "eval_samples_per_second": 99.988, "eval_steps_per_second": 2.115, "step": 6720 }, { "epoch": 28.125, "grad_norm": 0.058636877685785294, "learning_rate": 7.05246913580247e-06, "loss": 0.0254, "step": 6750 }, { "epoch": 28.333333333333332, "grad_norm": 0.19175127148628235, "learning_rate": 6.280864197530864e-06, "loss": 0.0708, "step": 6800 }, { "epoch": 28.541666666666668, "grad_norm": 0.19785375893115997, "learning_rate": 5.5092592592592595e-06, "loss": 0.0254, "step": 6850 }, { "epoch": 28.75, "grad_norm": 0.17022226750850677, "learning_rate": 4.737654320987654e-06, "loss": 0.0217, "step": 6900 }, { "epoch": 28.958333333333332, "grad_norm": 0.47576045989990234, "learning_rate": 3.966049382716049e-06, "loss": 0.0221, "step": 6950 }, { "epoch": 29.0, "eval_accuracy": 0.8066896212493851, "eval_f1": 0.7499035664765309, "eval_loss": 1.5335384607315063, "eval_runtime": 20.3238, "eval_samples_per_second": 100.03, "eval_steps_per_second": 2.116, "step": 6960 }, { "epoch": 29.166666666666668, "grad_norm": 0.13686342537403107, "learning_rate": 3.1944444444444443e-06, "loss": 0.0268, "step": 7000 }, { "epoch": 29.375, "grad_norm": 0.1316099911928177, "learning_rate": 2.42283950617284e-06, "loss": 0.0349, "step": 7050 }, { "epoch": 29.583333333333332, "grad_norm": 0.0337589792907238, "learning_rate": 1.6512345679012345e-06, "loss": 0.0305, "step": 7100 }, { "epoch": 29.791666666666668, "grad_norm": 5.198480129241943, "learning_rate": 8.796296296296297e-07, "loss": 0.0271, "step": 7150 }, { "epoch": 30.0, "grad_norm": 0.15439973771572113, "learning_rate": 1.0802469135802469e-07, "loss": 0.0132, "step": 7200 }, { "epoch": 30.0, "eval_accuracy": 0.8081652729955731, "eval_f1": 0.7485061568915461, "eval_loss": 1.5384868383407593, "eval_runtime": 20.2515, "eval_samples_per_second": 100.388, "eval_steps_per_second": 2.123, "step": 7200 } ], "logging_steps": 50, "max_steps": 7200, "num_input_tokens_seen": 0, "num_train_epochs": 30, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0469261495692793e+20, "train_batch_size": 48, "trial_name": null, "trial_params": null }