{ "best_metric": 0.2197728157043457, "best_model_checkpoint": "output_pipe/1/origin/checkpoint-1600", "epoch": 4.0, "eval_steps": 200, "global_step": 3372, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11862396204033215, "grad_norm": 9.243636131286621, "learning_rate": 2.957555689343769e-05, "loss": 0.4501, "step": 100 }, { "epoch": 0.2372479240806643, "grad_norm": 6.825350761413574, "learning_rate": 2.8672486453943407e-05, "loss": 0.2984, "step": 200 }, { "epoch": 0.2372479240806643, "eval_accuracy": 0.8756115641215715, "eval_f1": 0.8753583742291109, "eval_loss": 0.2889566719532013, "eval_matthews_correlation": 0.7552199511937604, "eval_precision": 0.8793427094656048, "eval_recall": 0.8758851563831229, "eval_runtime": 1.416, "eval_samples_per_second": 4763.557, "eval_steps_per_second": 74.861, "step": 200 }, { "epoch": 0.35587188612099646, "grad_norm": 9.723295211791992, "learning_rate": 2.7769416014449128e-05, "loss": 0.2867, "step": 300 }, { "epoch": 0.4744958481613286, "grad_norm": 4.6286139488220215, "learning_rate": 2.686634557495485e-05, "loss": 0.2692, "step": 400 }, { "epoch": 0.4744958481613286, "eval_accuracy": 0.8923647146034099, "eval_f1": 0.8923344963363296, "eval_loss": 0.2463846057653427, "eval_matthews_correlation": 0.7855525209874886, "eval_precision": 0.8930692687442148, "eval_recall": 0.892483470662371, "eval_runtime": 1.4056, "eval_samples_per_second": 4798.74, "eval_steps_per_second": 75.414, "step": 400 }, { "epoch": 0.5931198102016607, "grad_norm": 6.366510391235352, "learning_rate": 2.5963275135460566e-05, "loss": 0.2648, "step": 500 }, { "epoch": 0.7117437722419929, "grad_norm": 6.894649505615234, "learning_rate": 2.5060204695966287e-05, "loss": 0.2594, "step": 600 }, { "epoch": 0.7117437722419929, "eval_accuracy": 0.9012601927353595, "eval_f1": 0.901260173202269, "eval_loss": 0.22498522698879242, "eval_matthews_correlation": 0.8025661484949196, "eval_precision": 0.9012812748493957, "eval_recall": 0.9012848736535926, "eval_runtime": 1.4052, "eval_samples_per_second": 4800.055, "eval_steps_per_second": 75.435, "step": 600 }, { "epoch": 0.830367734282325, "grad_norm": 16.520292282104492, "learning_rate": 2.4157134256472004e-05, "loss": 0.2517, "step": 700 }, { "epoch": 0.9489916963226572, "grad_norm": 3.2996578216552734, "learning_rate": 2.3254063816977725e-05, "loss": 0.2432, "step": 800 }, { "epoch": 0.9489916963226572, "eval_accuracy": 0.8917716827279466, "eval_f1": 0.8917528361445037, "eval_loss": 0.24542774260044098, "eval_matthews_correlation": 0.7836117160608459, "eval_precision": 0.8918855979873312, "eval_recall": 0.891726134298813, "eval_runtime": 1.4043, "eval_samples_per_second": 4803.154, "eval_steps_per_second": 75.483, "step": 800 }, { "epoch": 1.0676156583629894, "grad_norm": 6.839262962341309, "learning_rate": 2.2350993377483446e-05, "loss": 0.2125, "step": 900 }, { "epoch": 1.1862396204033214, "grad_norm": 10.704083442687988, "learning_rate": 2.1447922937989163e-05, "loss": 0.1561, "step": 1000 }, { "epoch": 1.1862396204033214, "eval_accuracy": 0.8956263899184581, "eval_f1": 0.8951017745713171, "eval_loss": 0.2612854242324829, "eval_matthews_correlation": 0.8007606389596266, "eval_precision": 0.904766892075258, "eval_recall": 0.8960412854047166, "eval_runtime": 1.4053, "eval_samples_per_second": 4799.797, "eval_steps_per_second": 75.43, "step": 1000 }, { "epoch": 1.3048635824436536, "grad_norm": 4.566791534423828, "learning_rate": 2.0544852498494884e-05, "loss": 0.1685, "step": 1100 }, { "epoch": 1.4234875444839858, "grad_norm": 3.4650704860687256, "learning_rate": 1.96417820590006e-05, "loss": 0.1607, "step": 1200 }, { "epoch": 1.4234875444839858, "eval_accuracy": 0.9071905114899926, "eval_f1": 0.9071251020390567, "eval_loss": 0.22077599167823792, "eval_matthews_correlation": 0.8161151585361717, "eval_precision": 0.9087529499988971, "eval_recall": 0.9073633915023229, "eval_runtime": 1.4041, "eval_samples_per_second": 4803.697, "eval_steps_per_second": 75.492, "step": 1200 }, { "epoch": 1.5421115065243178, "grad_norm": 9.395886421203613, "learning_rate": 1.8738711619506322e-05, "loss": 0.1751, "step": 1300 }, { "epoch": 1.66073546856465, "grad_norm": 5.750766277313232, "learning_rate": 1.7835641180012043e-05, "loss": 0.1617, "step": 1400 }, { "epoch": 1.66073546856465, "eval_accuracy": 0.9033358042994811, "eval_f1": 0.9032721716896848, "eval_loss": 0.23882386088371277, "eval_matthews_correlation": 0.8072800092044834, "eval_precision": 0.9040581552977713, "eval_recall": 0.9032222866414605, "eval_runtime": 1.4054, "eval_samples_per_second": 4799.439, "eval_steps_per_second": 75.425, "step": 1400 }, { "epoch": 1.7793594306049823, "grad_norm": 10.305908203125, "learning_rate": 1.693257074051776e-05, "loss": 0.1615, "step": 1500 }, { "epoch": 1.8979833926453145, "grad_norm": 6.007554531097412, "learning_rate": 1.602950030102348e-05, "loss": 0.1669, "step": 1600 }, { "epoch": 1.8979833926453145, "eval_accuracy": 0.9058561897702001, "eval_f1": 0.9057890959881881, "eval_loss": 0.2197728157043457, "eval_matthews_correlation": 0.8134577263257711, "eval_precision": 0.9074290868275333, "eval_recall": 0.9060298429301878, "eval_runtime": 1.4038, "eval_samples_per_second": 4804.94, "eval_steps_per_second": 75.511, "step": 1600 }, { "epoch": 2.0166073546856467, "grad_norm": 17.52599334716797, "learning_rate": 1.5126429861529199e-05, "loss": 0.1482, "step": 1700 }, { "epoch": 2.135231316725979, "grad_norm": 13.2647066116333, "learning_rate": 1.422335942203492e-05, "loss": 0.0629, "step": 1800 }, { "epoch": 2.135231316725979, "eval_accuracy": 0.900815418828762, "eval_f1": 0.9008021532090624, "eval_loss": 0.37351372838020325, "eval_matthews_correlation": 0.8016708933554149, "eval_precision": 0.9008918124334665, "eval_recall": 0.9007790888470252, "eval_runtime": 1.4037, "eval_samples_per_second": 4805.322, "eval_steps_per_second": 75.517, "step": 1800 }, { "epoch": 2.2538552787663106, "grad_norm": 5.28846549987793, "learning_rate": 1.3320288982540638e-05, "loss": 0.0622, "step": 1900 }, { "epoch": 2.372479240806643, "grad_norm": 11.060345649719238, "learning_rate": 1.2417218543046358e-05, "loss": 0.0748, "step": 2000 }, { "epoch": 2.372479240806643, "eval_accuracy": 0.9082283172720533, "eval_f1": 0.9082064942582599, "eval_loss": 0.342942476272583, "eval_matthews_correlation": 0.8172074539472458, "eval_precision": 0.908867392978981, "eval_recall": 0.9083402309983616, "eval_runtime": 1.425, "eval_samples_per_second": 4733.445, "eval_steps_per_second": 74.388, "step": 2000 }, { "epoch": 2.491103202846975, "grad_norm": 16.14167594909668, "learning_rate": 1.1514148103552077e-05, "loss": 0.0678, "step": 2100 }, { "epoch": 2.6097271648873073, "grad_norm": 14.60816478729248, "learning_rate": 1.0611077664057798e-05, "loss": 0.0651, "step": 2200 }, { "epoch": 2.6097271648873073, "eval_accuracy": 0.9015567086730912, "eval_f1": 0.9015537463069576, "eval_loss": 0.3432765305042267, "eval_matthews_correlation": 0.8031074926139152, "eval_precision": 0.9015537463069576, "eval_recall": 0.9015537463069576, "eval_runtime": 1.413, "eval_samples_per_second": 4773.585, "eval_steps_per_second": 75.019, "step": 2200 }, { "epoch": 2.7283511269276395, "grad_norm": 19.60405731201172, "learning_rate": 9.708007224563517e-06, "loss": 0.0573, "step": 2300 }, { "epoch": 2.8469750889679717, "grad_norm": 12.747634887695312, "learning_rate": 8.804936785069236e-06, "loss": 0.0587, "step": 2400 }, { "epoch": 2.8469750889679717, "eval_accuracy": 0.9012601927353595, "eval_f1": 0.9012397677542721, "eval_loss": 0.3955250680446625, "eval_matthews_correlation": 0.8026222319497477, "eval_precision": 0.9014138348009262, "eval_recall": 0.901208423433808, "eval_runtime": 1.4171, "eval_samples_per_second": 4759.731, "eval_steps_per_second": 74.801, "step": 2400 }, { "epoch": 2.9655990510083035, "grad_norm": 13.178675651550293, "learning_rate": 7.901866345574955e-06, "loss": 0.0569, "step": 2500 }, { "epoch": 3.0842230130486357, "grad_norm": 0.011187891475856304, "learning_rate": 6.998795906080675e-06, "loss": 0.0263, "step": 2600 }, { "epoch": 3.0842230130486357, "eval_accuracy": 0.9057079318013344, "eval_f1": 0.9057078799868117, "eval_loss": 0.525695264339447, "eval_matthews_correlation": 0.8114565321991912, "eval_precision": 0.9057254122582752, "eval_recall": 0.9057311199609897, "eval_runtime": 1.4153, "eval_samples_per_second": 4765.722, "eval_steps_per_second": 74.895, "step": 2600 }, { "epoch": 3.202846975088968, "grad_norm": 0.2136336714029312, "learning_rate": 6.104756170981336e-06, "loss": 0.011, "step": 2700 }, { "epoch": 3.3214709371293, "grad_norm": 33.0368766784668, "learning_rate": 5.201685731487056e-06, "loss": 0.018, "step": 2800 }, { "epoch": 3.3214709371293, "eval_accuracy": 0.9057079318013344, "eval_f1": 0.9056997050145418, "eval_loss": 0.6456906199455261, "eval_matthews_correlation": 0.8117977167650796, "eval_precision": 0.9060113235426336, "eval_recall": 0.905786424375302, "eval_runtime": 1.4245, "eval_samples_per_second": 4734.973, "eval_steps_per_second": 74.412, "step": 2800 }, { "epoch": 3.4400948991696323, "grad_norm": 0.03287828713655472, "learning_rate": 4.2986152919927755e-06, "loss": 0.0134, "step": 2900 }, { "epoch": 3.5587188612099645, "grad_norm": 0.05251970514655113, "learning_rate": 3.395544852498495e-06, "loss": 0.0197, "step": 3000 }, { "epoch": 3.5587188612099645, "eval_accuracy": 0.9027427724240178, "eval_f1": 0.9027319948057138, "eval_loss": 0.5632529258728027, "eval_matthews_correlation": 0.805927089741698, "eval_precision": 0.9030994533926766, "eval_recall": 0.9028276821717736, "eval_runtime": 1.4184, "eval_samples_per_second": 4755.316, "eval_steps_per_second": 74.731, "step": 3000 }, { "epoch": 3.6773428232502967, "grad_norm": 0.18964755535125732, "learning_rate": 2.4924744130042145e-06, "loss": 0.0103, "step": 3100 }, { "epoch": 3.795966785290629, "grad_norm": 2.074934244155884, "learning_rate": 1.5894039735099338e-06, "loss": 0.0067, "step": 3200 }, { "epoch": 3.795966785290629, "eval_accuracy": 0.9052631578947369, "eval_f1": 0.9052587514243154, "eval_loss": 0.6363104581832886, "eval_matthews_correlation": 0.8107917228123425, "eval_precision": 0.905463923256376, "eval_recall": 0.9053278109809421, "eval_runtime": 1.4184, "eval_samples_per_second": 4755.362, "eval_steps_per_second": 74.732, "step": 3200 }, { "epoch": 3.914590747330961, "grad_norm": 0.007920457050204277, "learning_rate": 6.863335340156533e-07, "loss": 0.0084, "step": 3300 }, { "epoch": 4.0, "step": 3372, "total_flos": 5866539036180480.0, "train_loss": 0.1323915239189302, "train_runtime": 204.5834, "train_samples_per_second": 1054.865, "train_steps_per_second": 16.482 } ], "logging_steps": 100, "max_steps": 3372, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 5866539036180480.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }