{
  "best_metric": 0.2197728157043457,
  "best_model_checkpoint": "output_pipe/1/origin/checkpoint-1600",
  "epoch": 4.0,
  "eval_steps": 200,
  "global_step": 3372,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11862396204033215,
      "grad_norm": 9.243636131286621,
      "learning_rate": 2.957555689343769e-05,
      "loss": 0.4501,
      "step": 100
    },
    {
      "epoch": 0.2372479240806643,
      "grad_norm": 6.825350761413574,
      "learning_rate": 2.8672486453943407e-05,
      "loss": 0.2984,
      "step": 200
    },
    {
      "epoch": 0.2372479240806643,
      "eval_accuracy": 0.8756115641215715,
      "eval_f1": 0.8753583742291109,
      "eval_loss": 0.2889566719532013,
      "eval_matthews_correlation": 0.7552199511937604,
      "eval_precision": 0.8793427094656048,
      "eval_recall": 0.8758851563831229,
      "eval_runtime": 1.416,
      "eval_samples_per_second": 4763.557,
      "eval_steps_per_second": 74.861,
      "step": 200
    },
    {
      "epoch": 0.35587188612099646,
      "grad_norm": 9.723295211791992,
      "learning_rate": 2.7769416014449128e-05,
      "loss": 0.2867,
      "step": 300
    },
    {
      "epoch": 0.4744958481613286,
      "grad_norm": 4.6286139488220215,
      "learning_rate": 2.686634557495485e-05,
      "loss": 0.2692,
      "step": 400
    },
    {
      "epoch": 0.4744958481613286,
      "eval_accuracy": 0.8923647146034099,
      "eval_f1": 0.8923344963363296,
      "eval_loss": 0.2463846057653427,
      "eval_matthews_correlation": 0.7855525209874886,
      "eval_precision": 0.8930692687442148,
      "eval_recall": 0.892483470662371,
      "eval_runtime": 1.4056,
      "eval_samples_per_second": 4798.74,
      "eval_steps_per_second": 75.414,
      "step": 400
    },
    {
      "epoch": 0.5931198102016607,
      "grad_norm": 6.366510391235352,
      "learning_rate": 2.5963275135460566e-05,
      "loss": 0.2648,
      "step": 500
    },
    {
      "epoch": 0.7117437722419929,
      "grad_norm": 6.894649505615234,
      "learning_rate": 2.5060204695966287e-05,
      "loss": 0.2594,
      "step": 600
    },
    {
      "epoch": 0.7117437722419929,
      "eval_accuracy": 0.9012601927353595,
      "eval_f1": 0.901260173202269,
      "eval_loss": 0.22498522698879242,
      "eval_matthews_correlation": 0.8025661484949196,
      "eval_precision": 0.9012812748493957,
      "eval_recall": 0.9012848736535926,
      "eval_runtime": 1.4052,
      "eval_samples_per_second": 4800.055,
      "eval_steps_per_second": 75.435,
      "step": 600
    },
    {
      "epoch": 0.830367734282325,
      "grad_norm": 16.520292282104492,
      "learning_rate": 2.4157134256472004e-05,
      "loss": 0.2517,
      "step": 700
    },
    {
      "epoch": 0.9489916963226572,
      "grad_norm": 3.2996578216552734,
      "learning_rate": 2.3254063816977725e-05,
      "loss": 0.2432,
      "step": 800
    },
    {
      "epoch": 0.9489916963226572,
      "eval_accuracy": 0.8917716827279466,
      "eval_f1": 0.8917528361445037,
      "eval_loss": 0.24542774260044098,
      "eval_matthews_correlation": 0.7836117160608459,
      "eval_precision": 0.8918855979873312,
      "eval_recall": 0.891726134298813,
      "eval_runtime": 1.4043,
      "eval_samples_per_second": 4803.154,
      "eval_steps_per_second": 75.483,
      "step": 800
    },
    {
      "epoch": 1.0676156583629894,
      "grad_norm": 6.839262962341309,
      "learning_rate": 2.2350993377483446e-05,
      "loss": 0.2125,
      "step": 900
    },
    {
      "epoch": 1.1862396204033214,
      "grad_norm": 10.704083442687988,
      "learning_rate": 2.1447922937989163e-05,
      "loss": 0.1561,
      "step": 1000
    },
    {
      "epoch": 1.1862396204033214,
      "eval_accuracy": 0.8956263899184581,
      "eval_f1": 0.8951017745713171,
      "eval_loss": 0.2612854242324829,
      "eval_matthews_correlation": 0.8007606389596266,
      "eval_precision": 0.904766892075258,
      "eval_recall": 0.8960412854047166,
      "eval_runtime": 1.4053,
      "eval_samples_per_second": 4799.797,
      "eval_steps_per_second": 75.43,
      "step": 1000
    },
    {
      "epoch": 1.3048635824436536,
      "grad_norm": 4.566791534423828,
      "learning_rate": 2.0544852498494884e-05,
      "loss": 0.1685,
      "step": 1100
    },
    {
      "epoch": 1.4234875444839858,
      "grad_norm": 3.4650704860687256,
      "learning_rate": 1.96417820590006e-05,
      "loss": 0.1607,
      "step": 1200
    },
    {
      "epoch": 1.4234875444839858,
      "eval_accuracy": 0.9071905114899926,
      "eval_f1": 0.9071251020390567,
      "eval_loss": 0.22077599167823792,
      "eval_matthews_correlation": 0.8161151585361717,
      "eval_precision": 0.9087529499988971,
      "eval_recall": 0.9073633915023229,
      "eval_runtime": 1.4041,
      "eval_samples_per_second": 4803.697,
      "eval_steps_per_second": 75.492,
      "step": 1200
    },
    {
      "epoch": 1.5421115065243178,
      "grad_norm": 9.395886421203613,
      "learning_rate": 1.8738711619506322e-05,
      "loss": 0.1751,
      "step": 1300
    },
    {
      "epoch": 1.66073546856465,
      "grad_norm": 5.750766277313232,
      "learning_rate": 1.7835641180012043e-05,
      "loss": 0.1617,
      "step": 1400
    },
    {
      "epoch": 1.66073546856465,
      "eval_accuracy": 0.9033358042994811,
      "eval_f1": 0.9032721716896848,
      "eval_loss": 0.23882386088371277,
      "eval_matthews_correlation": 0.8072800092044834,
      "eval_precision": 0.9040581552977713,
      "eval_recall": 0.9032222866414605,
      "eval_runtime": 1.4054,
      "eval_samples_per_second": 4799.439,
      "eval_steps_per_second": 75.425,
      "step": 1400
    },
    {
      "epoch": 1.7793594306049823,
      "grad_norm": 10.305908203125,
      "learning_rate": 1.693257074051776e-05,
      "loss": 0.1615,
      "step": 1500
    },
    {
      "epoch": 1.8979833926453145,
      "grad_norm": 6.007554531097412,
      "learning_rate": 1.602950030102348e-05,
      "loss": 0.1669,
      "step": 1600
    },
    {
      "epoch": 1.8979833926453145,
      "eval_accuracy": 0.9058561897702001,
      "eval_f1": 0.9057890959881881,
      "eval_loss": 0.2197728157043457,
      "eval_matthews_correlation": 0.8134577263257711,
      "eval_precision": 0.9074290868275333,
      "eval_recall": 0.9060298429301878,
      "eval_runtime": 1.4038,
      "eval_samples_per_second": 4804.94,
      "eval_steps_per_second": 75.511,
      "step": 1600
    },
    {
      "epoch": 2.0166073546856467,
      "grad_norm": 17.52599334716797,
      "learning_rate": 1.5126429861529199e-05,
      "loss": 0.1482,
      "step": 1700
    },
    {
      "epoch": 2.135231316725979,
      "grad_norm": 13.2647066116333,
      "learning_rate": 1.422335942203492e-05,
      "loss": 0.0629,
      "step": 1800
    },
    {
      "epoch": 2.135231316725979,
      "eval_accuracy": 0.900815418828762,
      "eval_f1": 0.9008021532090624,
      "eval_loss": 0.37351372838020325,
      "eval_matthews_correlation": 0.8016708933554149,
      "eval_precision": 0.9008918124334665,
      "eval_recall": 0.9007790888470252,
      "eval_runtime": 1.4037,
      "eval_samples_per_second": 4805.322,
      "eval_steps_per_second": 75.517,
      "step": 1800
    },
    {
      "epoch": 2.2538552787663106,
      "grad_norm": 5.28846549987793,
      "learning_rate": 1.3320288982540638e-05,
      "loss": 0.0622,
      "step": 1900
    },
    {
      "epoch": 2.372479240806643,
      "grad_norm": 11.060345649719238,
      "learning_rate": 1.2417218543046358e-05,
      "loss": 0.0748,
      "step": 2000
    },
    {
      "epoch": 2.372479240806643,
      "eval_accuracy": 0.9082283172720533,
      "eval_f1": 0.9082064942582599,
      "eval_loss": 0.342942476272583,
      "eval_matthews_correlation": 0.8172074539472458,
      "eval_precision": 0.908867392978981,
      "eval_recall": 0.9083402309983616,
      "eval_runtime": 1.425,
      "eval_samples_per_second": 4733.445,
      "eval_steps_per_second": 74.388,
      "step": 2000
    },
    {
      "epoch": 2.491103202846975,
      "grad_norm": 16.14167594909668,
      "learning_rate": 1.1514148103552077e-05,
      "loss": 0.0678,
      "step": 2100
    },
    {
      "epoch": 2.6097271648873073,
      "grad_norm": 14.60816478729248,
      "learning_rate": 1.0611077664057798e-05,
      "loss": 0.0651,
      "step": 2200
    },
    {
      "epoch": 2.6097271648873073,
      "eval_accuracy": 0.9015567086730912,
      "eval_f1": 0.9015537463069576,
      "eval_loss": 0.3432765305042267,
      "eval_matthews_correlation": 0.8031074926139152,
      "eval_precision": 0.9015537463069576,
      "eval_recall": 0.9015537463069576,
      "eval_runtime": 1.413,
      "eval_samples_per_second": 4773.585,
      "eval_steps_per_second": 75.019,
      "step": 2200
    },
    {
      "epoch": 2.7283511269276395,
      "grad_norm": 19.60405731201172,
      "learning_rate": 9.708007224563517e-06,
      "loss": 0.0573,
      "step": 2300
    },
    {
      "epoch": 2.8469750889679717,
      "grad_norm": 12.747634887695312,
      "learning_rate": 8.804936785069236e-06,
      "loss": 0.0587,
      "step": 2400
    },
    {
      "epoch": 2.8469750889679717,
      "eval_accuracy": 0.9012601927353595,
      "eval_f1": 0.9012397677542721,
      "eval_loss": 0.3955250680446625,
      "eval_matthews_correlation": 0.8026222319497477,
      "eval_precision": 0.9014138348009262,
      "eval_recall": 0.901208423433808,
      "eval_runtime": 1.4171,
      "eval_samples_per_second": 4759.731,
      "eval_steps_per_second": 74.801,
      "step": 2400
    },
    {
      "epoch": 2.9655990510083035,
      "grad_norm": 13.178675651550293,
      "learning_rate": 7.901866345574955e-06,
      "loss": 0.0569,
      "step": 2500
    },
    {
      "epoch": 3.0842230130486357,
      "grad_norm": 0.011187891475856304,
      "learning_rate": 6.998795906080675e-06,
      "loss": 0.0263,
      "step": 2600
    },
    {
      "epoch": 3.0842230130486357,
      "eval_accuracy": 0.9057079318013344,
      "eval_f1": 0.9057078799868117,
      "eval_loss": 0.525695264339447,
      "eval_matthews_correlation": 0.8114565321991912,
      "eval_precision": 0.9057254122582752,
      "eval_recall": 0.9057311199609897,
      "eval_runtime": 1.4153,
      "eval_samples_per_second": 4765.722,
      "eval_steps_per_second": 74.895,
      "step": 2600
    },
    {
      "epoch": 3.202846975088968,
      "grad_norm": 0.2136336714029312,
      "learning_rate": 6.104756170981336e-06,
      "loss": 0.011,
      "step": 2700
    },
    {
      "epoch": 3.3214709371293,
      "grad_norm": 33.0368766784668,
      "learning_rate": 5.201685731487056e-06,
      "loss": 0.018,
      "step": 2800
    },
    {
      "epoch": 3.3214709371293,
      "eval_accuracy": 0.9057079318013344,
      "eval_f1": 0.9056997050145418,
      "eval_loss": 0.6456906199455261,
      "eval_matthews_correlation": 0.8117977167650796,
      "eval_precision": 0.9060113235426336,
      "eval_recall": 0.905786424375302,
      "eval_runtime": 1.4245,
      "eval_samples_per_second": 4734.973,
      "eval_steps_per_second": 74.412,
      "step": 2800
    },
    {
      "epoch": 3.4400948991696323,
      "grad_norm": 0.03287828713655472,
      "learning_rate": 4.2986152919927755e-06,
      "loss": 0.0134,
      "step": 2900
    },
    {
      "epoch": 3.5587188612099645,
      "grad_norm": 0.05251970514655113,
      "learning_rate": 3.395544852498495e-06,
      "loss": 0.0197,
      "step": 3000
    },
    {
      "epoch": 3.5587188612099645,
      "eval_accuracy": 0.9027427724240178,
      "eval_f1": 0.9027319948057138,
      "eval_loss": 0.5632529258728027,
      "eval_matthews_correlation": 0.805927089741698,
      "eval_precision": 0.9030994533926766,
      "eval_recall": 0.9028276821717736,
      "eval_runtime": 1.4184,
      "eval_samples_per_second": 4755.316,
      "eval_steps_per_second": 74.731,
      "step": 3000
    },
    {
      "epoch": 3.6773428232502967,
      "grad_norm": 0.18964755535125732,
      "learning_rate": 2.4924744130042145e-06,
      "loss": 0.0103,
      "step": 3100
    },
    {
      "epoch": 3.795966785290629,
      "grad_norm": 2.074934244155884,
      "learning_rate": 1.5894039735099338e-06,
      "loss": 0.0067,
      "step": 3200
    },
    {
      "epoch": 3.795966785290629,
      "eval_accuracy": 0.9052631578947369,
      "eval_f1": 0.9052587514243154,
      "eval_loss": 0.6363104581832886,
      "eval_matthews_correlation": 0.8107917228123425,
      "eval_precision": 0.905463923256376,
      "eval_recall": 0.9053278109809421,
      "eval_runtime": 1.4184,
      "eval_samples_per_second": 4755.362,
      "eval_steps_per_second": 74.732,
      "step": 3200
    },
    {
      "epoch": 3.914590747330961,
      "grad_norm": 0.007920457050204277,
      "learning_rate": 6.863335340156533e-07,
      "loss": 0.0084,
      "step": 3300
    },
    {
      "epoch": 4.0,
      "step": 3372,
      "total_flos": 5866539036180480.0,
      "train_loss": 0.1323915239189302,
      "train_runtime": 204.5834,
      "train_samples_per_second": 1054.865,
      "train_steps_per_second": 16.482
    }
  ],
  "logging_steps": 100,
  "max_steps": 3372,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5866539036180480.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}