{
  "best_metric": 0.18491357564926147,
  "best_model_checkpoint": "output_pipe/prom_300_all/origin/checkpoint-1400",
  "epoch": 4.0,
  "eval_steps": 200,
  "global_step": 2960,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13513513513513514,
      "grad_norm": 10.229140281677246,
      "learning_rate": 2.951546391752577e-05,
      "loss": 0.3957,
      "step": 100
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 4.366018772125244,
      "learning_rate": 2.8484536082474226e-05,
      "loss": 0.3029,
      "step": 200
    },
    {
      "epoch": 0.2702702702702703,
      "eval_accuracy": 0.8190878378378378,
      "eval_f1": 0.8137850769429716,
      "eval_loss": 0.37689200043678284,
      "eval_matthews_correlation": 0.675083279534266,
      "eval_precision": 0.8579374628996808,
      "eval_recall": 0.8183080017768842,
      "eval_runtime": 1.6963,
      "eval_samples_per_second": 3490.009,
      "eval_steps_per_second": 54.826,
      "step": 200
    },
    {
      "epoch": 0.40540540540540543,
      "grad_norm": 12.47264575958252,
      "learning_rate": 2.745360824742268e-05,
      "loss": 0.2656,
      "step": 300
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 8.677423477172852,
      "learning_rate": 2.6422680412371135e-05,
      "loss": 0.2217,
      "step": 400
    },
    {
      "epoch": 0.5405405405405406,
      "eval_accuracy": 0.9070945945945946,
      "eval_f1": 0.9068358241206858,
      "eval_loss": 0.23628243803977966,
      "eval_matthews_correlation": 0.8197057914004126,
      "eval_precision": 0.9123568337823056,
      "eval_recall": 0.9073641621822256,
      "eval_runtime": 1.6952,
      "eval_samples_per_second": 3492.293,
      "eval_steps_per_second": 54.862,
      "step": 400
    },
    {
      "epoch": 0.6756756756756757,
      "grad_norm": 10.468666076660156,
      "learning_rate": 2.5391752577319586e-05,
      "loss": 0.2133,
      "step": 500
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 6.7936506271362305,
      "learning_rate": 2.436082474226804e-05,
      "loss": 0.2092,
      "step": 600
    },
    {
      "epoch": 0.8108108108108109,
      "eval_accuracy": 0.9152027027027027,
      "eval_f1": 0.9149447781009592,
      "eval_loss": 0.2045987993478775,
      "eval_matthews_correlation": 0.8364946316412692,
      "eval_precision": 0.921029377746935,
      "eval_recall": 0.9154836370974333,
      "eval_runtime": 1.7024,
      "eval_samples_per_second": 3477.472,
      "eval_steps_per_second": 54.629,
      "step": 600
    },
    {
      "epoch": 0.9459459459459459,
      "grad_norm": 7.481766700744629,
      "learning_rate": 2.3329896907216496e-05,
      "loss": 0.2038,
      "step": 700
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 6.448531627655029,
      "learning_rate": 2.229896907216495e-05,
      "loss": 0.1432,
      "step": 800
    },
    {
      "epoch": 1.0810810810810811,
      "eval_accuracy": 0.93125,
      "eval_f1": 0.9312120773759067,
      "eval_loss": 0.20802178978919983,
      "eval_matthews_correlation": 0.8630798400949948,
      "eval_precision": 0.9319203976686419,
      "eval_recall": 0.9311597775881583,
      "eval_runtime": 1.7038,
      "eval_samples_per_second": 3474.605,
      "eval_steps_per_second": 54.584,
      "step": 800
    },
    {
      "epoch": 1.2162162162162162,
      "grad_norm": 5.360595703125,
      "learning_rate": 2.1268041237113405e-05,
      "loss": 0.1225,
      "step": 900
    },
    {
      "epoch": 1.3513513513513513,
      "grad_norm": 12.399473190307617,
      "learning_rate": 2.0237113402061856e-05,
      "loss": 0.119,
      "step": 1000
    },
    {
      "epoch": 1.3513513513513513,
      "eval_accuracy": 0.9273648648648649,
      "eval_f1": 0.9273253739163184,
      "eval_loss": 0.1945222169160843,
      "eval_matthews_correlation": 0.8561116827116911,
      "eval_precision": 0.9286165491178344,
      "eval_recall": 0.9274958671007523,
      "eval_runtime": 1.7116,
      "eval_samples_per_second": 3458.664,
      "eval_steps_per_second": 54.334,
      "step": 1000
    },
    {
      "epoch": 1.4864864864864864,
      "grad_norm": 4.339570045471191,
      "learning_rate": 1.9206185567010307e-05,
      "loss": 0.1273,
      "step": 1100
    },
    {
      "epoch": 1.6216216216216215,
      "grad_norm": 14.022846221923828,
      "learning_rate": 1.8175257731958762e-05,
      "loss": 0.1153,
      "step": 1200
    },
    {
      "epoch": 1.6216216216216215,
      "eval_accuracy": 0.9302364864864865,
      "eval_f1": 0.9302364685710435,
      "eval_loss": 0.18970273435115814,
      "eval_matthews_correlation": 0.8605061164956768,
      "eval_precision": 0.9302512167180906,
      "eval_recall": 0.9302548997854683,
      "eval_runtime": 1.7115,
      "eval_samples_per_second": 3458.921,
      "eval_steps_per_second": 54.338,
      "step": 1200
    },
    {
      "epoch": 1.7567567567567568,
      "grad_norm": 4.394283771514893,
      "learning_rate": 1.7144329896907217e-05,
      "loss": 0.1036,
      "step": 1300
    },
    {
      "epoch": 1.8918918918918919,
      "grad_norm": 5.471324920654297,
      "learning_rate": 1.611340206185567e-05,
      "loss": 0.1189,
      "step": 1400
    },
    {
      "epoch": 1.8918918918918919,
      "eval_accuracy": 0.9305743243243243,
      "eval_f1": 0.9305326495433668,
      "eval_loss": 0.18491357564926147,
      "eval_matthews_correlation": 0.8617933500441142,
      "eval_precision": 0.9313144616824476,
      "eval_recall": 0.9304792930448134,
      "eval_runtime": 1.7174,
      "eval_samples_per_second": 3447.102,
      "eval_steps_per_second": 54.152,
      "step": 1400
    },
    {
      "epoch": 2.027027027027027,
      "grad_norm": 5.195973873138428,
      "learning_rate": 1.5082474226804124e-05,
      "loss": 0.0993,
      "step": 1500
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 3.4343478679656982,
      "learning_rate": 1.4051546391752577e-05,
      "loss": 0.0416,
      "step": 1600
    },
    {
      "epoch": 2.1621621621621623,
      "eval_accuracy": 0.9320945945945946,
      "eval_f1": 0.9320657433152479,
      "eval_loss": 0.2865821123123169,
      "eval_matthews_correlation": 0.8646019805851967,
      "eval_precision": 0.9325841926158089,
      "eval_recall": 0.9320179733750436,
      "eval_runtime": 1.7142,
      "eval_samples_per_second": 3453.6,
      "eval_steps_per_second": 54.254,
      "step": 1600
    },
    {
      "epoch": 2.2972972972972974,
      "grad_norm": 1.7518272399902344,
      "learning_rate": 1.3020618556701032e-05,
      "loss": 0.0321,
      "step": 1700
    },
    {
      "epoch": 2.4324324324324325,
      "grad_norm": 0.545198380947113,
      "learning_rate": 1.1989690721649485e-05,
      "loss": 0.0472,
      "step": 1800
    },
    {
      "epoch": 2.4324324324324325,
      "eval_accuracy": 0.9346283783783784,
      "eval_f1": 0.9346277050025539,
      "eval_loss": 0.2627970576286316,
      "eval_matthews_correlation": 0.8693732402984062,
      "eval_precision": 0.9347087744082508,
      "eval_recall": 0.9346644670192129,
      "eval_runtime": 1.7138,
      "eval_samples_per_second": 3454.269,
      "eval_steps_per_second": 54.265,
      "step": 1800
    },
    {
      "epoch": 2.5675675675675675,
      "grad_norm": 8.60261344909668,
      "learning_rate": 1.0958762886597938e-05,
      "loss": 0.0463,
      "step": 1900
    },
    {
      "epoch": 2.7027027027027026,
      "grad_norm": 5.734014511108398,
      "learning_rate": 9.927835051546392e-06,
      "loss": 0.0426,
      "step": 2000
    },
    {
      "epoch": 2.7027027027027026,
      "eval_accuracy": 0.9390202702702702,
      "eval_f1": 0.9390032120412144,
      "eval_loss": 0.24185040593147278,
      "eval_matthews_correlation": 0.8782767377163032,
      "eval_precision": 0.9393146284000857,
      "eval_recall": 0.9389621800341589,
      "eval_runtime": 1.7176,
      "eval_samples_per_second": 3446.664,
      "eval_steps_per_second": 54.145,
      "step": 2000
    },
    {
      "epoch": 2.8378378378378377,
      "grad_norm": 16.915462493896484,
      "learning_rate": 8.896907216494845e-06,
      "loss": 0.0404,
      "step": 2100
    },
    {
      "epoch": 2.972972972972973,
      "grad_norm": 11.942590713500977,
      "learning_rate": 7.8659793814433e-06,
      "loss": 0.0449,
      "step": 2200
    },
    {
      "epoch": 2.972972972972973,
      "eval_accuracy": 0.9346283783783784,
      "eval_f1": 0.9346085835481779,
      "eval_loss": 0.26332417130470276,
      "eval_matthews_correlation": 0.8695174028730603,
      "eval_precision": 0.9349504933868377,
      "eval_recall": 0.9345669940571169,
      "eval_runtime": 1.7174,
      "eval_samples_per_second": 3447.038,
      "eval_steps_per_second": 54.151,
      "step": 2200
    },
    {
      "epoch": 3.108108108108108,
      "grad_norm": 0.1922147125005722,
      "learning_rate": 6.835051546391753e-06,
      "loss": 0.0183,
      "step": 2300
    },
    {
      "epoch": 3.2432432432432434,
      "grad_norm": 0.016464663669466972,
      "learning_rate": 5.804123711340207e-06,
      "loss": 0.0151,
      "step": 2400
    },
    {
      "epoch": 3.2432432432432434,
      "eval_accuracy": 0.935304054054054,
      "eval_f1": 0.9352742660769024,
      "eval_loss": 0.3918153643608093,
      "eval_matthews_correlation": 0.8710722575664533,
      "eval_precision": 0.9358489722798395,
      "eval_recall": 0.9352235098392906,
      "eval_runtime": 1.7154,
      "eval_samples_per_second": 3451.033,
      "eval_steps_per_second": 54.214,
      "step": 2400
    },
    {
      "epoch": 3.3783783783783785,
      "grad_norm": 0.8072592616081238,
      "learning_rate": 4.77319587628866e-06,
      "loss": 0.0086,
      "step": 2500
    },
    {
      "epoch": 3.5135135135135136,
      "grad_norm": 11.552366256713867,
      "learning_rate": 3.7422680412371135e-06,
      "loss": 0.013,
      "step": 2600
    },
    {
      "epoch": 3.5135135135135136,
      "eval_accuracy": 0.9363175675675676,
      "eval_f1": 0.936317042424479,
      "eval_loss": 0.35771170258522034,
      "eval_matthews_correlation": 0.872640550366574,
      "eval_precision": 0.9363156189326801,
      "eval_recall": 0.9363249314835842,
      "eval_runtime": 1.7187,
      "eval_samples_per_second": 3444.564,
      "eval_steps_per_second": 54.112,
      "step": 2600
    },
    {
      "epoch": 3.6486486486486487,
      "grad_norm": 0.023688938468694687,
      "learning_rate": 2.711340206185567e-06,
      "loss": 0.0097,
      "step": 2700
    },
    {
      "epoch": 3.7837837837837838,
      "grad_norm": 0.010523764416575432,
      "learning_rate": 1.6804123711340206e-06,
      "loss": 0.011,
      "step": 2800
    },
    {
      "epoch": 3.7837837837837838,
      "eval_accuracy": 0.9363175675675676,
      "eval_f1": 0.9363174785300759,
      "eval_loss": 0.3880373537540436,
      "eval_matthews_correlation": 0.8727004149732407,
      "eval_precision": 0.9363563085660243,
      "eval_recall": 0.9363441064925211,
      "eval_runtime": 1.7189,
      "eval_samples_per_second": 3444.029,
      "eval_steps_per_second": 54.104,
      "step": 2800
    },
    {
      "epoch": 3.918918918918919,
      "grad_norm": 0.0027509788051247597,
      "learning_rate": 6.494845360824742e-07,
      "loss": 0.0095,
      "step": 2900
    },
    {
      "epoch": 4.0,
      "step": 2960,
      "total_flos": 9268766323310592.0,
      "train_loss": 0.10634732993470655,
      "train_runtime": 216.3397,
      "train_samples_per_second": 875.586,
      "train_steps_per_second": 13.682
    }
  ],
  "logging_steps": 100,
  "max_steps": 2960,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9268766323310592.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}