Chenghao-Qiu's picture
Upload folder using huggingface_hub
284cd41 verified
{
"best_metric": 0.18491357564926147,
"best_model_checkpoint": "output_pipe/prom_300_all/origin/checkpoint-1400",
"epoch": 4.0,
"eval_steps": 200,
"global_step": 2960,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.13513513513513514,
"grad_norm": 10.229140281677246,
"learning_rate": 2.951546391752577e-05,
"loss": 0.3957,
"step": 100
},
{
"epoch": 0.2702702702702703,
"grad_norm": 4.366018772125244,
"learning_rate": 2.8484536082474226e-05,
"loss": 0.3029,
"step": 200
},
{
"epoch": 0.2702702702702703,
"eval_accuracy": 0.8190878378378378,
"eval_f1": 0.8137850769429716,
"eval_loss": 0.37689200043678284,
"eval_matthews_correlation": 0.675083279534266,
"eval_precision": 0.8579374628996808,
"eval_recall": 0.8183080017768842,
"eval_runtime": 1.6963,
"eval_samples_per_second": 3490.009,
"eval_steps_per_second": 54.826,
"step": 200
},
{
"epoch": 0.40540540540540543,
"grad_norm": 12.47264575958252,
"learning_rate": 2.745360824742268e-05,
"loss": 0.2656,
"step": 300
},
{
"epoch": 0.5405405405405406,
"grad_norm": 8.677423477172852,
"learning_rate": 2.6422680412371135e-05,
"loss": 0.2217,
"step": 400
},
{
"epoch": 0.5405405405405406,
"eval_accuracy": 0.9070945945945946,
"eval_f1": 0.9068358241206858,
"eval_loss": 0.23628243803977966,
"eval_matthews_correlation": 0.8197057914004126,
"eval_precision": 0.9123568337823056,
"eval_recall": 0.9073641621822256,
"eval_runtime": 1.6952,
"eval_samples_per_second": 3492.293,
"eval_steps_per_second": 54.862,
"step": 400
},
{
"epoch": 0.6756756756756757,
"grad_norm": 10.468666076660156,
"learning_rate": 2.5391752577319586e-05,
"loss": 0.2133,
"step": 500
},
{
"epoch": 0.8108108108108109,
"grad_norm": 6.7936506271362305,
"learning_rate": 2.436082474226804e-05,
"loss": 0.2092,
"step": 600
},
{
"epoch": 0.8108108108108109,
"eval_accuracy": 0.9152027027027027,
"eval_f1": 0.9149447781009592,
"eval_loss": 0.2045987993478775,
"eval_matthews_correlation": 0.8364946316412692,
"eval_precision": 0.921029377746935,
"eval_recall": 0.9154836370974333,
"eval_runtime": 1.7024,
"eval_samples_per_second": 3477.472,
"eval_steps_per_second": 54.629,
"step": 600
},
{
"epoch": 0.9459459459459459,
"grad_norm": 7.481766700744629,
"learning_rate": 2.3329896907216496e-05,
"loss": 0.2038,
"step": 700
},
{
"epoch": 1.0810810810810811,
"grad_norm": 6.448531627655029,
"learning_rate": 2.229896907216495e-05,
"loss": 0.1432,
"step": 800
},
{
"epoch": 1.0810810810810811,
"eval_accuracy": 0.93125,
"eval_f1": 0.9312120773759067,
"eval_loss": 0.20802178978919983,
"eval_matthews_correlation": 0.8630798400949948,
"eval_precision": 0.9319203976686419,
"eval_recall": 0.9311597775881583,
"eval_runtime": 1.7038,
"eval_samples_per_second": 3474.605,
"eval_steps_per_second": 54.584,
"step": 800
},
{
"epoch": 1.2162162162162162,
"grad_norm": 5.360595703125,
"learning_rate": 2.1268041237113405e-05,
"loss": 0.1225,
"step": 900
},
{
"epoch": 1.3513513513513513,
"grad_norm": 12.399473190307617,
"learning_rate": 2.0237113402061856e-05,
"loss": 0.119,
"step": 1000
},
{
"epoch": 1.3513513513513513,
"eval_accuracy": 0.9273648648648649,
"eval_f1": 0.9273253739163184,
"eval_loss": 0.1945222169160843,
"eval_matthews_correlation": 0.8561116827116911,
"eval_precision": 0.9286165491178344,
"eval_recall": 0.9274958671007523,
"eval_runtime": 1.7116,
"eval_samples_per_second": 3458.664,
"eval_steps_per_second": 54.334,
"step": 1000
},
{
"epoch": 1.4864864864864864,
"grad_norm": 4.339570045471191,
"learning_rate": 1.9206185567010307e-05,
"loss": 0.1273,
"step": 1100
},
{
"epoch": 1.6216216216216215,
"grad_norm": 14.022846221923828,
"learning_rate": 1.8175257731958762e-05,
"loss": 0.1153,
"step": 1200
},
{
"epoch": 1.6216216216216215,
"eval_accuracy": 0.9302364864864865,
"eval_f1": 0.9302364685710435,
"eval_loss": 0.18970273435115814,
"eval_matthews_correlation": 0.8605061164956768,
"eval_precision": 0.9302512167180906,
"eval_recall": 0.9302548997854683,
"eval_runtime": 1.7115,
"eval_samples_per_second": 3458.921,
"eval_steps_per_second": 54.338,
"step": 1200
},
{
"epoch": 1.7567567567567568,
"grad_norm": 4.394283771514893,
"learning_rate": 1.7144329896907217e-05,
"loss": 0.1036,
"step": 1300
},
{
"epoch": 1.8918918918918919,
"grad_norm": 5.471324920654297,
"learning_rate": 1.611340206185567e-05,
"loss": 0.1189,
"step": 1400
},
{
"epoch": 1.8918918918918919,
"eval_accuracy": 0.9305743243243243,
"eval_f1": 0.9305326495433668,
"eval_loss": 0.18491357564926147,
"eval_matthews_correlation": 0.8617933500441142,
"eval_precision": 0.9313144616824476,
"eval_recall": 0.9304792930448134,
"eval_runtime": 1.7174,
"eval_samples_per_second": 3447.102,
"eval_steps_per_second": 54.152,
"step": 1400
},
{
"epoch": 2.027027027027027,
"grad_norm": 5.195973873138428,
"learning_rate": 1.5082474226804124e-05,
"loss": 0.0993,
"step": 1500
},
{
"epoch": 2.1621621621621623,
"grad_norm": 3.4343478679656982,
"learning_rate": 1.4051546391752577e-05,
"loss": 0.0416,
"step": 1600
},
{
"epoch": 2.1621621621621623,
"eval_accuracy": 0.9320945945945946,
"eval_f1": 0.9320657433152479,
"eval_loss": 0.2865821123123169,
"eval_matthews_correlation": 0.8646019805851967,
"eval_precision": 0.9325841926158089,
"eval_recall": 0.9320179733750436,
"eval_runtime": 1.7142,
"eval_samples_per_second": 3453.6,
"eval_steps_per_second": 54.254,
"step": 1600
},
{
"epoch": 2.2972972972972974,
"grad_norm": 1.7518272399902344,
"learning_rate": 1.3020618556701032e-05,
"loss": 0.0321,
"step": 1700
},
{
"epoch": 2.4324324324324325,
"grad_norm": 0.545198380947113,
"learning_rate": 1.1989690721649485e-05,
"loss": 0.0472,
"step": 1800
},
{
"epoch": 2.4324324324324325,
"eval_accuracy": 0.9346283783783784,
"eval_f1": 0.9346277050025539,
"eval_loss": 0.2627970576286316,
"eval_matthews_correlation": 0.8693732402984062,
"eval_precision": 0.9347087744082508,
"eval_recall": 0.9346644670192129,
"eval_runtime": 1.7138,
"eval_samples_per_second": 3454.269,
"eval_steps_per_second": 54.265,
"step": 1800
},
{
"epoch": 2.5675675675675675,
"grad_norm": 8.60261344909668,
"learning_rate": 1.0958762886597938e-05,
"loss": 0.0463,
"step": 1900
},
{
"epoch": 2.7027027027027026,
"grad_norm": 5.734014511108398,
"learning_rate": 9.927835051546392e-06,
"loss": 0.0426,
"step": 2000
},
{
"epoch": 2.7027027027027026,
"eval_accuracy": 0.9390202702702702,
"eval_f1": 0.9390032120412144,
"eval_loss": 0.24185040593147278,
"eval_matthews_correlation": 0.8782767377163032,
"eval_precision": 0.9393146284000857,
"eval_recall": 0.9389621800341589,
"eval_runtime": 1.7176,
"eval_samples_per_second": 3446.664,
"eval_steps_per_second": 54.145,
"step": 2000
},
{
"epoch": 2.8378378378378377,
"grad_norm": 16.915462493896484,
"learning_rate": 8.896907216494845e-06,
"loss": 0.0404,
"step": 2100
},
{
"epoch": 2.972972972972973,
"grad_norm": 11.942590713500977,
"learning_rate": 7.8659793814433e-06,
"loss": 0.0449,
"step": 2200
},
{
"epoch": 2.972972972972973,
"eval_accuracy": 0.9346283783783784,
"eval_f1": 0.9346085835481779,
"eval_loss": 0.26332417130470276,
"eval_matthews_correlation": 0.8695174028730603,
"eval_precision": 0.9349504933868377,
"eval_recall": 0.9345669940571169,
"eval_runtime": 1.7174,
"eval_samples_per_second": 3447.038,
"eval_steps_per_second": 54.151,
"step": 2200
},
{
"epoch": 3.108108108108108,
"grad_norm": 0.1922147125005722,
"learning_rate": 6.835051546391753e-06,
"loss": 0.0183,
"step": 2300
},
{
"epoch": 3.2432432432432434,
"grad_norm": 0.016464663669466972,
"learning_rate": 5.804123711340207e-06,
"loss": 0.0151,
"step": 2400
},
{
"epoch": 3.2432432432432434,
"eval_accuracy": 0.935304054054054,
"eval_f1": 0.9352742660769024,
"eval_loss": 0.3918153643608093,
"eval_matthews_correlation": 0.8710722575664533,
"eval_precision": 0.9358489722798395,
"eval_recall": 0.9352235098392906,
"eval_runtime": 1.7154,
"eval_samples_per_second": 3451.033,
"eval_steps_per_second": 54.214,
"step": 2400
},
{
"epoch": 3.3783783783783785,
"grad_norm": 0.8072592616081238,
"learning_rate": 4.77319587628866e-06,
"loss": 0.0086,
"step": 2500
},
{
"epoch": 3.5135135135135136,
"grad_norm": 11.552366256713867,
"learning_rate": 3.7422680412371135e-06,
"loss": 0.013,
"step": 2600
},
{
"epoch": 3.5135135135135136,
"eval_accuracy": 0.9363175675675676,
"eval_f1": 0.936317042424479,
"eval_loss": 0.35771170258522034,
"eval_matthews_correlation": 0.872640550366574,
"eval_precision": 0.9363156189326801,
"eval_recall": 0.9363249314835842,
"eval_runtime": 1.7187,
"eval_samples_per_second": 3444.564,
"eval_steps_per_second": 54.112,
"step": 2600
},
{
"epoch": 3.6486486486486487,
"grad_norm": 0.023688938468694687,
"learning_rate": 2.711340206185567e-06,
"loss": 0.0097,
"step": 2700
},
{
"epoch": 3.7837837837837838,
"grad_norm": 0.010523764416575432,
"learning_rate": 1.6804123711340206e-06,
"loss": 0.011,
"step": 2800
},
{
"epoch": 3.7837837837837838,
"eval_accuracy": 0.9363175675675676,
"eval_f1": 0.9363174785300759,
"eval_loss": 0.3880373537540436,
"eval_matthews_correlation": 0.8727004149732407,
"eval_precision": 0.9363563085660243,
"eval_recall": 0.9363441064925211,
"eval_runtime": 1.7189,
"eval_samples_per_second": 3444.029,
"eval_steps_per_second": 54.104,
"step": 2800
},
{
"epoch": 3.918918918918919,
"grad_norm": 0.0027509788051247597,
"learning_rate": 6.494845360824742e-07,
"loss": 0.0095,
"step": 2900
},
{
"epoch": 4.0,
"step": 2960,
"total_flos": 9268766323310592.0,
"train_loss": 0.10634732993470655,
"train_runtime": 216.3397,
"train_samples_per_second": 875.586,
"train_steps_per_second": 13.682
}
],
"logging_steps": 100,
"max_steps": 2960,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 9268766323310592.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}