{ "best_metric": 0.42440202832221985, "best_model_checkpoint": "output_pipe/prom_core_all/origin/checkpoint-1400", "epoch": 4.0, "eval_steps": 200, "global_step": 2960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13513513513513514, "grad_norm": 4.876448154449463, "learning_rate": 2.9505154639175257e-05, "loss": 0.5587, "step": 100 }, { "epoch": 0.2702702702702703, "grad_norm": 10.904062271118164, "learning_rate": 2.8474226804123712e-05, "loss": 0.4622, "step": 200 }, { "epoch": 0.2702702702702703, "eval_accuracy": 0.7640202702702703, "eval_f1": 0.7591361843228651, "eval_loss": 0.5017790198326111, "eval_matthews_correlation": 0.553929575887497, "eval_precision": 0.7895071132508147, "eval_recall": 0.7649658341702583, "eval_runtime": 1.2346, "eval_samples_per_second": 4794.903, "eval_steps_per_second": 75.325, "step": 200 }, { "epoch": 0.40540540540540543, "grad_norm": 18.69785499572754, "learning_rate": 2.7443298969072163e-05, "loss": 0.4702, "step": 300 }, { "epoch": 0.5405405405405406, "grad_norm": 9.501280784606934, "learning_rate": 2.6412371134020618e-05, "loss": 0.4564, "step": 400 }, { "epoch": 0.5405405405405406, "eval_accuracy": 0.7927364864864865, "eval_f1": 0.7925830768905244, "eval_loss": 0.44070252776145935, "eval_matthews_correlation": 0.5858516234344348, "eval_precision": 0.7932574236086611, "eval_recall": 0.792594574808426, "eval_runtime": 1.2306, "eval_samples_per_second": 4810.56, "eval_steps_per_second": 75.571, "step": 400 }, { "epoch": 0.6756756756756757, "grad_norm": 3.44538950920105, "learning_rate": 2.5381443298969073e-05, "loss": 0.4474, "step": 500 }, { "epoch": 0.8108108108108109, "grad_norm": 6.942621231079102, "learning_rate": 2.4350515463917527e-05, "loss": 0.4292, "step": 600 }, { "epoch": 0.8108108108108109, "eval_accuracy": 0.8099662162162162, "eval_f1": 0.8099642587283544, "eval_loss": 0.435224324464798, "eval_matthews_correlation": 0.6199452042163561, "eval_precision": 0.8099662162162162, "eval_recall": 0.8099789881317014, "eval_runtime": 1.2285, "eval_samples_per_second": 4818.922, "eval_steps_per_second": 75.703, "step": 600 }, { "epoch": 0.9459459459459459, "grad_norm": 9.121011734008789, "learning_rate": 2.3319587628865982e-05, "loss": 0.4311, "step": 700 }, { "epoch": 1.0810810810810811, "grad_norm": 7.060180187225342, "learning_rate": 2.2288659793814433e-05, "loss": 0.3734, "step": 800 }, { "epoch": 1.0810810810810811, "eval_accuracy": 0.8101351351351351, "eval_f1": 0.8100787544330121, "eval_loss": 0.44180622696876526, "eval_matthews_correlation": 0.6210645788205953, "eval_precision": 0.8107856684820405, "eval_recall": 0.8102791169148564, "eval_runtime": 1.2257, "eval_samples_per_second": 4829.832, "eval_steps_per_second": 75.874, "step": 800 }, { "epoch": 1.2162162162162162, "grad_norm": 7.786653995513916, "learning_rate": 2.1257731958762888e-05, "loss": 0.339, "step": 900 }, { "epoch": 1.3513513513513513, "grad_norm": 8.293940544128418, "learning_rate": 2.0226804123711342e-05, "loss": 0.3401, "step": 1000 }, { "epoch": 1.3513513513513513, "eval_accuracy": 0.8038851351351352, "eval_f1": 0.8034422824044964, "eval_loss": 0.4509773850440979, "eval_matthews_correlation": 0.6115825968659001, "eval_precision": 0.807367729138798, "eval_recall": 0.8042229529407885, "eval_runtime": 1.2303, "eval_samples_per_second": 4811.662, "eval_steps_per_second": 75.589, "step": 1000 }, { "epoch": 1.4864864864864864, "grad_norm": 6.160437107086182, "learning_rate": 1.9195876288659794e-05, "loss": 0.3452, "step": 1100 }, { "epoch": 1.6216216216216215, "grad_norm": 6.15946102142334, "learning_rate": 1.8164948453608248e-05, "loss": 0.3358, "step": 1200 }, { "epoch": 1.6216216216216215, "eval_accuracy": 0.8121621621621622, "eval_f1": 0.8121611116570571, "eval_loss": 0.43393442034721375, "eval_matthews_correlation": 0.6244475585739949, "eval_precision": 0.8122367519428947, "eval_recall": 0.8122108071700818, "eval_runtime": 1.2245, "eval_samples_per_second": 4834.71, "eval_steps_per_second": 75.951, "step": 1200 }, { "epoch": 1.7567567567567568, "grad_norm": 9.554533004760742, "learning_rate": 1.71340206185567e-05, "loss": 0.3411, "step": 1300 }, { "epoch": 1.8918918918918919, "grad_norm": 7.378434181213379, "learning_rate": 1.6103092783505154e-05, "loss": 0.3387, "step": 1400 }, { "epoch": 1.8918918918918919, "eval_accuracy": 0.8138513513513513, "eval_f1": 0.8138044071785355, "eval_loss": 0.42440202832221985, "eval_matthews_correlation": 0.6277545331414228, "eval_precision": 0.8139716022487501, "eval_recall": 0.8137829592367016, "eval_runtime": 1.2287, "eval_samples_per_second": 4818.127, "eval_steps_per_second": 75.69, "step": 1400 }, { "epoch": 2.027027027027027, "grad_norm": 7.731504440307617, "learning_rate": 1.5072164948453609e-05, "loss": 0.2999, "step": 1500 }, { "epoch": 2.1621621621621623, "grad_norm": 6.5144171714782715, "learning_rate": 1.4041237113402062e-05, "loss": 0.1876, "step": 1600 }, { "epoch": 2.1621621621621623, "eval_accuracy": 0.8087837837837838, "eval_f1": 0.8087488584474887, "eval_loss": 0.49598315358161926, "eval_matthews_correlation": 0.617579161176319, "eval_precision": 0.8088488091694348, "eval_recall": 0.8087303633652729, "eval_runtime": 1.2259, "eval_samples_per_second": 4829.218, "eval_steps_per_second": 75.864, "step": 1600 }, { "epoch": 2.2972972972972974, "grad_norm": 7.4392828941345215, "learning_rate": 1.3010309278350516e-05, "loss": 0.1799, "step": 1700 }, { "epoch": 2.4324324324324325, "grad_norm": 10.97509765625, "learning_rate": 1.197938144329897e-05, "loss": 0.1938, "step": 1800 }, { "epoch": 2.4324324324324325, "eval_accuracy": 0.810304054054054, "eval_f1": 0.8102811826035342, "eval_loss": 0.5354723334312439, "eval_matthews_correlation": 0.6205960823961605, "eval_precision": 0.8103291244096114, "eval_recall": 0.8102669610999084, "eval_runtime": 1.2215, "eval_samples_per_second": 4846.477, "eval_steps_per_second": 76.136, "step": 1800 }, { "epoch": 2.5675675675675675, "grad_norm": 11.830049514770508, "learning_rate": 1.0948453608247422e-05, "loss": 0.1906, "step": 1900 }, { "epoch": 2.7027027027027026, "grad_norm": 13.479290008544922, "learning_rate": 9.917525773195877e-06, "loss": 0.1834, "step": 2000 }, { "epoch": 2.7027027027027026, "eval_accuracy": 0.808277027027027, "eval_f1": 0.8081915116065802, "eval_loss": 0.6007954478263855, "eval_matthews_correlation": 0.616732464901666, "eval_precision": 0.8085577925672578, "eval_recall": 0.8081747912595467, "eval_runtime": 1.2278, "eval_samples_per_second": 4821.694, "eval_steps_per_second": 75.746, "step": 2000 }, { "epoch": 2.8378378378378377, "grad_norm": 12.72508716583252, "learning_rate": 8.88659793814433e-06, "loss": 0.1862, "step": 2100 }, { "epoch": 2.972972972972973, "grad_norm": 10.600125312805176, "learning_rate": 7.855670103092785e-06, "loss": 0.182, "step": 2200 }, { "epoch": 2.972972972972973, "eval_accuracy": 0.8099662162162162, "eval_f1": 0.8095247628533248, "eval_loss": 0.5430678129196167, "eval_matthews_correlation": 0.6218845218242811, "eval_precision": 0.8121990223274058, "eval_recall": 0.8096905586070646, "eval_runtime": 1.2235, "eval_samples_per_second": 4838.563, "eval_steps_per_second": 76.011, "step": 2200 }, { "epoch": 3.108108108108108, "grad_norm": 7.155954837799072, "learning_rate": 6.835051546391753e-06, "loss": 0.0831, "step": 2300 }, { "epoch": 3.2432432432432434, "grad_norm": 11.190349578857422, "learning_rate": 5.804123711340207e-06, "loss": 0.068, "step": 2400 }, { "epoch": 3.2432432432432434, "eval_accuracy": 0.8138513513513513, "eval_f1": 0.8137798524725756, "eval_loss": 0.8873556852340698, "eval_matthews_correlation": 0.6278424881602821, "eval_precision": 0.8140834677853808, "eval_recall": 0.8137591041632354, "eval_runtime": 1.2293, "eval_samples_per_second": 4815.887, "eval_steps_per_second": 75.655, "step": 2400 }, { "epoch": 3.3783783783783785, "grad_norm": 3.9781887531280518, "learning_rate": 4.77319587628866e-06, "loss": 0.0566, "step": 2500 }, { "epoch": 3.5135135135135136, "grad_norm": 4.182361602783203, "learning_rate": 3.752577319587629e-06, "loss": 0.0682, "step": 2600 }, { "epoch": 3.5135135135135136, "eval_accuracy": 0.8025337837837838, "eval_f1": 0.8023449121382681, "eval_loss": 1.001542568206787, "eval_matthews_correlation": 0.6069330253978419, "eval_precision": 0.8041693671082318, "eval_recall": 0.8027652823989849, "eval_runtime": 1.2241, "eval_samples_per_second": 4836.041, "eval_steps_per_second": 75.972, "step": 2600 }, { "epoch": 3.6486486486486487, "grad_norm": 19.823562622070312, "learning_rate": 2.7216494845360823e-06, "loss": 0.0576, "step": 2700 }, { "epoch": 3.7837837837837838, "grad_norm": 33.46441650390625, "learning_rate": 1.6907216494845361e-06, "loss": 0.0627, "step": 2800 }, { "epoch": 3.7837837837837838, "eval_accuracy": 0.808445945945946, "eval_f1": 0.8084343797693228, "eval_loss": 1.051027774810791, "eval_matthews_correlation": 0.6171943889514601, "eval_precision": 0.8086652678456729, "eval_recall": 0.8085291361187612, "eval_runtime": 1.2237, "eval_samples_per_second": 4837.925, "eval_steps_per_second": 76.001, "step": 2800 }, { "epoch": 3.918918918918919, "grad_norm": 14.81698989868164, "learning_rate": 6.597938144329897e-07, "loss": 0.0608, "step": 2900 }, { "epoch": 4.0, "step": 2960, "total_flos": 2703390177632256.0, "train_loss": 0.2621168656929119, "train_runtime": 176.8137, "train_samples_per_second": 1071.32, "train_steps_per_second": 16.741 } ], "logging_steps": 100, "max_steps": 2960, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2703390177632256.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }