{ "best_metric": 0.39341670274734497, "best_model_checkpoint": "output_pipe/prom_core_all/origin/checkpoint-1400", "epoch": 4.0, "eval_steps": 200, "global_step": 2960, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.13513513513513514, "grad_norm": 4.684391498565674, "learning_rate": 2.951546391752577e-05, "loss": 0.5941, "step": 100 }, { "epoch": 0.2702702702702703, "grad_norm": 4.193660259246826, "learning_rate": 2.8484536082474226e-05, "loss": 0.4474, "step": 200 }, { "epoch": 0.2702702702702703, "eval_accuracy": 0.7947635135135135, "eval_f1": 0.7936182509778013, "eval_loss": 0.44701528549194336, "eval_matthews_correlation": 0.5977995266975996, "eval_precision": 0.8025695906432748, "eval_recall": 0.7952744469132733, "eval_runtime": 0.8529, "eval_samples_per_second": 6941.23, "eval_steps_per_second": 109.043, "step": 200 }, { "epoch": 0.40540540540540543, "grad_norm": 4.3338518142700195, "learning_rate": 2.745360824742268e-05, "loss": 0.4384, "step": 300 }, { "epoch": 0.5405405405405406, "grad_norm": 2.3751919269561768, "learning_rate": 2.6422680412371135e-05, "loss": 0.4182, "step": 400 }, { "epoch": 0.5405405405405406, "eval_accuracy": 0.8077702702702703, "eval_f1": 0.8077296945997667, "eval_loss": 0.4295107126235962, "eval_matthews_correlation": 0.6155656382813084, "eval_precision": 0.8078553532095232, "eval_recall": 0.8077103021616007, "eval_runtime": 0.7841, "eval_samples_per_second": 7550.495, "eval_steps_per_second": 118.614, "step": 400 }, { "epoch": 0.6756756756756757, "grad_norm": 2.5912091732025146, "learning_rate": 2.5391752577319586e-05, "loss": 0.4153, "step": 500 }, { "epoch": 0.8108108108108109, "grad_norm": 2.8132834434509277, "learning_rate": 2.436082474226804e-05, "loss": 0.3994, "step": 600 }, { "epoch": 0.8108108108108109, "eval_accuracy": 0.8109797297297298, "eval_f1": 0.8104963186342169, "eval_loss": 0.4092378318309784, "eval_matthews_correlation": 0.6241638927167956, "eval_precision": 0.8134812050856828, "eval_recall": 0.810688933380313, "eval_runtime": 0.7912, "eval_samples_per_second": 7482.649, "eval_steps_per_second": 117.548, "step": 600 }, { "epoch": 0.9459459459459459, "grad_norm": 3.8523988723754883, "learning_rate": 2.3329896907216496e-05, "loss": 0.4037, "step": 700 }, { "epoch": 1.0810810810810811, "grad_norm": 3.936718225479126, "learning_rate": 2.229896907216495e-05, "loss": 0.3666, "step": 800 }, { "epoch": 1.0810810810810811, "eval_accuracy": 0.8177364864864864, "eval_f1": 0.8174029899785866, "eval_loss": 0.431902676820755, "eval_matthews_correlation": 0.6369721249100475, "eval_precision": 0.819479365887341, "eval_recall": 0.8174958473339216, "eval_runtime": 0.7965, "eval_samples_per_second": 7432.068, "eval_steps_per_second": 116.754, "step": 800 }, { "epoch": 1.2162162162162162, "grad_norm": 3.325840473175049, "learning_rate": 2.1268041237113405e-05, "loss": 0.3383, "step": 900 }, { "epoch": 1.3513513513513513, "grad_norm": 3.3491995334625244, "learning_rate": 2.0237113402061856e-05, "loss": 0.3313, "step": 1000 }, { "epoch": 1.3513513513513513, "eval_accuracy": 0.816722972972973, "eval_f1": 0.816423767977614, "eval_loss": 0.4097561538219452, "eval_matthews_correlation": 0.636440700980926, "eval_precision": 0.8194294997743541, "eval_recall": 0.8170157782478027, "eval_runtime": 0.7916, "eval_samples_per_second": 7478.063, "eval_steps_per_second": 117.476, "step": 1000 }, { "epoch": 1.4864864864864864, "grad_norm": 2.8750483989715576, "learning_rate": 1.9206185567010307e-05, "loss": 0.3422, "step": 1100 }, { "epoch": 1.6216216216216215, "grad_norm": 2.9016213417053223, "learning_rate": 1.8175257731958762e-05, "loss": 0.3342, "step": 1200 }, { "epoch": 1.6216216216216215, "eval_accuracy": 0.8226351351351351, "eval_f1": 0.8226311673334548, "eval_loss": 0.3969063460826874, "eval_matthews_correlation": 0.6452670293936331, "eval_precision": 0.8226283593061887, "eval_recall": 0.8226386701698242, "eval_runtime": 0.8057, "eval_samples_per_second": 7347.434, "eval_steps_per_second": 115.424, "step": 1200 }, { "epoch": 1.7567567567567568, "grad_norm": 3.7780559062957764, "learning_rate": 1.7144329896907217e-05, "loss": 0.3388, "step": 1300 }, { "epoch": 1.8918918918918919, "grad_norm": 2.481076955795288, "learning_rate": 1.611340206185567e-05, "loss": 0.3368, "step": 1400 }, { "epoch": 1.8918918918918919, "eval_accuracy": 0.8263513513513514, "eval_f1": 0.8261155586825131, "eval_loss": 0.39341670274734497, "eval_matthews_correlation": 0.6537637243776512, "eval_precision": 0.8276163526274195, "eval_recall": 0.8261490184207965, "eval_runtime": 0.7883, "eval_samples_per_second": 7509.461, "eval_steps_per_second": 117.97, "step": 1400 }, { "epoch": 2.027027027027027, "grad_norm": 3.455223560333252, "learning_rate": 1.5082474226804124e-05, "loss": 0.3164, "step": 1500 }, { "epoch": 2.1621621621621623, "grad_norm": 3.386279344558716, "learning_rate": 1.4051546391752577e-05, "loss": 0.2593, "step": 1600 }, { "epoch": 2.1621621621621623, "eval_accuracy": 0.8304054054054054, "eval_f1": 0.8303626359412428, "eval_loss": 0.4071274697780609, "eval_matthews_correlation": 0.6608739564809485, "eval_precision": 0.830536290932763, "eval_recall": 0.8303376953876045, "eval_runtime": 0.8091, "eval_samples_per_second": 7316.982, "eval_steps_per_second": 114.946, "step": 1600 }, { "epoch": 2.2972972972972974, "grad_norm": 3.8210701942443848, "learning_rate": 1.3020618556701032e-05, "loss": 0.2527, "step": 1700 }, { "epoch": 2.4324324324324325, "grad_norm": 3.2067503929138184, "learning_rate": 1.1989690721649485e-05, "loss": 0.2554, "step": 1800 }, { "epoch": 2.4324324324324325, "eval_accuracy": 0.8298986486486486, "eval_f1": 0.829796540365378, "eval_loss": 0.4566541910171509, "eval_matthews_correlation": 0.6611821554462906, "eval_precision": 0.8310928452755812, "eval_recall": 0.8300900705938966, "eval_runtime": 0.7864, "eval_samples_per_second": 7528.262, "eval_steps_per_second": 118.265, "step": 1800 }, { "epoch": 2.5675675675675675, "grad_norm": 3.9370391368865967, "learning_rate": 1.0958762886597938e-05, "loss": 0.261, "step": 1900 }, { "epoch": 2.7027027027027026, "grad_norm": 3.552394151687622, "learning_rate": 9.927835051546392e-06, "loss": 0.2585, "step": 2000 }, { "epoch": 2.7027027027027026, "eval_accuracy": 0.8339527027027027, "eval_f1": 0.8339117142535776, "eval_loss": 0.4246219992637634, "eval_matthews_correlation": 0.6679674617021183, "eval_precision": 0.8340812670080964, "eval_recall": 0.8338862231700334, "eval_runtime": 0.8169, "eval_samples_per_second": 7247.296, "eval_steps_per_second": 113.851, "step": 2000 }, { "epoch": 2.8378378378378377, "grad_norm": 5.959517002105713, "learning_rate": 8.896907216494845e-06, "loss": 0.2653, "step": 2100 }, { "epoch": 2.972972972972973, "grad_norm": 3.1301262378692627, "learning_rate": 7.8659793814433e-06, "loss": 0.2536, "step": 2200 }, { "epoch": 2.972972972972973, "eval_accuracy": 0.833277027027027, "eval_f1": 0.8332705141564862, "eval_loss": 0.4284779727458954, "eval_matthews_correlation": 0.6665410758687345, "eval_precision": 0.8332698342113638, "eval_recall": 0.8332712416588567, "eval_runtime": 0.7812, "eval_samples_per_second": 7578.458, "eval_steps_per_second": 119.053, "step": 2200 }, { "epoch": 3.108108108108108, "grad_norm": 5.621594429016113, "learning_rate": 6.835051546391753e-06, "loss": 0.2201, "step": 2300 }, { "epoch": 3.2432432432432434, "grad_norm": 4.273494720458984, "learning_rate": 5.804123711340207e-06, "loss": 0.1901, "step": 2400 }, { "epoch": 3.2432432432432434, "eval_accuracy": 0.8300675675675676, "eval_f1": 0.8300619621880789, "eval_loss": 0.4603254497051239, "eval_matthews_correlation": 0.6603695377770062, "eval_precision": 0.830230907439166, "eval_recall": 0.8301386367841352, "eval_runtime": 0.7952, "eval_samples_per_second": 7444.926, "eval_steps_per_second": 116.956, "step": 2400 }, { "epoch": 3.3783783783783785, "grad_norm": 2.421823263168335, "learning_rate": 4.77319587628866e-06, "loss": 0.1954, "step": 2500 }, { "epoch": 3.5135135135135136, "grad_norm": 3.237088918685913, "learning_rate": 3.752577319587629e-06, "loss": 0.1966, "step": 2600 }, { "epoch": 3.5135135135135136, "eval_accuracy": 0.8288851351351352, "eval_f1": 0.8288182721571277, "eval_loss": 0.47939351201057434, "eval_matthews_correlation": 0.6587744646373598, "eval_precision": 0.8297286638704058, "eval_recall": 0.8290461543167582, "eval_runtime": 0.8074, "eval_samples_per_second": 7331.818, "eval_steps_per_second": 115.179, "step": 2600 }, { "epoch": 3.6486486486486487, "grad_norm": 4.97433614730835, "learning_rate": 2.7216494845360823e-06, "loss": 0.1981, "step": 2700 }, { "epoch": 3.7837837837837838, "grad_norm": 4.287285327911377, "learning_rate": 1.6907216494845361e-06, "loss": 0.1921, "step": 2800 }, { "epoch": 3.7837837837837838, "eval_accuracy": 0.8293918918918919, "eval_f1": 0.8293918724196347, "eval_loss": 0.47961899638175964, "eval_matthews_correlation": 0.6588594780303657, "eval_precision": 0.8294312430581388, "eval_recall": 0.8294282349790937, "eval_runtime": 0.7786, "eval_samples_per_second": 7603.044, "eval_steps_per_second": 119.44, "step": 2800 }, { "epoch": 3.918918918918919, "grad_norm": 4.432475566864014, "learning_rate": 6.597938144329897e-07, "loss": 0.1968, "step": 2900 }, { "epoch": 4.0, "step": 2960, "total_flos": 2848522495741824.0, "train_loss": 0.308653936192796, "train_runtime": 127.8172, "train_samples_per_second": 1481.991, "train_steps_per_second": 23.158 } ], "logging_steps": 100, "max_steps": 2960, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2848522495741824.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }