{ "best_metric": 0.4873640835285187, "best_model_checkpoint": "output_pipe/4/origin/checkpoint-200", "epoch": 4.0, "eval_steps": 200, "global_step": 944, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.423728813559322, "grad_norm": 1.6054024696350098, "learning_rate": 2.8322147651006714e-05, "loss": 0.6523, "step": 100 }, { "epoch": 0.847457627118644, "grad_norm": 1.5572692155838013, "learning_rate": 2.4966442953020137e-05, "loss": 0.5213, "step": 200 }, { "epoch": 0.847457627118644, "eval_accuracy": 0.7546468401486989, "eval_f1": 0.7545886311627465, "eval_loss": 0.4873640835285187, "eval_matthews_correlation": 0.5136204005519472, "eval_precision": 0.757240412999415, "eval_recall": 0.756380707046741, "eval_runtime": 0.4764, "eval_samples_per_second": 3952.419, "eval_steps_per_second": 62.97, "step": 200 }, { "epoch": 1.271186440677966, "grad_norm": 2.912567377090454, "learning_rate": 2.1610738255033557e-05, "loss": 0.4243, "step": 300 }, { "epoch": 1.694915254237288, "grad_norm": 3.009249687194824, "learning_rate": 1.825503355704698e-05, "loss": 0.3561, "step": 400 }, { "epoch": 1.694915254237288, "eval_accuracy": 0.7567711099309612, "eval_f1": 0.7551546157602462, "eval_loss": 0.5137989521026611, "eval_matthews_correlation": 0.5326379449702427, "eval_precision": 0.7716787064732773, "eval_recall": 0.7610649764430736, "eval_runtime": 0.4747, "eval_samples_per_second": 3966.427, "eval_steps_per_second": 63.193, "step": 400 }, { "epoch": 2.1186440677966103, "grad_norm": 9.79773235321045, "learning_rate": 1.4899328859060403e-05, "loss": 0.2884, "step": 500 }, { "epoch": 2.542372881355932, "grad_norm": 6.196119785308838, "learning_rate": 1.1543624161073825e-05, "loss": 0.0992, "step": 600 }, { "epoch": 2.542372881355932, "eval_accuracy": 0.7673924588422729, "eval_f1": 0.7673871448846009, "eval_loss": 0.6910274624824524, "eval_matthews_correlation": 0.5376904599032052, "eval_precision": 0.768957383485309, "eval_recall": 0.7687331231852128, "eval_runtime": 0.4743, "eval_samples_per_second": 3970.373, "eval_steps_per_second": 63.256, "step": 600 }, { "epoch": 2.9661016949152543, "grad_norm": 5.558084964752197, "learning_rate": 8.18791946308725e-06, "loss": 0.0961, "step": 700 }, { "epoch": 3.389830508474576, "grad_norm": 4.1897196769714355, "learning_rate": 4.832214765100671e-06, "loss": 0.0399, "step": 800 }, { "epoch": 3.389830508474576, "eval_accuracy": 0.7652681890600106, "eval_f1": 0.7651775234741784, "eval_loss": 1.0188117027282715, "eval_matthews_correlation": 0.5355988651772904, "eval_precision": 0.7684172890513269, "eval_recall": 0.7671829983391519, "eval_runtime": 0.4754, "eval_samples_per_second": 3960.7, "eval_steps_per_second": 63.102, "step": 800 }, { "epoch": 3.8135593220338984, "grad_norm": 9.25539779663086, "learning_rate": 1.5100671140939598e-06, "loss": 0.0296, "step": 900 }, { "epoch": 4.0, "step": 944, "total_flos": 4056474731991360.0, "train_loss": 0.2665235818821495, "train_runtime": 83.9458, "train_samples_per_second": 717.797, "train_steps_per_second": 11.245 } ], "logging_steps": 100, "max_steps": 944, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4056474731991360.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }