| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.001779061865097297, | |
| "eval_steps": 500, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 8.895309325486485e-05, | |
| "grad_norm": 0.1897999793291092, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.3044, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.0001779061865097297, | |
| "grad_norm": 0.23376008868217468, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.3592, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.0002668592797645945, | |
| "grad_norm": 0.13603591918945312, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.084, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.0003558123730194594, | |
| "grad_norm": 0.34403547644615173, | |
| "learning_rate": 2.5e-05, | |
| "loss": 2.076, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.00044476546627432423, | |
| "grad_norm": 0.1530287265777588, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.7806, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.000533718559529189, | |
| "grad_norm": 0.344722718000412, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.5749, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.0006226716527840539, | |
| "grad_norm": 0.38181471824645996, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.5243, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.0007116247460389188, | |
| "grad_norm": 0.25272616744041443, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.4728, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.0008005778392937836, | |
| "grad_norm": 0.6237773299217224, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.3028, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.0008895309325486485, | |
| "grad_norm": 0.5120233297348022, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.0721, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0009784840258035134, | |
| "grad_norm": 0.6732835173606873, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.8071, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.001067437119058378, | |
| "grad_norm": 0.5018543004989624, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.8138, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.001156390212313243, | |
| "grad_norm": 0.24052944779396057, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.702, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.0012453433055681078, | |
| "grad_norm": 0.2696482837200165, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.6689, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.0013342963988229727, | |
| "grad_norm": 0.21222035586833954, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.7498, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.0014232494920778376, | |
| "grad_norm": 0.27624765038490295, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.6544, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.0015122025853327023, | |
| "grad_norm": 0.8518249988555908, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.6908, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.0016011556785875672, | |
| "grad_norm": 0.588943600654602, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.7507, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.001690108771842432, | |
| "grad_norm": 0.4197629690170288, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.8013, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.001779061865097297, | |
| "grad_norm": 0.48924073576927185, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.6292, | |
| "step": 500 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 500, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 50, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.087597314048e+16, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |