{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.001779061865097297, "eval_steps": 500, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 8.895309325486485e-05, "grad_norm": 0.1897999793291092, "learning_rate": 2.5e-05, "loss": 2.3044, "step": 25 }, { "epoch": 0.0001779061865097297, "grad_norm": 0.23376008868217468, "learning_rate": 2.5e-05, "loss": 2.3592, "step": 50 }, { "epoch": 0.0002668592797645945, "grad_norm": 0.13603591918945312, "learning_rate": 2.5e-05, "loss": 2.084, "step": 75 }, { "epoch": 0.0003558123730194594, "grad_norm": 0.34403547644615173, "learning_rate": 2.5e-05, "loss": 2.076, "step": 100 }, { "epoch": 0.00044476546627432423, "grad_norm": 0.1530287265777588, "learning_rate": 2.5e-05, "loss": 1.7806, "step": 125 }, { "epoch": 0.000533718559529189, "grad_norm": 0.344722718000412, "learning_rate": 2.5e-05, "loss": 1.5749, "step": 150 }, { "epoch": 0.0006226716527840539, "grad_norm": 0.38181471824645996, "learning_rate": 2.5e-05, "loss": 1.5243, "step": 175 }, { "epoch": 0.0007116247460389188, "grad_norm": 0.25272616744041443, "learning_rate": 2.5e-05, "loss": 1.4728, "step": 200 }, { "epoch": 0.0008005778392937836, "grad_norm": 0.6237773299217224, "learning_rate": 2.5e-05, "loss": 1.3028, "step": 225 }, { "epoch": 0.0008895309325486485, "grad_norm": 0.5120233297348022, "learning_rate": 2.5e-05, "loss": 1.0721, "step": 250 }, { "epoch": 0.0009784840258035134, "grad_norm": 0.6732835173606873, "learning_rate": 2.5e-05, "loss": 0.8071, "step": 275 }, { "epoch": 0.001067437119058378, "grad_norm": 0.5018543004989624, "learning_rate": 2.5e-05, "loss": 0.8138, "step": 300 }, { "epoch": 0.001156390212313243, "grad_norm": 0.24052944779396057, "learning_rate": 2.5e-05, "loss": 0.702, "step": 325 }, { "epoch": 0.0012453433055681078, "grad_norm": 0.2696482837200165, "learning_rate": 2.5e-05, "loss": 0.6689, "step": 350 }, { "epoch": 0.0013342963988229727, "grad_norm": 0.21222035586833954, "learning_rate": 2.5e-05, "loss": 0.7498, "step": 375 }, { "epoch": 0.0014232494920778376, "grad_norm": 0.27624765038490295, "learning_rate": 2.5e-05, "loss": 0.6544, "step": 400 }, { "epoch": 0.0015122025853327023, "grad_norm": 0.8518249988555908, "learning_rate": 2.5e-05, "loss": 0.6908, "step": 425 }, { "epoch": 0.0016011556785875672, "grad_norm": 0.588943600654602, "learning_rate": 2.5e-05, "loss": 0.7507, "step": 450 }, { "epoch": 0.001690108771842432, "grad_norm": 0.4197629690170288, "learning_rate": 2.5e-05, "loss": 0.8013, "step": 475 }, { "epoch": 0.001779061865097297, "grad_norm": 0.48924073576927185, "learning_rate": 2.5e-05, "loss": 0.6292, "step": 500 } ], "logging_steps": 25, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.087597314048e+16, "train_batch_size": 1, "trial_name": null, "trial_params": null }