{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.0005064124476179625, "eval_steps": 3, "global_step": 10, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval_loss": 3.332756996154785, "eval_runtime": 1255.489, "eval_samples_per_second": 6.623, "eval_steps_per_second": 3.312, "step": 0 }, { "epoch": 5.0641244761796245e-05, "grad_norm": 0.7450602054595947, "learning_rate": 0.0, "loss": 3.0601, "step": 1 }, { "epoch": 0.00010128248952359249, "grad_norm": 0.7262908816337585, "learning_rate": 2e-05, "loss": 3.0686, "step": 2 }, { "epoch": 0.00015192373428538875, "grad_norm": 0.6945377588272095, "learning_rate": 4e-05, "loss": 3.4784, "step": 3 }, { "epoch": 0.00015192373428538875, "eval_loss": 3.3230936527252197, "eval_runtime": 1245.743, "eval_samples_per_second": 6.675, "eval_steps_per_second": 3.338, "step": 3 }, { "epoch": 0.00020256497904718498, "grad_norm": 0.7639620304107666, "learning_rate": 6e-05, "loss": 3.3284, "step": 4 }, { "epoch": 0.00025320622380898124, "grad_norm": 0.7831843495368958, "learning_rate": 8e-05, "loss": 3.0446, "step": 5 }, { "epoch": 0.0003038474685707775, "grad_norm": 0.9005393981933594, "learning_rate": 0.0001, "loss": 3.03, "step": 6 }, { "epoch": 0.0003038474685707775, "eval_loss": 3.2163121700286865, "eval_runtime": 1246.0826, "eval_samples_per_second": 6.673, "eval_steps_per_second": 3.337, "step": 6 }, { "epoch": 0.0003544887133325737, "grad_norm": 0.8589541912078857, "learning_rate": 0.00012, "loss": 3.0754, "step": 7 }, { "epoch": 0.00040512995809436996, "grad_norm": 0.904832661151886, "learning_rate": 0.00014, "loss": 3.3662, "step": 8 }, { "epoch": 0.0004557712028561662, "grad_norm": 0.8699621558189392, "learning_rate": 0.00016, "loss": 2.9704, "step": 9 }, { "epoch": 0.0004557712028561662, "eval_loss": 2.990236520767212, "eval_runtime": 1246.2607, "eval_samples_per_second": 6.672, "eval_steps_per_second": 3.336, "step": 9 }, { "epoch": 0.0005064124476179625, "grad_norm": 0.7870314717292786, "learning_rate": 0.00018, "loss": 3.0758, "step": 10 } ], "logging_steps": 1, "max_steps": 10, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 3, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2852805358387200.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }