| { | |
| "best_metric": 1.6806640625, | |
| "best_model_checkpoint": "/home/zhengxinyong/outputs/bloom-7b1_ru_continual-pretrain_100000samples_-1vocab_original/checkpoint-25000", | |
| "epoch": 1.4637002341920375, | |
| "global_step": 25000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001, | |
| "loss": 2.1762, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0001, | |
| "loss": 1.9292, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_loss": 1.8681640625, | |
| "eval_runtime": 20.6594, | |
| "eval_samples_per_second": 65.975, | |
| "eval_steps_per_second": 8.277, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8402, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.0001, | |
| "loss": 1.8012, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_loss": 1.7763671875, | |
| "eval_runtime": 20.6347, | |
| "eval_samples_per_second": 66.054, | |
| "eval_steps_per_second": 8.287, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7654, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "learning_rate": 0.0001, | |
| "loss": 1.7347, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_loss": 1.734375, | |
| "eval_runtime": 20.6431, | |
| "eval_samples_per_second": 66.027, | |
| "eval_steps_per_second": 8.284, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.0001, | |
| "loss": 1.6922, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 0.0001, | |
| "loss": 1.5617, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_loss": 1.703125, | |
| "eval_runtime": 20.6213, | |
| "eval_samples_per_second": 66.097, | |
| "eval_steps_per_second": 8.292, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "learning_rate": 0.0001, | |
| "loss": 1.5768, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 0.0001, | |
| "loss": 1.573, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_loss": 1.6806640625, | |
| "eval_runtime": 20.6399, | |
| "eval_samples_per_second": 66.037, | |
| "eval_steps_per_second": 8.285, | |
| "step": 25000 | |
| } | |
| ], | |
| "max_steps": 25000, | |
| "num_train_epochs": 2, | |
| "total_flos": 7.423686731586601e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |