{ "best_metric": 2.947911262512207, "best_model_checkpoint": "/users/zyong2/data/zyong2/bigscience/data/processed/024/bloom-560m_de_continual-pretrain_100000samples_-1vocab_original_bsz1/checkpoint-25000", "epoch": 0.34345377112240694, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 9e-05, "loss": 4.0829, "step": 2500 }, { "epoch": 0.07, "learning_rate": 8e-05, "loss": 3.6084, "step": 5000 }, { "epoch": 0.07, "eval_loss": 3.581524133682251, "eval_runtime": 84.6587, "eval_samples_per_second": 9.497, "eval_steps_per_second": 4.748, "step": 5000 }, { "epoch": 0.1, "learning_rate": 7e-05, "loss": 3.4193, "step": 7500 }, { "epoch": 0.14, "learning_rate": 6e-05, "loss": 3.3097, "step": 10000 }, { "epoch": 0.14, "eval_loss": 3.3223936557769775, "eval_runtime": 84.6499, "eval_samples_per_second": 9.498, "eval_steps_per_second": 4.749, "step": 10000 }, { "epoch": 0.17, "learning_rate": 5e-05, "loss": 3.2085, "step": 12500 }, { "epoch": 0.21, "learning_rate": 4e-05, "loss": 3.0969, "step": 15000 }, { "epoch": 0.21, "eval_loss": 3.157708168029785, "eval_runtime": 84.5971, "eval_samples_per_second": 9.504, "eval_steps_per_second": 4.752, "step": 15000 }, { "epoch": 0.24, "learning_rate": 3e-05, "loss": 3.0205, "step": 17500 }, { "epoch": 0.27, "learning_rate": 2e-05, "loss": 2.968, "step": 20000 }, { "epoch": 0.27, "eval_loss": 3.02886700630188, "eval_runtime": 84.6789, "eval_samples_per_second": 9.495, "eval_steps_per_second": 4.747, "step": 20000 }, { "epoch": 0.31, "learning_rate": 1e-05, "loss": 2.8963, "step": 22500 }, { "epoch": 0.34, "learning_rate": 0.0, "loss": 2.8626, "step": 25000 }, { "epoch": 0.34, "eval_loss": 2.947911262512207, "eval_runtime": 84.657, "eval_samples_per_second": 9.497, "eval_steps_per_second": 4.749, "step": 25000 }, { "epoch": 0.34, "step": 25000, "total_flos": 4.64353492992e+16, "train_loss": 3.24729939453125, "train_runtime": 10361.3203, "train_samples_per_second": 2.413, "train_steps_per_second": 2.413 } ], "max_steps": 25000, "num_train_epochs": 1, "total_flos": 4.64353492992e+16, "trial_name": null, "trial_params": null }