| { | |
| "best_metric": 0.27745380997657776, | |
| "best_model_checkpoint": "uk-mt5-base-gec/checkpoint-1350", | |
| "epoch": 3.0177409816676524, | |
| "global_step": 2550, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.000970414201183432, | |
| "loss": 2.8559, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_google_bleu": 0.4161949252310698, | |
| "eval_loss": 0.3385647237300873, | |
| "eval_runtime": 574.9325, | |
| "eval_samples_per_second": 5.227, | |
| "eval_steps_per_second": 0.327, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.000940828402366864, | |
| "loss": 0.353, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_google_bleu": 0.4197968462318859, | |
| "eval_loss": 0.30946752429008484, | |
| "eval_runtime": 574.235, | |
| "eval_samples_per_second": 5.233, | |
| "eval_steps_per_second": 0.327, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.0009112426035502958, | |
| "loss": 0.3433, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_google_bleu": 0.42082026039416087, | |
| "eval_loss": 0.30230990052223206, | |
| "eval_runtime": 573.7714, | |
| "eval_samples_per_second": 5.237, | |
| "eval_steps_per_second": 0.328, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.0008816568047337278, | |
| "loss": 0.3248, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_google_bleu": 0.4194576852971206, | |
| "eval_loss": 0.2984682619571686, | |
| "eval_runtime": 574.3927, | |
| "eval_samples_per_second": 5.232, | |
| "eval_steps_per_second": 0.327, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.0008520710059171598, | |
| "loss": 0.3046, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_google_bleu": 0.4217920913982863, | |
| "eval_loss": 0.28489378094673157, | |
| "eval_runtime": 574.7617, | |
| "eval_samples_per_second": 5.228, | |
| "eval_steps_per_second": 0.327, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.0008224852071005917, | |
| "loss": 0.2625, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_google_bleu": 0.4213197969543147, | |
| "eval_loss": 0.29553136229515076, | |
| "eval_runtime": 573.965, | |
| "eval_samples_per_second": 5.236, | |
| "eval_steps_per_second": 0.328, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 0.0007928994082840238, | |
| "loss": 0.2127, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_google_bleu": 0.4211682670038433, | |
| "eval_loss": 0.30292925238609314, | |
| "eval_runtime": 574.6793, | |
| "eval_samples_per_second": 5.229, | |
| "eval_steps_per_second": 0.327, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "learning_rate": 0.0007633136094674556, | |
| "loss": 0.224, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_google_bleu": 0.4224696723929531, | |
| "eval_loss": 0.3068563640117645, | |
| "eval_runtime": 574.5278, | |
| "eval_samples_per_second": 5.23, | |
| "eval_steps_per_second": 0.327, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.0007337278106508876, | |
| "loss": 0.2332, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_google_bleu": 0.41897146578336036, | |
| "eval_loss": 0.27745380997657776, | |
| "eval_runtime": 575.5901, | |
| "eval_samples_per_second": 5.221, | |
| "eval_steps_per_second": 0.327, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 0.0007041420118343196, | |
| "loss": 0.238, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "eval_google_bleu": 0.4164072628882445, | |
| "eval_loss": 0.2903579771518707, | |
| "eval_runtime": 573.985, | |
| "eval_samples_per_second": 5.235, | |
| "eval_steps_per_second": 0.328, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 0.0006745562130177515, | |
| "loss": 0.2297, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_google_bleu": 0.41891172732452214, | |
| "eval_loss": 0.2825988233089447, | |
| "eval_runtime": 574.7646, | |
| "eval_samples_per_second": 5.228, | |
| "eval_steps_per_second": 0.327, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "learning_rate": 0.0006449704142011834, | |
| "loss": 0.1649, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_google_bleu": 0.41988701131139533, | |
| "eval_loss": 0.304867148399353, | |
| "eval_runtime": 575.4878, | |
| "eval_samples_per_second": 5.222, | |
| "eval_steps_per_second": 0.327, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 0.0006153846153846154, | |
| "loss": 0.1458, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_google_bleu": 0.4201902185823143, | |
| "eval_loss": 0.3138478994369507, | |
| "eval_runtime": 575.2695, | |
| "eval_samples_per_second": 5.224, | |
| "eval_steps_per_second": 0.327, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "learning_rate": 0.0005857988165680473, | |
| "loss": 0.1564, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_google_bleu": 0.415792735992587, | |
| "eval_loss": 0.3027360141277313, | |
| "eval_runtime": 566.4822, | |
| "eval_samples_per_second": 5.305, | |
| "eval_steps_per_second": 0.332, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 0.0005562130177514793, | |
| "loss": 0.1572, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_google_bleu": 0.4212085345156907, | |
| "eval_loss": 0.3020596504211426, | |
| "eval_runtime": 574.5482, | |
| "eval_samples_per_second": 5.23, | |
| "eval_steps_per_second": 0.327, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "learning_rate": 0.0005266272189349113, | |
| "loss": 0.159, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_google_bleu": 0.41866493031642477, | |
| "eval_loss": 0.29025933146476746, | |
| "eval_runtime": 574.442, | |
| "eval_samples_per_second": 5.231, | |
| "eval_steps_per_second": 0.327, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 0.0004970414201183431, | |
| "loss": 0.159, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "eval_google_bleu": 0.4191764521684672, | |
| "eval_loss": 0.3248673677444458, | |
| "eval_runtime": 573.9254, | |
| "eval_samples_per_second": 5.236, | |
| "eval_steps_per_second": 0.328, | |
| "step": 2550 | |
| } | |
| ], | |
| "max_steps": 5070, | |
| "num_train_epochs": 6, | |
| "total_flos": 2.984077609323725e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |