{
  "best_metric": 80.33013168664009,
  "best_model_checkpoint": "/root/turkic_qa/ru_uzn_models/ru_uzn_xlm_roberta_large_squad_model/checkpoint-2790",
  "epoch": 5.0,
  "eval_steps": 500,
  "global_step": 2790,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "step": 558,
      "train_exact_match": 64.83516483516483,
      "train_f1": 83.08204401574274,
      "train_runtime": 24.0955,
      "train_samples_per_second": 43.826,
      "train_steps_per_second": 1.577
    },
    {
      "epoch": 1.0,
      "grad_norm": 80.57772827148438,
      "learning_rate": 1e-05,
      "loss": 1.2698,
      "step": 558
    },
    {
      "epoch": 1.0,
      "eval_exact_match": 61.25,
      "eval_f1": 79.38877529103148,
      "eval_runtime": 76.3721,
      "eval_samples_per_second": 43.982,
      "eval_steps_per_second": 1.571,
      "step": 558
    },
    {
      "epoch": 2.0,
      "step": 1116,
      "train_exact_match": 74.82517482517483,
      "train_f1": 89.2404305639639,
      "train_runtime": 24.2914,
      "train_samples_per_second": 42.855,
      "train_steps_per_second": 1.564
    },
    {
      "epoch": 2.0,
      "grad_norm": 80.02055358886719,
      "learning_rate": 7.500000000000001e-06,
      "loss": 0.8886,
      "step": 1116
    },
    {
      "epoch": 2.0,
      "eval_exact_match": 62.3125,
      "eval_f1": 80.2126480616015,
      "eval_runtime": 77.9968,
      "eval_samples_per_second": 43.066,
      "eval_steps_per_second": 1.539,
      "step": 1116
    },
    {
      "epoch": 3.0,
      "step": 1674,
      "train_exact_match": 78.52147852147853,
      "train_f1": 92.40489073606565,
      "train_runtime": 25.6503,
      "train_samples_per_second": 41.013,
      "train_steps_per_second": 1.481
    },
    {
      "epoch": 3.0,
      "grad_norm": 28.389789581298828,
      "learning_rate": 5e-06,
      "loss": 0.6286,
      "step": 1674
    },
    {
      "epoch": 3.0,
      "eval_exact_match": 61.9375,
      "eval_f1": 80.06485926873198,
      "eval_runtime": 83.2186,
      "eval_samples_per_second": 40.364,
      "eval_steps_per_second": 1.442,
      "step": 1674
    },
    {
      "epoch": 4.0,
      "step": 2232,
      "train_exact_match": 82.31768231768231,
      "train_f1": 93.82612819903538,
      "train_runtime": 23.9529,
      "train_samples_per_second": 43.669,
      "train_steps_per_second": 1.586
    },
    {
      "epoch": 4.0,
      "grad_norm": 21.804250717163086,
      "learning_rate": 2.5e-06,
      "loss": 0.4725,
      "step": 2232
    },
    {
      "epoch": 4.0,
      "eval_exact_match": 62.625,
      "eval_f1": 80.25175708657538,
      "eval_runtime": 77.0148,
      "eval_samples_per_second": 43.615,
      "eval_steps_per_second": 1.558,
      "step": 2232
    },
    {
      "epoch": 5.0,
      "step": 2790,
      "train_exact_match": 85.21478521478521,
      "train_f1": 94.39980772824222,
      "train_runtime": 24.3057,
      "train_samples_per_second": 42.871,
      "train_steps_per_second": 1.563
    },
    {
      "epoch": 5.0,
      "grad_norm": 48.04233932495117,
      "learning_rate": 0.0,
      "loss": 0.378,
      "step": 2790
    },
    {
      "epoch": 5.0,
      "eval_exact_match": 62.46875,
      "eval_f1": 80.33013168664009,
      "eval_runtime": 78.9765,
      "eval_samples_per_second": 42.532,
      "eval_steps_per_second": 1.519,
      "step": 2790
    },
    {
      "epoch": 5.0,
      "step": 2790,
      "total_flos": 5.432587638826752e+16,
      "train_loss": 0.7275064119728663,
      "train_runtime": 5090.5681,
      "train_samples_per_second": 15.321,
      "train_steps_per_second": 0.548
    }
  ],
  "logging_steps": 500,
  "max_steps": 2790,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 5,
  "save_steps": 500,
  "total_flos": 5.432587638826752e+16,
  "train_batch_size": 28,
  "trial_name": null,
  "trial_params": null
}