| { | |
| "best_metric": 1.0545520782470703, | |
| "best_model_checkpoint": "/kaggle/output/checkpoint-23000", | |
| "epoch": 0.9370925684485006, | |
| "eval_steps": 1000, | |
| "global_step": 23000, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "learning_rate": 2.7777777777777777e-11, | |
| "loss": 1.1383, | |
| "step": 1 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 2.7750000000000004e-08, | |
| "loss": 1.1424, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_accuracy": 0.32375249500998005, | |
| "eval_loss": 1.1077626943588257, | |
| "eval_runtime": 54.8633, | |
| "eval_samples_per_second": 91.318, | |
| "eval_steps_per_second": 11.428, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 5.5527777777777784e-08, | |
| "loss": 1.1244, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 0.33652694610778444, | |
| "eval_loss": 1.1080161333084106, | |
| "eval_runtime": 54.7384, | |
| "eval_samples_per_second": 91.526, | |
| "eval_steps_per_second": 11.454, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 8.327777777777778e-08, | |
| "loss": 1.1228, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.34331337325349304, | |
| "eval_loss": 1.1084064245224, | |
| "eval_runtime": 54.7948, | |
| "eval_samples_per_second": 91.432, | |
| "eval_steps_per_second": 11.443, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.1105555555555557e-07, | |
| "loss": 1.1216, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.3385229540918164, | |
| "eval_loss": 1.1014840602874756, | |
| "eval_runtime": 54.8508, | |
| "eval_samples_per_second": 91.339, | |
| "eval_steps_per_second": 11.431, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 1.3880555555555558e-07, | |
| "loss": 1.1181, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 0.33073852295409184, | |
| "eval_loss": 1.1008135080337524, | |
| "eval_runtime": 54.8304, | |
| "eval_samples_per_second": 91.373, | |
| "eval_steps_per_second": 11.435, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.6658333333333335e-07, | |
| "loss": 1.1132, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.3520958083832335, | |
| "eval_loss": 1.0993762016296387, | |
| "eval_runtime": 54.8804, | |
| "eval_samples_per_second": 91.289, | |
| "eval_steps_per_second": 11.425, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 1.9433333333333334e-07, | |
| "loss": 1.1113, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.3530938123752495, | |
| "eval_loss": 1.0965770483016968, | |
| "eval_runtime": 54.8881, | |
| "eval_samples_per_second": 91.277, | |
| "eval_steps_per_second": 11.423, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 2.2211111111111114e-07, | |
| "loss": 1.1111, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.35708582834331337, | |
| "eval_loss": 1.094658613204956, | |
| "eval_runtime": 54.8233, | |
| "eval_samples_per_second": 91.384, | |
| "eval_steps_per_second": 11.437, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 2.4986111111111113e-07, | |
| "loss": 1.109, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.34191616766467064, | |
| "eval_loss": 1.106990933418274, | |
| "eval_runtime": 54.9095, | |
| "eval_samples_per_second": 91.241, | |
| "eval_steps_per_second": 11.419, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 2.776388888888889e-07, | |
| "loss": 1.1036, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 0.37584830339321357, | |
| "eval_loss": 1.0930211544036865, | |
| "eval_runtime": 54.9067, | |
| "eval_samples_per_second": 91.246, | |
| "eval_steps_per_second": 11.419, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.0541666666666667e-07, | |
| "loss": 1.1045, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.3652694610778443, | |
| "eval_loss": 1.092846393585205, | |
| "eval_runtime": 54.8964, | |
| "eval_samples_per_second": 91.263, | |
| "eval_steps_per_second": 11.422, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 3.3319444444444444e-07, | |
| "loss": 1.1024, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 0.39261477045908183, | |
| "eval_loss": 1.089038372039795, | |
| "eval_runtime": 54.9763, | |
| "eval_samples_per_second": 91.13, | |
| "eval_steps_per_second": 11.405, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 3.6094444444444446e-07, | |
| "loss": 1.1007, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_accuracy": 0.34311377245508984, | |
| "eval_loss": 1.0933948755264282, | |
| "eval_runtime": 54.9285, | |
| "eval_samples_per_second": 91.209, | |
| "eval_steps_per_second": 11.415, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 3.8872222222222223e-07, | |
| "loss": 1.0985, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.36367265469061877, | |
| "eval_loss": 1.09434974193573, | |
| "eval_runtime": 54.8032, | |
| "eval_samples_per_second": 91.418, | |
| "eval_steps_per_second": 11.441, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 4.1650000000000006e-07, | |
| "loss": 1.0988, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_accuracy": 0.39481037924151696, | |
| "eval_loss": 1.0886671543121338, | |
| "eval_runtime": 54.9221, | |
| "eval_samples_per_second": 91.22, | |
| "eval_steps_per_second": 11.416, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 4.4425e-07, | |
| "loss": 1.0965, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.3916167664670659, | |
| "eval_loss": 1.0834949016571045, | |
| "eval_runtime": 54.5628, | |
| "eval_samples_per_second": 91.821, | |
| "eval_steps_per_second": 11.491, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 4.7202777777777785e-07, | |
| "loss": 1.0926, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.4239520958083832, | |
| "eval_loss": 1.079688310623169, | |
| "eval_runtime": 54.6989, | |
| "eval_samples_per_second": 91.592, | |
| "eval_steps_per_second": 11.463, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 4.998055555555556e-07, | |
| "loss": 1.0956, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.4219560878243513, | |
| "eval_loss": 1.080493688583374, | |
| "eval_runtime": 54.6863, | |
| "eval_samples_per_second": 91.613, | |
| "eval_steps_per_second": 11.465, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 5.275555555555556e-07, | |
| "loss": 1.0878, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 0.4343313373253493, | |
| "eval_loss": 1.0664235353469849, | |
| "eval_runtime": 54.7843, | |
| "eval_samples_per_second": 91.45, | |
| "eval_steps_per_second": 11.445, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 5.553333333333334e-07, | |
| "loss": 1.0793, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.4365269461077844, | |
| "eval_loss": 1.06390380859375, | |
| "eval_runtime": 54.7978, | |
| "eval_samples_per_second": 91.427, | |
| "eval_steps_per_second": 11.442, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 5.830833333333334e-07, | |
| "loss": 1.0746, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.4311377245508982, | |
| "eval_loss": 1.0611063241958618, | |
| "eval_runtime": 54.6084, | |
| "eval_samples_per_second": 91.744, | |
| "eval_steps_per_second": 11.482, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 6.108611111111111e-07, | |
| "loss": 1.0757, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.43253493013972055, | |
| "eval_loss": 1.0579031705856323, | |
| "eval_runtime": 54.7147, | |
| "eval_samples_per_second": 91.566, | |
| "eval_steps_per_second": 11.459, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 6.386111111111112e-07, | |
| "loss": 1.0712, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_accuracy": 0.43353293413173655, | |
| "eval_loss": 1.0545520782470703, | |
| "eval_runtime": 54.8205, | |
| "eval_samples_per_second": 91.389, | |
| "eval_steps_per_second": 11.437, | |
| "step": 23000 | |
| } | |
| ], | |
| "logging_steps": 1000, | |
| "max_steps": 10000000, | |
| "num_train_epochs": 408, | |
| "save_steps": 1000, | |
| "total_flos": 4.8078167998464e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |