{
  "best_metric": 0.9545454545454546,
  "best_model_checkpoint": "ViT-base-16-224-7.5-1.5-1.5-split-lion-4\\checkpoint-943",
  "epoch": 143.47826086956522,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.96,
      "eval_accuracy": 0.7077922077922078,
      "eval_loss": 0.7381948232650757,
      "eval_runtime": 1.6508,
      "eval_samples_per_second": 93.289,
      "eval_steps_per_second": 6.058,
      "step": 11
    },
    {
      "epoch": 1.74,
      "learning_rate": 1.5780272002695102e-05,
      "loss": 0.8102,
      "step": 20
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8181818181818182,
      "eval_loss": 0.5202796459197998,
      "eval_runtime": 1.6447,
      "eval_samples_per_second": 93.636,
      "eval_steps_per_second": 6.08,
      "step": 23
    },
    {
      "epoch": 2.96,
      "eval_accuracy": 0.8701298701298701,
      "eval_loss": 0.4076531231403351,
      "eval_runtime": 1.6584,
      "eval_samples_per_second": 92.862,
      "eval_steps_per_second": 6.03,
      "step": 34
    },
    {
      "epoch": 3.48,
      "learning_rate": 1.3309654930856552e-05,
      "loss": 0.4016,
      "step": 40
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8636363636363636,
      "eval_loss": 0.3777826428413391,
      "eval_runtime": 1.6734,
      "eval_samples_per_second": 92.03,
      "eval_steps_per_second": 5.976,
      "step": 46
    },
    {
      "epoch": 4.96,
      "eval_accuracy": 0.8766233766233766,
      "eval_loss": 0.35903552174568176,
      "eval_runtime": 1.6866,
      "eval_samples_per_second": 91.308,
      "eval_steps_per_second": 5.929,
      "step": 57
    },
    {
      "epoch": 5.22,
      "learning_rate": 9.780401480557754e-06,
      "loss": 0.2052,
      "step": 60
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.29355403780937195,
      "eval_runtime": 1.7256,
      "eval_samples_per_second": 89.245,
      "eval_steps_per_second": 5.795,
      "step": 69
    },
    {
      "epoch": 6.96,
      "learning_rate": 5.943306394074249e-06,
      "loss": 0.0838,
      "step": 80
    },
    {
      "epoch": 6.96,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.2710248529911041,
      "eval_runtime": 1.6959,
      "eval_samples_per_second": 90.805,
      "eval_steps_per_second": 5.896,
      "step": 80
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.2775874435901642,
      "eval_runtime": 1.6736,
      "eval_samples_per_second": 92.018,
      "eval_steps_per_second": 5.975,
      "step": 92
    },
    {
      "epoch": 8.7,
      "learning_rate": 2.6146530177605546e-06,
      "loss": 0.0407,
      "step": 100
    },
    {
      "epoch": 8.96,
      "eval_accuracy": 0.9415584415584416,
      "eval_loss": 0.22880351543426514,
      "eval_runtime": 1.6916,
      "eval_samples_per_second": 91.039,
      "eval_steps_per_second": 5.912,
      "step": 103
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9415584415584416,
      "eval_loss": 0.2207733541727066,
      "eval_runtime": 1.6896,
      "eval_samples_per_second": 91.147,
      "eval_steps_per_second": 5.919,
      "step": 115
    },
    {
      "epoch": 10.43,
      "learning_rate": 5.025614934507641e-07,
      "loss": 0.039,
      "step": 120
    },
    {
      "epoch": 10.96,
      "eval_accuracy": 0.9415584415584416,
      "eval_loss": 0.22476842999458313,
      "eval_runtime": 1.7237,
      "eval_samples_per_second": 89.342,
      "eval_steps_per_second": 5.801,
      "step": 126
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.3085295557975769,
      "eval_runtime": 1.727,
      "eval_samples_per_second": 89.173,
      "eval_steps_per_second": 5.79,
      "step": 138
    },
    {
      "epoch": 12.17,
      "learning_rate": 1.6610319647849526e-05,
      "loss": 0.0324,
      "step": 140
    },
    {
      "epoch": 12.96,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.26784980297088623,
      "eval_runtime": 1.7027,
      "eval_samples_per_second": 90.446,
      "eval_steps_per_second": 5.873,
      "step": 149
    },
    {
      "epoch": 13.91,
      "learning_rate": 1.529573176177447e-05,
      "loss": 0.022,
      "step": 160
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.948051948051948,
      "eval_loss": 0.2529321610927582,
      "eval_runtime": 1.7126,
      "eval_samples_per_second": 89.923,
      "eval_steps_per_second": 5.839,
      "step": 161
    },
    {
      "epoch": 14.96,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.24028430879116058,
      "eval_runtime": 1.7176,
      "eval_samples_per_second": 89.659,
      "eval_steps_per_second": 5.822,
      "step": 172
    },
    {
      "epoch": 15.65,
      "learning_rate": 1.25e-05,
      "loss": 0.012,
      "step": 180
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.3513343632221222,
      "eval_runtime": 1.7476,
      "eval_samples_per_second": 88.12,
      "eval_steps_per_second": 5.722,
      "step": 184
    },
    {
      "epoch": 16.96,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.3014402389526367,
      "eval_runtime": 1.7276,
      "eval_samples_per_second": 89.139,
      "eval_steps_per_second": 5.788,
      "step": 195
    },
    {
      "epoch": 17.39,
      "learning_rate": 8.817873574253966e-06,
      "loss": 0.0097,
      "step": 200
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.31746622920036316,
      "eval_runtime": 1.9143,
      "eval_samples_per_second": 80.446,
      "eval_steps_per_second": 5.224,
      "step": 207
    },
    {
      "epoch": 18.96,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.2747339904308319,
      "eval_runtime": 1.7322,
      "eval_samples_per_second": 88.904,
      "eval_steps_per_second": 5.773,
      "step": 218
    },
    {
      "epoch": 19.13,
      "learning_rate": 5.03266861634036e-06,
      "loss": 0.0052,
      "step": 220
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.2932997941970825,
      "eval_runtime": 1.7167,
      "eval_samples_per_second": 89.706,
      "eval_steps_per_second": 5.825,
      "step": 230
    },
    {
      "epoch": 20.87,
      "learning_rate": 1.9496296406751813e-06,
      "loss": 0.009,
      "step": 240
    },
    {
      "epoch": 20.96,
      "eval_accuracy": 0.9415584415584416,
      "eval_loss": 0.28077924251556396,
      "eval_runtime": 1.7005,
      "eval_samples_per_second": 90.563,
      "eval_steps_per_second": 5.881,
      "step": 241
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.29576078057289124,
      "eval_runtime": 1.7368,
      "eval_samples_per_second": 88.67,
      "eval_steps_per_second": 5.758,
      "step": 253
    },
    {
      "epoch": 22.61,
      "learning_rate": 2.246260785014683e-07,
      "loss": 0.0115,
      "step": 260
    },
    {
      "epoch": 22.96,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.2983975112438202,
      "eval_runtime": 1.7116,
      "eval_samples_per_second": 89.976,
      "eval_steps_per_second": 5.843,
      "step": 264
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.3521440327167511,
      "eval_runtime": 1.7503,
      "eval_samples_per_second": 87.986,
      "eval_steps_per_second": 5.713,
      "step": 276
    },
    {
      "epoch": 24.35,
      "learning_rate": 1.64420405881652e-05,
      "loss": 0.0104,
      "step": 280
    },
    {
      "epoch": 24.96,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.4289417862892151,
      "eval_runtime": 1.7466,
      "eval_samples_per_second": 88.173,
      "eval_steps_per_second": 5.726,
      "step": 287
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.6231942176818848,
      "eval_runtime": 1.7359,
      "eval_samples_per_second": 88.715,
      "eval_steps_per_second": 5.761,
      "step": 299
    },
    {
      "epoch": 26.09,
      "learning_rate": 1.4717037025991483e-05,
      "loss": 0.0086,
      "step": 300
    },
    {
      "epoch": 26.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5161650776863098,
      "eval_runtime": 1.7289,
      "eval_samples_per_second": 89.075,
      "eval_steps_per_second": 5.784,
      "step": 310
    },
    {
      "epoch": 27.83,
      "learning_rate": 1.1633998050326307e-05,
      "loss": 0.0205,
      "step": 320
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.8896103896103896,
      "eval_loss": 0.6762561798095703,
      "eval_runtime": 1.7546,
      "eval_samples_per_second": 87.77,
      "eval_steps_per_second": 5.699,
      "step": 322
    },
    {
      "epoch": 28.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.4664335250854492,
      "eval_runtime": 1.7617,
      "eval_samples_per_second": 87.414,
      "eval_steps_per_second": 5.676,
      "step": 333
    },
    {
      "epoch": 29.57,
      "learning_rate": 7.848793092412702e-06,
      "loss": 0.012,
      "step": 340
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.3841441869735718,
      "eval_runtime": 1.7019,
      "eval_samples_per_second": 90.487,
      "eval_steps_per_second": 5.876,
      "step": 345
    },
    {
      "epoch": 30.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.42462781071662903,
      "eval_runtime": 1.741,
      "eval_samples_per_second": 88.455,
      "eval_steps_per_second": 5.744,
      "step": 356
    },
    {
      "epoch": 31.3,
      "learning_rate": 4.1666666666666686e-06,
      "loss": 0.0061,
      "step": 360
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.43087005615234375,
      "eval_runtime": 1.7385,
      "eval_samples_per_second": 88.582,
      "eval_steps_per_second": 5.752,
      "step": 368
    },
    {
      "epoch": 32.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.45713886618614197,
      "eval_runtime": 1.7098,
      "eval_samples_per_second": 90.069,
      "eval_steps_per_second": 5.849,
      "step": 379
    },
    {
      "epoch": 33.04,
      "learning_rate": 1.3709349048921951e-06,
      "loss": 0.0093,
      "step": 380
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.48606640100479126,
      "eval_runtime": 1.7668,
      "eval_samples_per_second": 87.163,
      "eval_steps_per_second": 5.66,
      "step": 391
    },
    {
      "epoch": 34.78,
      "learning_rate": 5.634701881714148e-08,
      "loss": 0.0101,
      "step": 400
    },
    {
      "epoch": 34.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.49100440740585327,
      "eval_runtime": 1.7267,
      "eval_samples_per_second": 89.186,
      "eval_steps_per_second": 5.791,
      "step": 402
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.5978976488113403,
      "eval_runtime": 1.7357,
      "eval_samples_per_second": 88.725,
      "eval_steps_per_second": 5.761,
      "step": 414
    },
    {
      "epoch": 36.52,
      "learning_rate": 1.6164105173215904e-05,
      "loss": 0.011,
      "step": 420
    },
    {
      "epoch": 36.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.3977736830711365,
      "eval_runtime": 1.7968,
      "eval_samples_per_second": 85.708,
      "eval_steps_per_second": 5.565,
      "step": 425
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.40591639280319214,
      "eval_runtime": 1.7136,
      "eval_samples_per_second": 89.87,
      "eval_steps_per_second": 5.836,
      "step": 437
    },
    {
      "epoch": 38.26,
      "learning_rate": 1.4052013648906114e-05,
      "loss": 0.0226,
      "step": 440
    },
    {
      "epoch": 38.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.4942101240158081,
      "eval_runtime": 1.7437,
      "eval_samples_per_second": 88.318,
      "eval_steps_per_second": 5.735,
      "step": 448
    },
    {
      "epoch": 40.0,
      "learning_rate": 1.0723360272592418e-05,
      "loss": 0.0118,
      "step": 460
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.8896103896103896,
      "eval_loss": 0.7233626842498779,
      "eval_runtime": 1.7317,
      "eval_samples_per_second": 88.928,
      "eval_steps_per_second": 5.775,
      "step": 460
    },
    {
      "epoch": 40.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.5826935172080994,
      "eval_runtime": 1.7439,
      "eval_samples_per_second": 88.31,
      "eval_steps_per_second": 5.734,
      "step": 471
    },
    {
      "epoch": 41.74,
      "learning_rate": 6.886265186108914e-06,
      "loss": 0.011,
      "step": 480
    },
    {
      "epoch": 42.0,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.662550687789917,
      "eval_runtime": 1.7627,
      "eval_samples_per_second": 87.366,
      "eval_steps_per_second": 5.673,
      "step": 483
    },
    {
      "epoch": 42.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5871102213859558,
      "eval_runtime": 1.7486,
      "eval_samples_per_second": 88.072,
      "eval_steps_per_second": 5.719,
      "step": 494
    },
    {
      "epoch": 43.48,
      "learning_rate": 3.3570117358101172e-06,
      "loss": 0.0003,
      "step": 500
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5640321969985962,
      "eval_runtime": 1.6975,
      "eval_samples_per_second": 90.721,
      "eval_steps_per_second": 5.891,
      "step": 506
    },
    {
      "epoch": 44.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5655084848403931,
      "eval_runtime": 1.7184,
      "eval_samples_per_second": 89.619,
      "eval_steps_per_second": 5.819,
      "step": 517
    },
    {
      "epoch": 45.22,
      "learning_rate": 8.863946639715635e-07,
      "loss": 0.0005,
      "step": 520
    },
    {
      "epoch": 46.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5844298601150513,
      "eval_runtime": 1.7344,
      "eval_samples_per_second": 88.793,
      "eval_steps_per_second": 5.766,
      "step": 529
    },
    {
      "epoch": 46.96,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0064,
      "step": 540
    },
    {
      "epoch": 46.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5887525677680969,
      "eval_runtime": 1.7094,
      "eval_samples_per_second": 90.091,
      "eval_steps_per_second": 5.85,
      "step": 540
    },
    {
      "epoch": 48.0,
      "eval_accuracy": 0.8896103896103896,
      "eval_loss": 0.6656709313392639,
      "eval_runtime": 1.7414,
      "eval_samples_per_second": 88.435,
      "eval_steps_per_second": 5.743,
      "step": 552
    },
    {
      "epoch": 48.7,
      "learning_rate": 1.5780272002695102e-05,
      "loss": 0.0084,
      "step": 560
    },
    {
      "epoch": 48.96,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.7156269550323486,
      "eval_runtime": 1.7014,
      "eval_samples_per_second": 90.514,
      "eval_steps_per_second": 5.878,
      "step": 563
    },
    {
      "epoch": 50.0,
      "eval_accuracy": 0.8766233766233766,
      "eval_loss": 0.9346238970756531,
      "eval_runtime": 1.7164,
      "eval_samples_per_second": 89.724,
      "eval_steps_per_second": 5.826,
      "step": 575
    },
    {
      "epoch": 50.43,
      "learning_rate": 1.3309654930856552e-05,
      "loss": 0.0318,
      "step": 580
    },
    {
      "epoch": 50.96,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.8030693531036377,
      "eval_runtime": 1.6994,
      "eval_samples_per_second": 90.621,
      "eval_steps_per_second": 5.884,
      "step": 586
    },
    {
      "epoch": 52.0,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.5700052976608276,
      "eval_runtime": 1.7294,
      "eval_samples_per_second": 89.049,
      "eval_steps_per_second": 5.782,
      "step": 598
    },
    {
      "epoch": 52.17,
      "learning_rate": 9.780401480557754e-06,
      "loss": 0.0338,
      "step": 600
    },
    {
      "epoch": 52.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.40834710001945496,
      "eval_runtime": 1.6964,
      "eval_samples_per_second": 90.781,
      "eval_steps_per_second": 5.895,
      "step": 609
    },
    {
      "epoch": 53.91,
      "learning_rate": 5.943306394074249e-06,
      "loss": 0.0147,
      "step": 620
    },
    {
      "epoch": 54.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.41295498609542847,
      "eval_runtime": 1.7144,
      "eval_samples_per_second": 89.828,
      "eval_steps_per_second": 5.833,
      "step": 621
    },
    {
      "epoch": 54.96,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.40372058749198914,
      "eval_runtime": 1.7344,
      "eval_samples_per_second": 88.792,
      "eval_steps_per_second": 5.766,
      "step": 632
    },
    {
      "epoch": 55.65,
      "learning_rate": 2.6146530177605546e-06,
      "loss": 0.0011,
      "step": 640
    },
    {
      "epoch": 56.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.36978378891944885,
      "eval_runtime": 1.7234,
      "eval_samples_per_second": 89.359,
      "eval_steps_per_second": 5.803,
      "step": 644
    },
    {
      "epoch": 56.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.38696253299713135,
      "eval_runtime": 1.7014,
      "eval_samples_per_second": 90.515,
      "eval_steps_per_second": 5.878,
      "step": 655
    },
    {
      "epoch": 57.39,
      "learning_rate": 5.025614934507641e-07,
      "loss": 0.0021,
      "step": 660
    },
    {
      "epoch": 58.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.39924710988998413,
      "eval_runtime": 1.6854,
      "eval_samples_per_second": 91.374,
      "eval_steps_per_second": 5.933,
      "step": 667
    },
    {
      "epoch": 58.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.44311344623565674,
      "eval_runtime": 1.6854,
      "eval_samples_per_second": 91.374,
      "eval_steps_per_second": 5.933,
      "step": 678
    },
    {
      "epoch": 59.13,
      "learning_rate": 1.6610319647849526e-05,
      "loss": 0.0002,
      "step": 680
    },
    {
      "epoch": 60.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.44143199920654297,
      "eval_runtime": 1.7504,
      "eval_samples_per_second": 87.98,
      "eval_steps_per_second": 5.713,
      "step": 690
    },
    {
      "epoch": 60.87,
      "learning_rate": 1.529573176177447e-05,
      "loss": 0.0088,
      "step": 700
    },
    {
      "epoch": 60.96,
      "eval_accuracy": 0.9415584415584416,
      "eval_loss": 0.4255146086215973,
      "eval_runtime": 1.7144,
      "eval_samples_per_second": 89.828,
      "eval_steps_per_second": 5.833,
      "step": 701
    },
    {
      "epoch": 62.0,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.4168331027030945,
      "eval_runtime": 1.7434,
      "eval_samples_per_second": 88.334,
      "eval_steps_per_second": 5.736,
      "step": 713
    },
    {
      "epoch": 62.61,
      "learning_rate": 1.25e-05,
      "loss": 0.0061,
      "step": 720
    },
    {
      "epoch": 62.96,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.49312305450439453,
      "eval_runtime": 1.7334,
      "eval_samples_per_second": 88.843,
      "eval_steps_per_second": 5.769,
      "step": 724
    },
    {
      "epoch": 64.0,
      "eval_accuracy": 0.948051948051948,
      "eval_loss": 0.28522124886512756,
      "eval_runtime": 1.7184,
      "eval_samples_per_second": 89.619,
      "eval_steps_per_second": 5.819,
      "step": 736
    },
    {
      "epoch": 64.35,
      "learning_rate": 8.817873574253966e-06,
      "loss": 0.0179,
      "step": 740
    },
    {
      "epoch": 64.96,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.44120827317237854,
      "eval_runtime": 1.7364,
      "eval_samples_per_second": 88.69,
      "eval_steps_per_second": 5.759,
      "step": 747
    },
    {
      "epoch": 66.0,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.331409215927124,
      "eval_runtime": 1.7124,
      "eval_samples_per_second": 89.933,
      "eval_steps_per_second": 5.84,
      "step": 759
    },
    {
      "epoch": 66.09,
      "learning_rate": 5.03266861634036e-06,
      "loss": 0.0014,
      "step": 760
    },
    {
      "epoch": 66.96,
      "eval_accuracy": 0.9415584415584416,
      "eval_loss": 0.2971489727497101,
      "eval_runtime": 1.7494,
      "eval_samples_per_second": 88.031,
      "eval_steps_per_second": 5.716,
      "step": 770
    },
    {
      "epoch": 67.83,
      "learning_rate": 1.9496296406751813e-06,
      "loss": 0.0199,
      "step": 780
    },
    {
      "epoch": 68.0,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.3261590301990509,
      "eval_runtime": 1.7504,
      "eval_samples_per_second": 87.98,
      "eval_steps_per_second": 5.713,
      "step": 782
    },
    {
      "epoch": 68.96,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.3835467994213104,
      "eval_runtime": 1.7824,
      "eval_samples_per_second": 86.401,
      "eval_steps_per_second": 5.61,
      "step": 793
    },
    {
      "epoch": 69.57,
      "learning_rate": 2.246260785014683e-07,
      "loss": 0.0091,
      "step": 800
    },
    {
      "epoch": 70.0,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.3903743326663971,
      "eval_runtime": 1.8425,
      "eval_samples_per_second": 83.582,
      "eval_steps_per_second": 5.427,
      "step": 805
    },
    {
      "epoch": 70.96,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.47730717062950134,
      "eval_runtime": 1.8005,
      "eval_samples_per_second": 85.531,
      "eval_steps_per_second": 5.554,
      "step": 816
    },
    {
      "epoch": 71.3,
      "learning_rate": 1.64420405881652e-05,
      "loss": 0.0029,
      "step": 820
    },
    {
      "epoch": 72.0,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.4937627613544464,
      "eval_runtime": 1.7875,
      "eval_samples_per_second": 86.152,
      "eval_steps_per_second": 5.594,
      "step": 828
    },
    {
      "epoch": 72.96,
      "eval_accuracy": 0.948051948051948,
      "eval_loss": 0.456775426864624,
      "eval_runtime": 1.7718,
      "eval_samples_per_second": 86.918,
      "eval_steps_per_second": 5.644,
      "step": 839
    },
    {
      "epoch": 73.04,
      "learning_rate": 1.4717037025991483e-05,
      "loss": 0.0224,
      "step": 840
    },
    {
      "epoch": 74.0,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.4465040862560272,
      "eval_runtime": 1.7516,
      "eval_samples_per_second": 87.92,
      "eval_steps_per_second": 5.709,
      "step": 851
    },
    {
      "epoch": 74.78,
      "learning_rate": 1.1633998050326307e-05,
      "loss": 0.0045,
      "step": 860
    },
    {
      "epoch": 74.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.6093705296516418,
      "eval_runtime": 1.8028,
      "eval_samples_per_second": 85.421,
      "eval_steps_per_second": 5.547,
      "step": 862
    },
    {
      "epoch": 76.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.6924065947532654,
      "eval_runtime": 1.7779,
      "eval_samples_per_second": 86.617,
      "eval_steps_per_second": 5.624,
      "step": 874
    },
    {
      "epoch": 76.52,
      "learning_rate": 7.848793092412702e-06,
      "loss": 0.0088,
      "step": 880
    },
    {
      "epoch": 76.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.48120445013046265,
      "eval_runtime": 1.7635,
      "eval_samples_per_second": 87.327,
      "eval_steps_per_second": 5.671,
      "step": 885
    },
    {
      "epoch": 78.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.6367415189743042,
      "eval_runtime": 1.7799,
      "eval_samples_per_second": 86.522,
      "eval_steps_per_second": 5.618,
      "step": 897
    },
    {
      "epoch": 78.26,
      "learning_rate": 4.1666666666666686e-06,
      "loss": 0.0033,
      "step": 900
    },
    {
      "epoch": 78.96,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.49070408940315247,
      "eval_runtime": 1.7676,
      "eval_samples_per_second": 87.125,
      "eval_steps_per_second": 5.657,
      "step": 908
    },
    {
      "epoch": 80.0,
      "learning_rate": 1.3709349048921951e-06,
      "loss": 0.0076,
      "step": 920
    },
    {
      "epoch": 80.0,
      "eval_accuracy": 0.9415584415584416,
      "eval_loss": 0.31148040294647217,
      "eval_runtime": 1.7496,
      "eval_samples_per_second": 88.021,
      "eval_steps_per_second": 5.716,
      "step": 920
    },
    {
      "epoch": 80.96,
      "eval_accuracy": 0.948051948051948,
      "eval_loss": 0.2700955271720886,
      "eval_runtime": 1.7827,
      "eval_samples_per_second": 86.387,
      "eval_steps_per_second": 5.61,
      "step": 931
    },
    {
      "epoch": 81.74,
      "learning_rate": 5.634701881714148e-08,
      "loss": 0.0002,
      "step": 940
    },
    {
      "epoch": 82.0,
      "eval_accuracy": 0.9545454545454546,
      "eval_loss": 0.2613329589366913,
      "eval_runtime": 1.7597,
      "eval_samples_per_second": 87.515,
      "eval_steps_per_second": 5.683,
      "step": 943
    },
    {
      "epoch": 82.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.40443289279937744,
      "eval_runtime": 1.7637,
      "eval_samples_per_second": 87.314,
      "eval_steps_per_second": 5.67,
      "step": 954
    },
    {
      "epoch": 83.48,
      "learning_rate": 1.6164105173215904e-05,
      "loss": 0.0193,
      "step": 960
    },
    {
      "epoch": 84.0,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.9613493084907532,
      "eval_runtime": 1.7854,
      "eval_samples_per_second": 86.255,
      "eval_steps_per_second": 5.601,
      "step": 966
    },
    {
      "epoch": 84.96,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.6934040784835815,
      "eval_runtime": 1.7617,
      "eval_samples_per_second": 87.414,
      "eval_steps_per_second": 5.676,
      "step": 977
    },
    {
      "epoch": 85.22,
      "learning_rate": 1.4052013648906114e-05,
      "loss": 0.0238,
      "step": 980
    },
    {
      "epoch": 86.0,
      "eval_accuracy": 0.8896103896103896,
      "eval_loss": 0.9348794221878052,
      "eval_runtime": 1.7915,
      "eval_samples_per_second": 85.962,
      "eval_steps_per_second": 5.582,
      "step": 989
    },
    {
      "epoch": 86.96,
      "learning_rate": 1.0723360272592418e-05,
      "loss": 0.011,
      "step": 1000
    },
    {
      "epoch": 86.96,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.8836289644241333,
      "eval_runtime": 1.8098,
      "eval_samples_per_second": 85.095,
      "eval_steps_per_second": 5.526,
      "step": 1000
    },
    {
      "epoch": 88.0,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.7403988838195801,
      "eval_runtime": 1.7608,
      "eval_samples_per_second": 87.459,
      "eval_steps_per_second": 5.679,
      "step": 1012
    },
    {
      "epoch": 88.7,
      "learning_rate": 6.886265186108914e-06,
      "loss": 0.018,
      "step": 1020
    },
    {
      "epoch": 88.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.5259799957275391,
      "eval_runtime": 1.7749,
      "eval_samples_per_second": 86.764,
      "eval_steps_per_second": 5.634,
      "step": 1023
    },
    {
      "epoch": 90.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.5202356576919556,
      "eval_runtime": 1.7868,
      "eval_samples_per_second": 86.187,
      "eval_steps_per_second": 5.597,
      "step": 1035
    },
    {
      "epoch": 90.43,
      "learning_rate": 3.3570117358101172e-06,
      "loss": 0.0041,
      "step": 1040
    },
    {
      "epoch": 90.96,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.48472902178764343,
      "eval_runtime": 1.7845,
      "eval_samples_per_second": 86.297,
      "eval_steps_per_second": 5.604,
      "step": 1046
    },
    {
      "epoch": 92.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.48678579926490784,
      "eval_runtime": 1.7317,
      "eval_samples_per_second": 88.93,
      "eval_steps_per_second": 5.775,
      "step": 1058
    },
    {
      "epoch": 92.17,
      "learning_rate": 8.863946639715635e-07,
      "loss": 0.001,
      "step": 1060
    },
    {
      "epoch": 92.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.4975196421146393,
      "eval_runtime": 1.7866,
      "eval_samples_per_second": 86.198,
      "eval_steps_per_second": 5.597,
      "step": 1069
    },
    {
      "epoch": 93.91,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0014,
      "step": 1080
    },
    {
      "epoch": 94.0,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.6255179643630981,
      "eval_runtime": 1.7777,
      "eval_samples_per_second": 86.631,
      "eval_steps_per_second": 5.625,
      "step": 1081
    },
    {
      "epoch": 94.96,
      "eval_accuracy": 0.8766233766233766,
      "eval_loss": 0.9968315362930298,
      "eval_runtime": 1.7619,
      "eval_samples_per_second": 87.408,
      "eval_steps_per_second": 5.676,
      "step": 1092
    },
    {
      "epoch": 95.65,
      "learning_rate": 1.5780272002695102e-05,
      "loss": 0.0165,
      "step": 1100
    },
    {
      "epoch": 96.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.6173205971717834,
      "eval_runtime": 1.8105,
      "eval_samples_per_second": 85.06,
      "eval_steps_per_second": 5.523,
      "step": 1104
    },
    {
      "epoch": 96.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.7537987232208252,
      "eval_runtime": 1.7727,
      "eval_samples_per_second": 86.873,
      "eval_steps_per_second": 5.641,
      "step": 1115
    },
    {
      "epoch": 97.39,
      "learning_rate": 1.3309654930856552e-05,
      "loss": 0.013,
      "step": 1120
    },
    {
      "epoch": 98.0,
      "eval_accuracy": 0.8766233766233766,
      "eval_loss": 0.7560279965400696,
      "eval_runtime": 1.7715,
      "eval_samples_per_second": 86.931,
      "eval_steps_per_second": 5.645,
      "step": 1127
    },
    {
      "epoch": 98.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.5807818174362183,
      "eval_runtime": 1.79,
      "eval_samples_per_second": 86.032,
      "eval_steps_per_second": 5.586,
      "step": 1138
    },
    {
      "epoch": 99.13,
      "learning_rate": 9.780401480557754e-06,
      "loss": 0.0237,
      "step": 1140
    },
    {
      "epoch": 100.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5147651433944702,
      "eval_runtime": 1.7527,
      "eval_samples_per_second": 87.864,
      "eval_steps_per_second": 5.705,
      "step": 1150
    },
    {
      "epoch": 100.87,
      "learning_rate": 5.943306394074249e-06,
      "loss": 0.0061,
      "step": 1160
    },
    {
      "epoch": 100.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5672047734260559,
      "eval_runtime": 1.7885,
      "eval_samples_per_second": 86.106,
      "eval_steps_per_second": 5.591,
      "step": 1161
    },
    {
      "epoch": 102.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.4343276619911194,
      "eval_runtime": 1.7759,
      "eval_samples_per_second": 86.714,
      "eval_steps_per_second": 5.631,
      "step": 1173
    },
    {
      "epoch": 102.61,
      "learning_rate": 2.6146530177605546e-06,
      "loss": 0.002,
      "step": 1180
    },
    {
      "epoch": 102.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.32392024993896484,
      "eval_runtime": 1.8027,
      "eval_samples_per_second": 85.425,
      "eval_steps_per_second": 5.547,
      "step": 1184
    },
    {
      "epoch": 104.0,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.2951604127883911,
      "eval_runtime": 1.8006,
      "eval_samples_per_second": 85.525,
      "eval_steps_per_second": 5.554,
      "step": 1196
    },
    {
      "epoch": 104.35,
      "learning_rate": 5.025614934507641e-07,
      "loss": 0.0005,
      "step": 1200
    },
    {
      "epoch": 104.96,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.2926579713821411,
      "eval_runtime": 1.7666,
      "eval_samples_per_second": 87.174,
      "eval_steps_per_second": 5.661,
      "step": 1207
    },
    {
      "epoch": 106.0,
      "eval_accuracy": 0.9285714285714286,
      "eval_loss": 0.3511568605899811,
      "eval_runtime": 1.7794,
      "eval_samples_per_second": 86.546,
      "eval_steps_per_second": 5.62,
      "step": 1219
    },
    {
      "epoch": 106.09,
      "learning_rate": 1.6610319647849526e-05,
      "loss": 0.0003,
      "step": 1220
    },
    {
      "epoch": 106.96,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.4030219316482544,
      "eval_runtime": 1.7918,
      "eval_samples_per_second": 85.945,
      "eval_steps_per_second": 5.581,
      "step": 1230
    },
    {
      "epoch": 107.83,
      "learning_rate": 1.529573176177447e-05,
      "loss": 0.0023,
      "step": 1240
    },
    {
      "epoch": 108.0,
      "eval_accuracy": 0.8896103896103896,
      "eval_loss": 0.3911021053791046,
      "eval_runtime": 1.7635,
      "eval_samples_per_second": 87.328,
      "eval_steps_per_second": 5.671,
      "step": 1242
    },
    {
      "epoch": 108.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5156851410865784,
      "eval_runtime": 1.7917,
      "eval_samples_per_second": 85.953,
      "eval_steps_per_second": 5.581,
      "step": 1253
    },
    {
      "epoch": 109.57,
      "learning_rate": 1.25e-05,
      "loss": 0.0114,
      "step": 1260
    },
    {
      "epoch": 110.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.5531629323959351,
      "eval_runtime": 1.7244,
      "eval_samples_per_second": 89.307,
      "eval_steps_per_second": 5.799,
      "step": 1265
    },
    {
      "epoch": 110.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.6447522640228271,
      "eval_runtime": 1.7304,
      "eval_samples_per_second": 88.997,
      "eval_steps_per_second": 5.779,
      "step": 1276
    },
    {
      "epoch": 111.3,
      "learning_rate": 8.817873574253966e-06,
      "loss": 0.0003,
      "step": 1280
    },
    {
      "epoch": 112.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.6866676807403564,
      "eval_runtime": 1.7494,
      "eval_samples_per_second": 88.031,
      "eval_steps_per_second": 5.716,
      "step": 1288
    },
    {
      "epoch": 112.96,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.7411206960678101,
      "eval_runtime": 1.7444,
      "eval_samples_per_second": 88.283,
      "eval_steps_per_second": 5.733,
      "step": 1299
    },
    {
      "epoch": 113.04,
      "learning_rate": 5.03266861634036e-06,
      "loss": 0.0153,
      "step": 1300
    },
    {
      "epoch": 114.0,
      "eval_accuracy": 0.8896103896103896,
      "eval_loss": 0.7488161325454712,
      "eval_runtime": 1.7644,
      "eval_samples_per_second": 87.282,
      "eval_steps_per_second": 5.668,
      "step": 1311
    },
    {
      "epoch": 114.78,
      "learning_rate": 1.9496296406751813e-06,
      "loss": 0.0039,
      "step": 1320
    },
    {
      "epoch": 114.96,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.6945769786834717,
      "eval_runtime": 1.7344,
      "eval_samples_per_second": 88.792,
      "eval_steps_per_second": 5.766,
      "step": 1322
    },
    {
      "epoch": 116.0,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.7218338847160339,
      "eval_runtime": 1.7644,
      "eval_samples_per_second": 87.281,
      "eval_steps_per_second": 5.668,
      "step": 1334
    },
    {
      "epoch": 116.52,
      "learning_rate": 2.246260785014683e-07,
      "loss": 0.0002,
      "step": 1340
    },
    {
      "epoch": 116.96,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.7305352091789246,
      "eval_runtime": 1.7634,
      "eval_samples_per_second": 87.332,
      "eval_steps_per_second": 5.671,
      "step": 1345
    },
    {
      "epoch": 118.0,
      "eval_accuracy": 0.8701298701298701,
      "eval_loss": 1.0061231851577759,
      "eval_runtime": 1.7854,
      "eval_samples_per_second": 86.255,
      "eval_steps_per_second": 5.601,
      "step": 1357
    },
    {
      "epoch": 118.26,
      "learning_rate": 1.64420405881652e-05,
      "loss": 0.0066,
      "step": 1360
    },
    {
      "epoch": 118.96,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.5966177582740784,
      "eval_runtime": 1.7834,
      "eval_samples_per_second": 86.352,
      "eval_steps_per_second": 5.607,
      "step": 1368
    },
    {
      "epoch": 120.0,
      "learning_rate": 1.4717037025991483e-05,
      "loss": 0.0083,
      "step": 1380
    },
    {
      "epoch": 120.0,
      "eval_accuracy": 0.8636363636363636,
      "eval_loss": 1.1087840795516968,
      "eval_runtime": 1.7834,
      "eval_samples_per_second": 86.352,
      "eval_steps_per_second": 5.607,
      "step": 1380
    },
    {
      "epoch": 120.96,
      "eval_accuracy": 0.8831168831168831,
      "eval_loss": 0.821342945098877,
      "eval_runtime": 1.7534,
      "eval_samples_per_second": 87.83,
      "eval_steps_per_second": 5.703,
      "step": 1391
    },
    {
      "epoch": 121.74,
      "learning_rate": 1.1633998050326307e-05,
      "loss": 0.0202,
      "step": 1400
    },
    {
      "epoch": 122.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.5226480960845947,
      "eval_runtime": 1.7954,
      "eval_samples_per_second": 85.775,
      "eval_steps_per_second": 5.57,
      "step": 1403
    },
    {
      "epoch": 122.96,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.44853323698043823,
      "eval_runtime": 1.7774,
      "eval_samples_per_second": 86.643,
      "eval_steps_per_second": 5.626,
      "step": 1414
    },
    {
      "epoch": 123.48,
      "learning_rate": 7.848793092412702e-06,
      "loss": 0.0033,
      "step": 1420
    },
    {
      "epoch": 124.0,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.7281427979469299,
      "eval_runtime": 1.8004,
      "eval_samples_per_second": 85.537,
      "eval_steps_per_second": 5.554,
      "step": 1426
    },
    {
      "epoch": 124.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.6655176281929016,
      "eval_runtime": 1.7664,
      "eval_samples_per_second": 87.183,
      "eval_steps_per_second": 5.661,
      "step": 1437
    },
    {
      "epoch": 125.22,
      "learning_rate": 4.1666666666666686e-06,
      "loss": 0.0185,
      "step": 1440
    },
    {
      "epoch": 126.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.5161400437355042,
      "eval_runtime": 1.7704,
      "eval_samples_per_second": 86.986,
      "eval_steps_per_second": 5.648,
      "step": 1449
    },
    {
      "epoch": 126.96,
      "learning_rate": 1.3709349048921951e-06,
      "loss": 0.0001,
      "step": 1460
    },
    {
      "epoch": 126.96,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.4859886169433594,
      "eval_runtime": 1.7874,
      "eval_samples_per_second": 86.159,
      "eval_steps_per_second": 5.595,
      "step": 1460
    },
    {
      "epoch": 128.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.4834165573120117,
      "eval_runtime": 1.7944,
      "eval_samples_per_second": 85.822,
      "eval_steps_per_second": 5.573,
      "step": 1472
    },
    {
      "epoch": 128.7,
      "learning_rate": 5.634701881714148e-08,
      "loss": 0.0047,
      "step": 1480
    },
    {
      "epoch": 128.96,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.48360273241996765,
      "eval_runtime": 1.7574,
      "eval_samples_per_second": 87.63,
      "eval_steps_per_second": 5.69,
      "step": 1483
    },
    {
      "epoch": 130.0,
      "eval_accuracy": 0.9155844155844156,
      "eval_loss": 0.6164301037788391,
      "eval_runtime": 1.7394,
      "eval_samples_per_second": 88.537,
      "eval_steps_per_second": 5.749,
      "step": 1495
    },
    {
      "epoch": 130.43,
      "learning_rate": 1.6164105173215904e-05,
      "loss": 0.011,
      "step": 1500
    },
    {
      "epoch": 130.96,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.7818012237548828,
      "eval_runtime": 1.7684,
      "eval_samples_per_second": 87.085,
      "eval_steps_per_second": 5.655,
      "step": 1506
    },
    {
      "epoch": 132.0,
      "eval_accuracy": 0.8636363636363636,
      "eval_loss": 0.8022345900535583,
      "eval_runtime": 1.7544,
      "eval_samples_per_second": 87.779,
      "eval_steps_per_second": 5.7,
      "step": 1518
    },
    {
      "epoch": 132.17,
      "learning_rate": 1.4052013648906114e-05,
      "loss": 0.0023,
      "step": 1520
    },
    {
      "epoch": 132.96,
      "eval_accuracy": 0.8636363636363636,
      "eval_loss": 0.8653693199157715,
      "eval_runtime": 1.7604,
      "eval_samples_per_second": 87.48,
      "eval_steps_per_second": 5.681,
      "step": 1529
    },
    {
      "epoch": 133.91,
      "learning_rate": 1.0723360272592418e-05,
      "loss": 0.0222,
      "step": 1540
    },
    {
      "epoch": 134.0,
      "eval_accuracy": 0.8896103896103896,
      "eval_loss": 0.6707515716552734,
      "eval_runtime": 1.7434,
      "eval_samples_per_second": 88.334,
      "eval_steps_per_second": 5.736,
      "step": 1541
    },
    {
      "epoch": 134.96,
      "eval_accuracy": 0.935064935064935,
      "eval_loss": 0.4996984004974365,
      "eval_runtime": 1.7534,
      "eval_samples_per_second": 87.83,
      "eval_steps_per_second": 5.703,
      "step": 1552
    },
    {
      "epoch": 135.65,
      "learning_rate": 6.886265186108914e-06,
      "loss": 0.0126,
      "step": 1560
    },
    {
      "epoch": 136.0,
      "eval_accuracy": 0.922077922077922,
      "eval_loss": 0.5560286641120911,
      "eval_runtime": 1.7314,
      "eval_samples_per_second": 88.946,
      "eval_steps_per_second": 5.776,
      "step": 1564
    },
    {
      "epoch": 136.96,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.6162758469581604,
      "eval_runtime": 1.7204,
      "eval_samples_per_second": 89.515,
      "eval_steps_per_second": 5.813,
      "step": 1575
    },
    {
      "epoch": 137.39,
      "learning_rate": 3.3570117358101172e-06,
      "loss": 0.014,
      "step": 1580
    },
    {
      "epoch": 138.0,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.6488694548606873,
      "eval_runtime": 1.7194,
      "eval_samples_per_second": 89.567,
      "eval_steps_per_second": 5.816,
      "step": 1587
    },
    {
      "epoch": 138.96,
      "eval_accuracy": 0.8701298701298701,
      "eval_loss": 0.684516191482544,
      "eval_runtime": 1.7304,
      "eval_samples_per_second": 88.997,
      "eval_steps_per_second": 5.779,
      "step": 1598
    },
    {
      "epoch": 139.13,
      "learning_rate": 8.863946639715635e-07,
      "loss": 0.0088,
      "step": 1600
    },
    {
      "epoch": 140.0,
      "eval_accuracy": 0.8766233766233766,
      "eval_loss": 0.7022619247436523,
      "eval_runtime": 1.7374,
      "eval_samples_per_second": 88.639,
      "eval_steps_per_second": 5.756,
      "step": 1610
    },
    {
      "epoch": 140.87,
      "learning_rate": 1.6666666666666667e-05,
      "loss": 0.0022,
      "step": 1620
    },
    {
      "epoch": 140.96,
      "eval_accuracy": 0.8701298701298701,
      "eval_loss": 0.762221097946167,
      "eval_runtime": 1.6984,
      "eval_samples_per_second": 90.675,
      "eval_steps_per_second": 5.888,
      "step": 1621
    },
    {
      "epoch": 142.0,
      "eval_accuracy": 0.8961038961038961,
      "eval_loss": 0.6736029982566833,
      "eval_runtime": 1.7594,
      "eval_samples_per_second": 87.53,
      "eval_steps_per_second": 5.684,
      "step": 1633
    },
    {
      "epoch": 142.61,
      "learning_rate": 1.5780272002695102e-05,
      "loss": 0.0017,
      "step": 1640
    },
    {
      "epoch": 142.96,
      "eval_accuracy": 0.9090909090909091,
      "eval_loss": 0.5298991799354553,
      "eval_runtime": 1.6954,
      "eval_samples_per_second": 90.835,
      "eval_steps_per_second": 5.898,
      "step": 1644
    },
    {
      "epoch": 143.48,
      "eval_accuracy": 0.9025974025974026,
      "eval_loss": 0.5584802031517029,
      "eval_runtime": 1.6964,
      "eval_samples_per_second": 90.782,
      "eval_steps_per_second": 5.895,
      "step": 1650
    },
    {
      "epoch": 143.48,
      "step": 1650,
      "total_flos": 8.017005638819359e+18,
      "train_loss": 0.028458750352940775,
      "train_runtime": 2286.6106,
      "train_samples_per_second": 47.297,
      "train_steps_per_second": 0.722
    }
  ],
  "logging_steps": 20,
  "max_steps": 1650,
  "num_train_epochs": 150,
  "save_steps": 500,
  "total_flos": 8.017005638819359e+18,
  "trial_name": null,
  "trial_params": null
}