{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 99.66888519134775, "eval_steps": 1000, "global_step": 30000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.6622296173044924, "grad_norm": 3.129899024963379, "learning_rate": 0.00029939999999999996, "loss": 2.5636, "step": 500 }, { "epoch": 3.32279534109817, "grad_norm": 1.6870653629302979, "learning_rate": 0.0002949254237288135, "loss": 0.3803, "step": 1000 }, { "epoch": 3.32279534109817, "eval_loss": 0.3107321560382843, "eval_runtime": 5.3302, "eval_samples_per_second": 18.949, "eval_steps_per_second": 2.439, "eval_wer": 0.36236559139784946, "step": 1000 }, { "epoch": 4.985024958402662, "grad_norm": 4.024764537811279, "learning_rate": 0.00028984067796610164, "loss": 0.2315, "step": 1500 }, { "epoch": 6.64559068219634, "grad_norm": 2.152501106262207, "learning_rate": 0.0002847559322033898, "loss": 0.1663, "step": 2000 }, { "epoch": 6.64559068219634, "eval_loss": 0.2739505469799042, "eval_runtime": 5.2893, "eval_samples_per_second": 19.095, "eval_steps_per_second": 2.458, "eval_wer": 0.3096774193548387, "step": 2000 }, { "epoch": 8.306156405990016, "grad_norm": 1.6033236980438232, "learning_rate": 0.00027967118644067795, "loss": 0.1381, "step": 2500 }, { "epoch": 9.96838602329451, "grad_norm": 1.2841393947601318, "learning_rate": 0.00027458644067796607, "loss": 0.1206, "step": 3000 }, { "epoch": 9.96838602329451, "eval_loss": 0.24860869348049164, "eval_runtime": 5.3415, "eval_samples_per_second": 18.909, "eval_steps_per_second": 2.434, "eval_wer": 0.2903225806451613, "step": 3000 }, { "epoch": 11.628951747088186, "grad_norm": 1.436146855354309, "learning_rate": 0.0002695016949152542, "loss": 0.1053, "step": 3500 }, { "epoch": 13.289517470881863, "grad_norm": 0.8757719397544861, "learning_rate": 0.0002644169491525423, "loss": 0.0938, "step": 4000 }, { "epoch": 13.289517470881863, "eval_loss": 0.25879770517349243, "eval_runtime": 5.2643, "eval_samples_per_second": 19.186, "eval_steps_per_second": 2.469, "eval_wer": 0.28279569892473116, "step": 4000 }, { "epoch": 14.951747088186355, "grad_norm": 0.9832671284675598, "learning_rate": 0.0002593322033898305, "loss": 0.0872, "step": 4500 }, { "epoch": 16.612312811980033, "grad_norm": 0.9009350538253784, "learning_rate": 0.00025424745762711863, "loss": 0.0816, "step": 5000 }, { "epoch": 16.612312811980033, "eval_loss": 0.27693644165992737, "eval_runtime": 5.2922, "eval_samples_per_second": 19.085, "eval_steps_per_second": 2.456, "eval_wer": 0.278494623655914, "step": 5000 }, { "epoch": 18.27287853577371, "grad_norm": 1.4376908540725708, "learning_rate": 0.00024916271186440676, "loss": 0.0756, "step": 5500 }, { "epoch": 19.935108153078204, "grad_norm": 0.4128202795982361, "learning_rate": 0.0002440779661016949, "loss": 0.0689, "step": 6000 }, { "epoch": 19.935108153078204, "eval_loss": 0.24573881924152374, "eval_runtime": 5.2488, "eval_samples_per_second": 19.243, "eval_steps_per_second": 2.477, "eval_wer": 0.28817204301075267, "step": 6000 }, { "epoch": 21.59567387687188, "grad_norm": 0.9727014899253845, "learning_rate": 0.00023899322033898301, "loss": 0.0634, "step": 6500 }, { "epoch": 23.25623960066556, "grad_norm": 0.9723252058029175, "learning_rate": 0.00023390847457627117, "loss": 0.0642, "step": 7000 }, { "epoch": 23.25623960066556, "eval_loss": 0.26387491822242737, "eval_runtime": 5.2852, "eval_samples_per_second": 19.11, "eval_steps_per_second": 2.46, "eval_wer": 0.2913978494623656, "step": 7000 }, { "epoch": 24.91846921797005, "grad_norm": 0.8104386329650879, "learning_rate": 0.00022882372881355932, "loss": 0.0586, "step": 7500 }, { "epoch": 26.579034941763727, "grad_norm": 0.31280040740966797, "learning_rate": 0.00022373898305084742, "loss": 0.0566, "step": 8000 }, { "epoch": 26.579034941763727, "eval_loss": 0.2954213619232178, "eval_runtime": 5.2555, "eval_samples_per_second": 19.218, "eval_steps_per_second": 2.474, "eval_wer": 0.28279569892473116, "step": 8000 }, { "epoch": 28.239600665557404, "grad_norm": 0.31530410051345825, "learning_rate": 0.00021865423728813558, "loss": 0.0509, "step": 8500 }, { "epoch": 29.901830282861898, "grad_norm": 0.7447624802589417, "learning_rate": 0.0002135694915254237, "loss": 0.049, "step": 9000 }, { "epoch": 29.901830282861898, "eval_loss": 0.31719881296157837, "eval_runtime": 5.2417, "eval_samples_per_second": 19.268, "eval_steps_per_second": 2.48, "eval_wer": 0.2763440860215054, "step": 9000 }, { "epoch": 31.562396006655575, "grad_norm": 0.7232189774513245, "learning_rate": 0.00020848474576271186, "loss": 0.0464, "step": 9500 }, { "epoch": 33.22296173044925, "grad_norm": 0.7363786101341248, "learning_rate": 0.00020339999999999998, "loss": 0.0454, "step": 10000 }, { "epoch": 33.22296173044925, "eval_loss": 0.31861352920532227, "eval_runtime": 5.2622, "eval_samples_per_second": 19.194, "eval_steps_per_second": 2.47, "eval_wer": 0.28279569892473116, "step": 10000 }, { "epoch": 34.88519134775375, "grad_norm": 1.2368154525756836, "learning_rate": 0.0001983152542372881, "loss": 0.0419, "step": 10500 }, { "epoch": 36.54575707154742, "grad_norm": 0.20310941338539124, "learning_rate": 0.00019323050847457626, "loss": 0.0395, "step": 11000 }, { "epoch": 36.54575707154742, "eval_loss": 0.27824845910072327, "eval_runtime": 5.2266, "eval_samples_per_second": 19.324, "eval_steps_per_second": 2.487, "eval_wer": 0.2817204301075269, "step": 11000 }, { "epoch": 38.2063227953411, "grad_norm": 0.7990397214889526, "learning_rate": 0.0001881457627118644, "loss": 0.0379, "step": 11500 }, { "epoch": 39.86855241264559, "grad_norm": 1.0379022359848022, "learning_rate": 0.00018306101694915252, "loss": 0.0389, "step": 12000 }, { "epoch": 39.86855241264559, "eval_loss": 0.28572770953178406, "eval_runtime": 5.2471, "eval_samples_per_second": 19.249, "eval_steps_per_second": 2.478, "eval_wer": 0.28279569892473116, "step": 12000 }, { "epoch": 41.529118136439266, "grad_norm": 0.42599430680274963, "learning_rate": 0.00017797627118644067, "loss": 0.0338, "step": 12500 }, { "epoch": 43.18968386023295, "grad_norm": 0.8438450694084167, "learning_rate": 0.0001728915254237288, "loss": 0.0321, "step": 13000 }, { "epoch": 43.18968386023295, "eval_loss": 0.26923489570617676, "eval_runtime": 5.1894, "eval_samples_per_second": 19.463, "eval_steps_per_second": 2.505, "eval_wer": 0.25268817204301075, "step": 13000 }, { "epoch": 44.85191347753744, "grad_norm": 0.27244409918785095, "learning_rate": 0.00016780677966101695, "loss": 0.0307, "step": 13500 }, { "epoch": 46.51247920133112, "grad_norm": 0.5336557626724243, "learning_rate": 0.00016272203389830505, "loss": 0.0282, "step": 14000 }, { "epoch": 46.51247920133112, "eval_loss": 0.2570391595363617, "eval_runtime": 5.2068, "eval_samples_per_second": 19.398, "eval_steps_per_second": 2.497, "eval_wer": 0.25591397849462366, "step": 14000 }, { "epoch": 48.17304492512479, "grad_norm": 0.5201185941696167, "learning_rate": 0.0001576372881355932, "loss": 0.0276, "step": 14500 }, { "epoch": 49.83527454242928, "grad_norm": 0.42062297463417053, "learning_rate": 0.00015255254237288136, "loss": 0.0269, "step": 15000 }, { "epoch": 49.83527454242928, "eval_loss": 0.24461327493190765, "eval_runtime": 5.222, "eval_samples_per_second": 19.341, "eval_steps_per_second": 2.489, "eval_wer": 0.2623655913978495, "step": 15000 }, { "epoch": 51.49584026622296, "grad_norm": 1.2455600500106812, "learning_rate": 0.0001474677966101695, "loss": 0.0253, "step": 15500 }, { "epoch": 53.15640599001664, "grad_norm": 0.5616517066955566, "learning_rate": 0.00014238305084745761, "loss": 0.0233, "step": 16000 }, { "epoch": 53.15640599001664, "eval_loss": 0.23834320902824402, "eval_runtime": 5.2763, "eval_samples_per_second": 19.142, "eval_steps_per_second": 2.464, "eval_wer": 0.24731182795698925, "step": 16000 }, { "epoch": 54.818635607321134, "grad_norm": 0.6374333500862122, "learning_rate": 0.00013729830508474577, "loss": 0.023, "step": 16500 }, { "epoch": 56.47920133111481, "grad_norm": 0.599651575088501, "learning_rate": 0.0001322135593220339, "loss": 0.0224, "step": 17000 }, { "epoch": 56.47920133111481, "eval_loss": 0.28050878643989563, "eval_runtime": 5.2647, "eval_samples_per_second": 19.185, "eval_steps_per_second": 2.469, "eval_wer": 0.24731182795698925, "step": 17000 }, { "epoch": 58.13976705490849, "grad_norm": 0.5658329129219055, "learning_rate": 0.00012712881355932202, "loss": 0.0202, "step": 17500 }, { "epoch": 59.80199667221298, "grad_norm": 0.1811748892068863, "learning_rate": 0.00012204406779661016, "loss": 0.0198, "step": 18000 }, { "epoch": 59.80199667221298, "eval_loss": 0.25546789169311523, "eval_runtime": 5.2627, "eval_samples_per_second": 19.192, "eval_steps_per_second": 2.47, "eval_wer": 0.25161290322580643, "step": 18000 }, { "epoch": 61.46256239600665, "grad_norm": 0.3274936378002167, "learning_rate": 0.00011695932203389829, "loss": 0.0179, "step": 18500 }, { "epoch": 63.123128119800334, "grad_norm": 0.4713875353336334, "learning_rate": 0.00011187457627118644, "loss": 0.0159, "step": 19000 }, { "epoch": 63.123128119800334, "eval_loss": 0.20965830981731415, "eval_runtime": 5.2591, "eval_samples_per_second": 19.205, "eval_steps_per_second": 2.472, "eval_wer": 0.24086021505376345, "step": 19000 }, { "epoch": 64.78535773710483, "grad_norm": 0.07249698787927628, "learning_rate": 0.00010678983050847457, "loss": 0.0162, "step": 19500 }, { "epoch": 66.4459234608985, "grad_norm": 0.0907130241394043, "learning_rate": 0.00010170508474576271, "loss": 0.015, "step": 20000 }, { "epoch": 66.4459234608985, "eval_loss": 0.23673121631145477, "eval_runtime": 5.2393, "eval_samples_per_second": 19.277, "eval_steps_per_second": 2.481, "eval_wer": 0.25053763440860216, "step": 20000 }, { "epoch": 68.10648918469218, "grad_norm": 0.05233411118388176, "learning_rate": 9.662033898305084e-05, "loss": 0.0144, "step": 20500 }, { "epoch": 69.76871880199667, "grad_norm": 0.10925977677106857, "learning_rate": 9.153559322033896e-05, "loss": 0.015, "step": 21000 }, { "epoch": 69.76871880199667, "eval_loss": 0.24856378138065338, "eval_runtime": 5.3437, "eval_samples_per_second": 18.901, "eval_steps_per_second": 2.433, "eval_wer": 0.25268817204301075, "step": 21000 }, { "epoch": 71.42928452579035, "grad_norm": 0.267115980386734, "learning_rate": 8.64508474576271e-05, "loss": 0.0117, "step": 21500 }, { "epoch": 73.08985024958403, "grad_norm": 0.4500684440135956, "learning_rate": 8.136610169491526e-05, "loss": 0.0122, "step": 22000 }, { "epoch": 73.08985024958403, "eval_loss": 0.24751359224319458, "eval_runtime": 5.2943, "eval_samples_per_second": 19.077, "eval_steps_per_second": 2.455, "eval_wer": 0.25268817204301075, "step": 22000 }, { "epoch": 74.75207986688852, "grad_norm": 1.1715344190597534, "learning_rate": 7.628135593220339e-05, "loss": 0.0119, "step": 22500 }, { "epoch": 76.4126455906822, "grad_norm": 0.22268928587436676, "learning_rate": 7.119661016949153e-05, "loss": 0.0104, "step": 23000 }, { "epoch": 76.4126455906822, "eval_loss": 0.23766544461250305, "eval_runtime": 5.275, "eval_samples_per_second": 19.147, "eval_steps_per_second": 2.464, "eval_wer": 0.23440860215053763, "step": 23000 }, { "epoch": 78.07321131447587, "grad_norm": 0.3640448749065399, "learning_rate": 6.611186440677965e-05, "loss": 0.0097, "step": 23500 }, { "epoch": 79.73544093178036, "grad_norm": 0.13920682668685913, "learning_rate": 6.102711864406779e-05, "loss": 0.008, "step": 24000 }, { "epoch": 79.73544093178036, "eval_loss": 0.23628441989421844, "eval_runtime": 5.3789, "eval_samples_per_second": 18.777, "eval_steps_per_second": 2.417, "eval_wer": 0.24408602150537634, "step": 24000 }, { "epoch": 81.39600665557404, "grad_norm": 0.051646001636981964, "learning_rate": 5.594237288135593e-05, "loss": 0.0082, "step": 24500 }, { "epoch": 83.05657237936772, "grad_norm": 0.034305017441511154, "learning_rate": 5.085762711864406e-05, "loss": 0.0081, "step": 25000 }, { "epoch": 83.05657237936772, "eval_loss": 0.23471036553382874, "eval_runtime": 5.3686, "eval_samples_per_second": 18.813, "eval_steps_per_second": 2.422, "eval_wer": 0.23333333333333334, "step": 25000 }, { "epoch": 84.71880199667221, "grad_norm": 0.006502960808575153, "learning_rate": 4.57728813559322e-05, "loss": 0.0057, "step": 25500 }, { "epoch": 86.3793677204659, "grad_norm": 0.5881304144859314, "learning_rate": 4.0688135593220334e-05, "loss": 0.0072, "step": 26000 }, { "epoch": 86.3793677204659, "eval_loss": 0.22321127355098724, "eval_runtime": 5.3519, "eval_samples_per_second": 18.872, "eval_steps_per_second": 2.429, "eval_wer": 0.22903225806451613, "step": 26000 }, { "epoch": 88.03993344425957, "grad_norm": 0.2054450660943985, "learning_rate": 3.560338983050847e-05, "loss": 0.0063, "step": 26500 }, { "epoch": 89.70216306156406, "grad_norm": 0.25417467951774597, "learning_rate": 3.051864406779661e-05, "loss": 0.0064, "step": 27000 }, { "epoch": 89.70216306156406, "eval_loss": 0.22117015719413757, "eval_runtime": 5.3469, "eval_samples_per_second": 18.89, "eval_steps_per_second": 2.431, "eval_wer": 0.22795698924731184, "step": 27000 }, { "epoch": 91.36272878535773, "grad_norm": 0.26413634419441223, "learning_rate": 2.5433898305084745e-05, "loss": 0.0052, "step": 27500 }, { "epoch": 93.02329450915141, "grad_norm": 0.06111468747258186, "learning_rate": 2.034915254237288e-05, "loss": 0.0044, "step": 28000 }, { "epoch": 93.02329450915141, "eval_loss": 0.22874999046325684, "eval_runtime": 5.2229, "eval_samples_per_second": 19.338, "eval_steps_per_second": 2.489, "eval_wer": 0.22580645161290322, "step": 28000 }, { "epoch": 94.6855241264559, "grad_norm": 0.4941785931587219, "learning_rate": 1.5264406779661016e-05, "loss": 0.0041, "step": 28500 }, { "epoch": 96.34608985024958, "grad_norm": 0.19530624151229858, "learning_rate": 1.0179661016949151e-05, "loss": 0.004, "step": 29000 }, { "epoch": 96.34608985024958, "eval_loss": 0.22946567833423615, "eval_runtime": 5.3077, "eval_samples_per_second": 19.029, "eval_steps_per_second": 2.449, "eval_wer": 0.23440860215053763, "step": 29000 }, { "epoch": 98.00665557404326, "grad_norm": 0.19543957710266113, "learning_rate": 5.094915254237288e-06, "loss": 0.0042, "step": 29500 }, { "epoch": 99.66888519134775, "grad_norm": 0.6488747000694275, "learning_rate": 1.0169491525423728e-08, "loss": 0.0037, "step": 30000 }, { "epoch": 99.66888519134775, "eval_loss": 0.22431735694408417, "eval_runtime": 5.3048, "eval_samples_per_second": 19.039, "eval_steps_per_second": 2.451, "eval_wer": 0.22043010752688172, "step": 30000 }, { "epoch": 99.66888519134775, "step": 30000, "total_flos": 2.0486046325976072e+19, "train_loss": 0.08741244434913, "train_runtime": 29833.4209, "train_samples_per_second": 16.089, "train_steps_per_second": 1.006 } ], "logging_steps": 500, "max_steps": 30000, "num_input_tokens_seen": 0, "num_train_epochs": 100, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.0486046325976072e+19, "train_batch_size": 8, "trial_name": null, "trial_params": null }