wavlm-salt-eng / trainer_state.json
ajikadev's picture
End of training
4bbe7a2 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 99.66888519134775,
"eval_steps": 1000,
"global_step": 30000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 1.6622296173044924,
"grad_norm": 3.129899024963379,
"learning_rate": 0.00029939999999999996,
"loss": 2.5636,
"step": 500
},
{
"epoch": 3.32279534109817,
"grad_norm": 1.6870653629302979,
"learning_rate": 0.0002949254237288135,
"loss": 0.3803,
"step": 1000
},
{
"epoch": 3.32279534109817,
"eval_loss": 0.3107321560382843,
"eval_runtime": 5.3302,
"eval_samples_per_second": 18.949,
"eval_steps_per_second": 2.439,
"eval_wer": 0.36236559139784946,
"step": 1000
},
{
"epoch": 4.985024958402662,
"grad_norm": 4.024764537811279,
"learning_rate": 0.00028984067796610164,
"loss": 0.2315,
"step": 1500
},
{
"epoch": 6.64559068219634,
"grad_norm": 2.152501106262207,
"learning_rate": 0.0002847559322033898,
"loss": 0.1663,
"step": 2000
},
{
"epoch": 6.64559068219634,
"eval_loss": 0.2739505469799042,
"eval_runtime": 5.2893,
"eval_samples_per_second": 19.095,
"eval_steps_per_second": 2.458,
"eval_wer": 0.3096774193548387,
"step": 2000
},
{
"epoch": 8.306156405990016,
"grad_norm": 1.6033236980438232,
"learning_rate": 0.00027967118644067795,
"loss": 0.1381,
"step": 2500
},
{
"epoch": 9.96838602329451,
"grad_norm": 1.2841393947601318,
"learning_rate": 0.00027458644067796607,
"loss": 0.1206,
"step": 3000
},
{
"epoch": 9.96838602329451,
"eval_loss": 0.24860869348049164,
"eval_runtime": 5.3415,
"eval_samples_per_second": 18.909,
"eval_steps_per_second": 2.434,
"eval_wer": 0.2903225806451613,
"step": 3000
},
{
"epoch": 11.628951747088186,
"grad_norm": 1.436146855354309,
"learning_rate": 0.0002695016949152542,
"loss": 0.1053,
"step": 3500
},
{
"epoch": 13.289517470881863,
"grad_norm": 0.8757719397544861,
"learning_rate": 0.0002644169491525423,
"loss": 0.0938,
"step": 4000
},
{
"epoch": 13.289517470881863,
"eval_loss": 0.25879770517349243,
"eval_runtime": 5.2643,
"eval_samples_per_second": 19.186,
"eval_steps_per_second": 2.469,
"eval_wer": 0.28279569892473116,
"step": 4000
},
{
"epoch": 14.951747088186355,
"grad_norm": 0.9832671284675598,
"learning_rate": 0.0002593322033898305,
"loss": 0.0872,
"step": 4500
},
{
"epoch": 16.612312811980033,
"grad_norm": 0.9009350538253784,
"learning_rate": 0.00025424745762711863,
"loss": 0.0816,
"step": 5000
},
{
"epoch": 16.612312811980033,
"eval_loss": 0.27693644165992737,
"eval_runtime": 5.2922,
"eval_samples_per_second": 19.085,
"eval_steps_per_second": 2.456,
"eval_wer": 0.278494623655914,
"step": 5000
},
{
"epoch": 18.27287853577371,
"grad_norm": 1.4376908540725708,
"learning_rate": 0.00024916271186440676,
"loss": 0.0756,
"step": 5500
},
{
"epoch": 19.935108153078204,
"grad_norm": 0.4128202795982361,
"learning_rate": 0.0002440779661016949,
"loss": 0.0689,
"step": 6000
},
{
"epoch": 19.935108153078204,
"eval_loss": 0.24573881924152374,
"eval_runtime": 5.2488,
"eval_samples_per_second": 19.243,
"eval_steps_per_second": 2.477,
"eval_wer": 0.28817204301075267,
"step": 6000
},
{
"epoch": 21.59567387687188,
"grad_norm": 0.9727014899253845,
"learning_rate": 0.00023899322033898301,
"loss": 0.0634,
"step": 6500
},
{
"epoch": 23.25623960066556,
"grad_norm": 0.9723252058029175,
"learning_rate": 0.00023390847457627117,
"loss": 0.0642,
"step": 7000
},
{
"epoch": 23.25623960066556,
"eval_loss": 0.26387491822242737,
"eval_runtime": 5.2852,
"eval_samples_per_second": 19.11,
"eval_steps_per_second": 2.46,
"eval_wer": 0.2913978494623656,
"step": 7000
},
{
"epoch": 24.91846921797005,
"grad_norm": 0.8104386329650879,
"learning_rate": 0.00022882372881355932,
"loss": 0.0586,
"step": 7500
},
{
"epoch": 26.579034941763727,
"grad_norm": 0.31280040740966797,
"learning_rate": 0.00022373898305084742,
"loss": 0.0566,
"step": 8000
},
{
"epoch": 26.579034941763727,
"eval_loss": 0.2954213619232178,
"eval_runtime": 5.2555,
"eval_samples_per_second": 19.218,
"eval_steps_per_second": 2.474,
"eval_wer": 0.28279569892473116,
"step": 8000
},
{
"epoch": 28.239600665557404,
"grad_norm": 0.31530410051345825,
"learning_rate": 0.00021865423728813558,
"loss": 0.0509,
"step": 8500
},
{
"epoch": 29.901830282861898,
"grad_norm": 0.7447624802589417,
"learning_rate": 0.0002135694915254237,
"loss": 0.049,
"step": 9000
},
{
"epoch": 29.901830282861898,
"eval_loss": 0.31719881296157837,
"eval_runtime": 5.2417,
"eval_samples_per_second": 19.268,
"eval_steps_per_second": 2.48,
"eval_wer": 0.2763440860215054,
"step": 9000
},
{
"epoch": 31.562396006655575,
"grad_norm": 0.7232189774513245,
"learning_rate": 0.00020848474576271186,
"loss": 0.0464,
"step": 9500
},
{
"epoch": 33.22296173044925,
"grad_norm": 0.7363786101341248,
"learning_rate": 0.00020339999999999998,
"loss": 0.0454,
"step": 10000
},
{
"epoch": 33.22296173044925,
"eval_loss": 0.31861352920532227,
"eval_runtime": 5.2622,
"eval_samples_per_second": 19.194,
"eval_steps_per_second": 2.47,
"eval_wer": 0.28279569892473116,
"step": 10000
},
{
"epoch": 34.88519134775375,
"grad_norm": 1.2368154525756836,
"learning_rate": 0.0001983152542372881,
"loss": 0.0419,
"step": 10500
},
{
"epoch": 36.54575707154742,
"grad_norm": 0.20310941338539124,
"learning_rate": 0.00019323050847457626,
"loss": 0.0395,
"step": 11000
},
{
"epoch": 36.54575707154742,
"eval_loss": 0.27824845910072327,
"eval_runtime": 5.2266,
"eval_samples_per_second": 19.324,
"eval_steps_per_second": 2.487,
"eval_wer": 0.2817204301075269,
"step": 11000
},
{
"epoch": 38.2063227953411,
"grad_norm": 0.7990397214889526,
"learning_rate": 0.0001881457627118644,
"loss": 0.0379,
"step": 11500
},
{
"epoch": 39.86855241264559,
"grad_norm": 1.0379022359848022,
"learning_rate": 0.00018306101694915252,
"loss": 0.0389,
"step": 12000
},
{
"epoch": 39.86855241264559,
"eval_loss": 0.28572770953178406,
"eval_runtime": 5.2471,
"eval_samples_per_second": 19.249,
"eval_steps_per_second": 2.478,
"eval_wer": 0.28279569892473116,
"step": 12000
},
{
"epoch": 41.529118136439266,
"grad_norm": 0.42599430680274963,
"learning_rate": 0.00017797627118644067,
"loss": 0.0338,
"step": 12500
},
{
"epoch": 43.18968386023295,
"grad_norm": 0.8438450694084167,
"learning_rate": 0.0001728915254237288,
"loss": 0.0321,
"step": 13000
},
{
"epoch": 43.18968386023295,
"eval_loss": 0.26923489570617676,
"eval_runtime": 5.1894,
"eval_samples_per_second": 19.463,
"eval_steps_per_second": 2.505,
"eval_wer": 0.25268817204301075,
"step": 13000
},
{
"epoch": 44.85191347753744,
"grad_norm": 0.27244409918785095,
"learning_rate": 0.00016780677966101695,
"loss": 0.0307,
"step": 13500
},
{
"epoch": 46.51247920133112,
"grad_norm": 0.5336557626724243,
"learning_rate": 0.00016272203389830505,
"loss": 0.0282,
"step": 14000
},
{
"epoch": 46.51247920133112,
"eval_loss": 0.2570391595363617,
"eval_runtime": 5.2068,
"eval_samples_per_second": 19.398,
"eval_steps_per_second": 2.497,
"eval_wer": 0.25591397849462366,
"step": 14000
},
{
"epoch": 48.17304492512479,
"grad_norm": 0.5201185941696167,
"learning_rate": 0.0001576372881355932,
"loss": 0.0276,
"step": 14500
},
{
"epoch": 49.83527454242928,
"grad_norm": 0.42062297463417053,
"learning_rate": 0.00015255254237288136,
"loss": 0.0269,
"step": 15000
},
{
"epoch": 49.83527454242928,
"eval_loss": 0.24461327493190765,
"eval_runtime": 5.222,
"eval_samples_per_second": 19.341,
"eval_steps_per_second": 2.489,
"eval_wer": 0.2623655913978495,
"step": 15000
},
{
"epoch": 51.49584026622296,
"grad_norm": 1.2455600500106812,
"learning_rate": 0.0001474677966101695,
"loss": 0.0253,
"step": 15500
},
{
"epoch": 53.15640599001664,
"grad_norm": 0.5616517066955566,
"learning_rate": 0.00014238305084745761,
"loss": 0.0233,
"step": 16000
},
{
"epoch": 53.15640599001664,
"eval_loss": 0.23834320902824402,
"eval_runtime": 5.2763,
"eval_samples_per_second": 19.142,
"eval_steps_per_second": 2.464,
"eval_wer": 0.24731182795698925,
"step": 16000
},
{
"epoch": 54.818635607321134,
"grad_norm": 0.6374333500862122,
"learning_rate": 0.00013729830508474577,
"loss": 0.023,
"step": 16500
},
{
"epoch": 56.47920133111481,
"grad_norm": 0.599651575088501,
"learning_rate": 0.0001322135593220339,
"loss": 0.0224,
"step": 17000
},
{
"epoch": 56.47920133111481,
"eval_loss": 0.28050878643989563,
"eval_runtime": 5.2647,
"eval_samples_per_second": 19.185,
"eval_steps_per_second": 2.469,
"eval_wer": 0.24731182795698925,
"step": 17000
},
{
"epoch": 58.13976705490849,
"grad_norm": 0.5658329129219055,
"learning_rate": 0.00012712881355932202,
"loss": 0.0202,
"step": 17500
},
{
"epoch": 59.80199667221298,
"grad_norm": 0.1811748892068863,
"learning_rate": 0.00012204406779661016,
"loss": 0.0198,
"step": 18000
},
{
"epoch": 59.80199667221298,
"eval_loss": 0.25546789169311523,
"eval_runtime": 5.2627,
"eval_samples_per_second": 19.192,
"eval_steps_per_second": 2.47,
"eval_wer": 0.25161290322580643,
"step": 18000
},
{
"epoch": 61.46256239600665,
"grad_norm": 0.3274936378002167,
"learning_rate": 0.00011695932203389829,
"loss": 0.0179,
"step": 18500
},
{
"epoch": 63.123128119800334,
"grad_norm": 0.4713875353336334,
"learning_rate": 0.00011187457627118644,
"loss": 0.0159,
"step": 19000
},
{
"epoch": 63.123128119800334,
"eval_loss": 0.20965830981731415,
"eval_runtime": 5.2591,
"eval_samples_per_second": 19.205,
"eval_steps_per_second": 2.472,
"eval_wer": 0.24086021505376345,
"step": 19000
},
{
"epoch": 64.78535773710483,
"grad_norm": 0.07249698787927628,
"learning_rate": 0.00010678983050847457,
"loss": 0.0162,
"step": 19500
},
{
"epoch": 66.4459234608985,
"grad_norm": 0.0907130241394043,
"learning_rate": 0.00010170508474576271,
"loss": 0.015,
"step": 20000
},
{
"epoch": 66.4459234608985,
"eval_loss": 0.23673121631145477,
"eval_runtime": 5.2393,
"eval_samples_per_second": 19.277,
"eval_steps_per_second": 2.481,
"eval_wer": 0.25053763440860216,
"step": 20000
},
{
"epoch": 68.10648918469218,
"grad_norm": 0.05233411118388176,
"learning_rate": 9.662033898305084e-05,
"loss": 0.0144,
"step": 20500
},
{
"epoch": 69.76871880199667,
"grad_norm": 0.10925977677106857,
"learning_rate": 9.153559322033896e-05,
"loss": 0.015,
"step": 21000
},
{
"epoch": 69.76871880199667,
"eval_loss": 0.24856378138065338,
"eval_runtime": 5.3437,
"eval_samples_per_second": 18.901,
"eval_steps_per_second": 2.433,
"eval_wer": 0.25268817204301075,
"step": 21000
},
{
"epoch": 71.42928452579035,
"grad_norm": 0.267115980386734,
"learning_rate": 8.64508474576271e-05,
"loss": 0.0117,
"step": 21500
},
{
"epoch": 73.08985024958403,
"grad_norm": 0.4500684440135956,
"learning_rate": 8.136610169491526e-05,
"loss": 0.0122,
"step": 22000
},
{
"epoch": 73.08985024958403,
"eval_loss": 0.24751359224319458,
"eval_runtime": 5.2943,
"eval_samples_per_second": 19.077,
"eval_steps_per_second": 2.455,
"eval_wer": 0.25268817204301075,
"step": 22000
},
{
"epoch": 74.75207986688852,
"grad_norm": 1.1715344190597534,
"learning_rate": 7.628135593220339e-05,
"loss": 0.0119,
"step": 22500
},
{
"epoch": 76.4126455906822,
"grad_norm": 0.22268928587436676,
"learning_rate": 7.119661016949153e-05,
"loss": 0.0104,
"step": 23000
},
{
"epoch": 76.4126455906822,
"eval_loss": 0.23766544461250305,
"eval_runtime": 5.275,
"eval_samples_per_second": 19.147,
"eval_steps_per_second": 2.464,
"eval_wer": 0.23440860215053763,
"step": 23000
},
{
"epoch": 78.07321131447587,
"grad_norm": 0.3640448749065399,
"learning_rate": 6.611186440677965e-05,
"loss": 0.0097,
"step": 23500
},
{
"epoch": 79.73544093178036,
"grad_norm": 0.13920682668685913,
"learning_rate": 6.102711864406779e-05,
"loss": 0.008,
"step": 24000
},
{
"epoch": 79.73544093178036,
"eval_loss": 0.23628441989421844,
"eval_runtime": 5.3789,
"eval_samples_per_second": 18.777,
"eval_steps_per_second": 2.417,
"eval_wer": 0.24408602150537634,
"step": 24000
},
{
"epoch": 81.39600665557404,
"grad_norm": 0.051646001636981964,
"learning_rate": 5.594237288135593e-05,
"loss": 0.0082,
"step": 24500
},
{
"epoch": 83.05657237936772,
"grad_norm": 0.034305017441511154,
"learning_rate": 5.085762711864406e-05,
"loss": 0.0081,
"step": 25000
},
{
"epoch": 83.05657237936772,
"eval_loss": 0.23471036553382874,
"eval_runtime": 5.3686,
"eval_samples_per_second": 18.813,
"eval_steps_per_second": 2.422,
"eval_wer": 0.23333333333333334,
"step": 25000
},
{
"epoch": 84.71880199667221,
"grad_norm": 0.006502960808575153,
"learning_rate": 4.57728813559322e-05,
"loss": 0.0057,
"step": 25500
},
{
"epoch": 86.3793677204659,
"grad_norm": 0.5881304144859314,
"learning_rate": 4.0688135593220334e-05,
"loss": 0.0072,
"step": 26000
},
{
"epoch": 86.3793677204659,
"eval_loss": 0.22321127355098724,
"eval_runtime": 5.3519,
"eval_samples_per_second": 18.872,
"eval_steps_per_second": 2.429,
"eval_wer": 0.22903225806451613,
"step": 26000
},
{
"epoch": 88.03993344425957,
"grad_norm": 0.2054450660943985,
"learning_rate": 3.560338983050847e-05,
"loss": 0.0063,
"step": 26500
},
{
"epoch": 89.70216306156406,
"grad_norm": 0.25417467951774597,
"learning_rate": 3.051864406779661e-05,
"loss": 0.0064,
"step": 27000
},
{
"epoch": 89.70216306156406,
"eval_loss": 0.22117015719413757,
"eval_runtime": 5.3469,
"eval_samples_per_second": 18.89,
"eval_steps_per_second": 2.431,
"eval_wer": 0.22795698924731184,
"step": 27000
},
{
"epoch": 91.36272878535773,
"grad_norm": 0.26413634419441223,
"learning_rate": 2.5433898305084745e-05,
"loss": 0.0052,
"step": 27500
},
{
"epoch": 93.02329450915141,
"grad_norm": 0.06111468747258186,
"learning_rate": 2.034915254237288e-05,
"loss": 0.0044,
"step": 28000
},
{
"epoch": 93.02329450915141,
"eval_loss": 0.22874999046325684,
"eval_runtime": 5.2229,
"eval_samples_per_second": 19.338,
"eval_steps_per_second": 2.489,
"eval_wer": 0.22580645161290322,
"step": 28000
},
{
"epoch": 94.6855241264559,
"grad_norm": 0.4941785931587219,
"learning_rate": 1.5264406779661016e-05,
"loss": 0.0041,
"step": 28500
},
{
"epoch": 96.34608985024958,
"grad_norm": 0.19530624151229858,
"learning_rate": 1.0179661016949151e-05,
"loss": 0.004,
"step": 29000
},
{
"epoch": 96.34608985024958,
"eval_loss": 0.22946567833423615,
"eval_runtime": 5.3077,
"eval_samples_per_second": 19.029,
"eval_steps_per_second": 2.449,
"eval_wer": 0.23440860215053763,
"step": 29000
},
{
"epoch": 98.00665557404326,
"grad_norm": 0.19543957710266113,
"learning_rate": 5.094915254237288e-06,
"loss": 0.0042,
"step": 29500
},
{
"epoch": 99.66888519134775,
"grad_norm": 0.6488747000694275,
"learning_rate": 1.0169491525423728e-08,
"loss": 0.0037,
"step": 30000
},
{
"epoch": 99.66888519134775,
"eval_loss": 0.22431735694408417,
"eval_runtime": 5.3048,
"eval_samples_per_second": 19.039,
"eval_steps_per_second": 2.451,
"eval_wer": 0.22043010752688172,
"step": 30000
},
{
"epoch": 99.66888519134775,
"step": 30000,
"total_flos": 2.0486046325976072e+19,
"train_loss": 0.08741244434913,
"train_runtime": 29833.4209,
"train_samples_per_second": 16.089,
"train_steps_per_second": 1.006
}
],
"logging_steps": 500,
"max_steps": 30000,
"num_input_tokens_seen": 0,
"num_train_epochs": 100,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.0486046325976072e+19,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}