{ "best_global_step": 600, "best_metric": 0.5363820680333409, "best_model_checkpoint": "./whisper-small-sw/checkpoint-600", "epoch": 2.0, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.05, "grad_norm": 4503124.5, "learning_rate": 4.800000000000001e-07, "loss": 4.1558, "step": 25 }, { "epoch": 0.1, "grad_norm": 1575086.625, "learning_rate": 9.800000000000001e-07, "loss": 3.3557, "step": 50 }, { "epoch": 0.15, "grad_norm": 1165318.0, "learning_rate": 1.48e-06, "loss": 2.4313, "step": 75 }, { "epoch": 0.2, "grad_norm": 1055806.75, "learning_rate": 1.98e-06, "loss": 1.9578, "step": 100 }, { "epoch": 0.2, "eval_loss": 1.8956851959228516, "eval_runtime": 969.757, "eval_samples_per_second": 2.062, "eval_steps_per_second": 0.172, "eval_wer": 0.8697341743635955, "step": 100 }, { "epoch": 0.25, "grad_norm": 1095035.375, "learning_rate": 2.4800000000000004e-06, "loss": 1.8414, "step": 125 }, { "epoch": 0.3, "grad_norm": 1035096.4375, "learning_rate": 2.9800000000000003e-06, "loss": 1.6974, "step": 150 }, { "epoch": 0.35, "grad_norm": 1070272.75, "learning_rate": 3.48e-06, "loss": 1.621, "step": 175 }, { "epoch": 0.4, "grad_norm": 1132629.5, "learning_rate": 3.980000000000001e-06, "loss": 1.4927, "step": 200 }, { "epoch": 0.4, "eval_loss": 1.4295355081558228, "eval_runtime": 924.4171, "eval_samples_per_second": 2.164, "eval_steps_per_second": 0.181, "eval_wer": 0.6874296012615454, "step": 200 }, { "epoch": 0.45, "grad_norm": 960689.375, "learning_rate": 4.48e-06, "loss": 1.3545, "step": 225 }, { "epoch": 0.5, "grad_norm": 920681.5, "learning_rate": 4.980000000000001e-06, "loss": 1.2992, "step": 250 }, { "epoch": 0.55, "grad_norm": 765348.0625, "learning_rate": 5.480000000000001e-06, "loss": 1.2236, "step": 275 }, { "epoch": 0.6, "grad_norm": 810740.3125, "learning_rate": 5.98e-06, "loss": 1.1184, "step": 300 }, { "epoch": 0.6, "eval_loss": 1.0826019048690796, "eval_runtime": 1007.0686, "eval_samples_per_second": 1.986, "eval_steps_per_second": 0.166, "eval_wer": 0.6485131786438387, "step": 300 }, { "epoch": 0.65, "grad_norm": 945555.75, "learning_rate": 6.480000000000001e-06, "loss": 1.0226, "step": 325 }, { "epoch": 0.7, "grad_norm": 629258.5625, "learning_rate": 6.98e-06, "loss": 0.9098, "step": 350 }, { "epoch": 0.75, "grad_norm": 720113.9375, "learning_rate": 7.48e-06, "loss": 0.848, "step": 375 }, { "epoch": 0.8, "grad_norm": 727505.875, "learning_rate": 7.980000000000002e-06, "loss": 0.7788, "step": 400 }, { "epoch": 0.8, "eval_loss": 0.7836961150169373, "eval_runtime": 1040.1837, "eval_samples_per_second": 1.923, "eval_steps_per_second": 0.161, "eval_wer": 0.6084703762108583, "step": 400 }, { "epoch": 0.85, "grad_norm": 745182.125, "learning_rate": 8.48e-06, "loss": 0.7841, "step": 425 }, { "epoch": 0.9, "grad_norm": 624584.875, "learning_rate": 8.98e-06, "loss": 0.7609, "step": 450 }, { "epoch": 0.95, "grad_norm": 604703.5625, "learning_rate": 9.48e-06, "loss": 0.7249, "step": 475 }, { "epoch": 1.0, "grad_norm": 658865.0, "learning_rate": 9.980000000000001e-06, "loss": 0.6966, "step": 500 }, { "epoch": 1.0, "eval_loss": 0.7045486569404602, "eval_runtime": 1097.0835, "eval_samples_per_second": 1.823, "eval_steps_per_second": 0.152, "eval_wer": 0.688387024104528, "step": 500 }, { "epoch": 1.05, "grad_norm": 449478.71875, "learning_rate": 9.52e-06, "loss": 0.5287, "step": 525 }, { "epoch": 1.1, "grad_norm": 490002.71875, "learning_rate": 9.020000000000002e-06, "loss": 0.5736, "step": 550 }, { "epoch": 1.15, "grad_norm": 611781.625, "learning_rate": 8.52e-06, "loss": 0.5479, "step": 575 }, { "epoch": 1.2, "grad_norm": 440316.34375, "learning_rate": 8.020000000000001e-06, "loss": 0.5242, "step": 600 }, { "epoch": 1.2, "eval_loss": 0.6567466855049133, "eval_runtime": 1001.8554, "eval_samples_per_second": 1.996, "eval_steps_per_second": 0.167, "eval_wer": 0.5363820680333409, "step": 600 }, { "epoch": 1.25, "grad_norm": 633716.0, "learning_rate": 7.520000000000001e-06, "loss": 0.5501, "step": 625 }, { "epoch": 1.3, "grad_norm": 482948.25, "learning_rate": 7.0200000000000006e-06, "loss": 0.5189, "step": 650 }, { "epoch": 1.35, "grad_norm": 623572.875, "learning_rate": 6.520000000000001e-06, "loss": 0.5392, "step": 675 }, { "epoch": 1.4, "grad_norm": 651482.125, "learning_rate": 6.02e-06, "loss": 0.506, "step": 700 }, { "epoch": 1.4, "eval_loss": 0.6255258321762085, "eval_runtime": 1163.0764, "eval_samples_per_second": 1.72, "eval_steps_per_second": 0.144, "eval_wer": 0.6922730344672223, "step": 700 }, { "epoch": 1.45, "grad_norm": 462773.625, "learning_rate": 5.5200000000000005e-06, "loss": 0.4503, "step": 725 }, { "epoch": 1.5, "grad_norm": 498840.125, "learning_rate": 5.02e-06, "loss": 0.4676, "step": 750 }, { "epoch": 1.55, "grad_norm": 528649.8125, "learning_rate": 4.520000000000001e-06, "loss": 0.4561, "step": 775 }, { "epoch": 1.6, "grad_norm": 418120.5625, "learning_rate": 4.0200000000000005e-06, "loss": 0.4064, "step": 800 }, { "epoch": 1.6, "eval_loss": 0.6007899641990662, "eval_runtime": 1235.1962, "eval_samples_per_second": 1.619, "eval_steps_per_second": 0.135, "eval_wer": 0.7322031989186754, "step": 800 }, { "epoch": 1.65, "grad_norm": 597861.5, "learning_rate": 3.52e-06, "loss": 0.4127, "step": 825 }, { "epoch": 1.7, "grad_norm": 377506.28125, "learning_rate": 3.0200000000000003e-06, "loss": 0.3911, "step": 850 }, { "epoch": 1.75, "grad_norm": 406077.25, "learning_rate": 2.52e-06, "loss": 0.3828, "step": 875 }, { "epoch": 1.8, "grad_norm": 495271.09375, "learning_rate": 2.02e-06, "loss": 0.3368, "step": 900 }, { "epoch": 1.8, "eval_loss": 0.5886157155036926, "eval_runtime": 1612.2956, "eval_samples_per_second": 1.24, "eval_steps_per_second": 0.104, "eval_wer": 0.9467785537283172, "step": 900 }, { "epoch": 1.85, "grad_norm": 552985.125, "learning_rate": 1.52e-06, "loss": 0.3455, "step": 925 }, { "epoch": 1.9, "grad_norm": 366518.8125, "learning_rate": 1.02e-06, "loss": 0.3354, "step": 950 }, { "epoch": 1.95, "grad_norm": 405846.40625, "learning_rate": 5.2e-07, "loss": 0.3129, "step": 975 }, { "epoch": 2.0, "grad_norm": 414410.40625, "learning_rate": 2e-08, "loss": 0.2974, "step": 1000 }, { "epoch": 2.0, "eval_loss": 0.5830898880958557, "eval_runtime": 1396.4972, "eval_samples_per_second": 1.432, "eval_steps_per_second": 0.12, "eval_wer": 0.7279229556206352, "step": 1000 } ], "logging_steps": 25, "max_steps": 1000, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.61736640512e+18, "train_batch_size": 16, "trial_name": null, "trial_params": null }