{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 99.66888519134775,
  "eval_steps": 1000,
  "global_step": 30000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.6622296173044924,
      "grad_norm": 3.129899024963379,
      "learning_rate": 0.00029939999999999996,
      "loss": 2.5636,
      "step": 500
    },
    {
      "epoch": 3.32279534109817,
      "grad_norm": 1.6870653629302979,
      "learning_rate": 0.0002949254237288135,
      "loss": 0.3803,
      "step": 1000
    },
    {
      "epoch": 3.32279534109817,
      "eval_loss": 0.3107321560382843,
      "eval_runtime": 5.3302,
      "eval_samples_per_second": 18.949,
      "eval_steps_per_second": 2.439,
      "eval_wer": 0.36236559139784946,
      "step": 1000
    },
    {
      "epoch": 4.985024958402662,
      "grad_norm": 4.024764537811279,
      "learning_rate": 0.00028984067796610164,
      "loss": 0.2315,
      "step": 1500
    },
    {
      "epoch": 6.64559068219634,
      "grad_norm": 2.152501106262207,
      "learning_rate": 0.0002847559322033898,
      "loss": 0.1663,
      "step": 2000
    },
    {
      "epoch": 6.64559068219634,
      "eval_loss": 0.2739505469799042,
      "eval_runtime": 5.2893,
      "eval_samples_per_second": 19.095,
      "eval_steps_per_second": 2.458,
      "eval_wer": 0.3096774193548387,
      "step": 2000
    },
    {
      "epoch": 8.306156405990016,
      "grad_norm": 1.6033236980438232,
      "learning_rate": 0.00027967118644067795,
      "loss": 0.1381,
      "step": 2500
    },
    {
      "epoch": 9.96838602329451,
      "grad_norm": 1.2841393947601318,
      "learning_rate": 0.00027458644067796607,
      "loss": 0.1206,
      "step": 3000
    },
    {
      "epoch": 9.96838602329451,
      "eval_loss": 0.24860869348049164,
      "eval_runtime": 5.3415,
      "eval_samples_per_second": 18.909,
      "eval_steps_per_second": 2.434,
      "eval_wer": 0.2903225806451613,
      "step": 3000
    },
    {
      "epoch": 11.628951747088186,
      "grad_norm": 1.436146855354309,
      "learning_rate": 0.0002695016949152542,
      "loss": 0.1053,
      "step": 3500
    },
    {
      "epoch": 13.289517470881863,
      "grad_norm": 0.8757719397544861,
      "learning_rate": 0.0002644169491525423,
      "loss": 0.0938,
      "step": 4000
    },
    {
      "epoch": 13.289517470881863,
      "eval_loss": 0.25879770517349243,
      "eval_runtime": 5.2643,
      "eval_samples_per_second": 19.186,
      "eval_steps_per_second": 2.469,
      "eval_wer": 0.28279569892473116,
      "step": 4000
    },
    {
      "epoch": 14.951747088186355,
      "grad_norm": 0.9832671284675598,
      "learning_rate": 0.0002593322033898305,
      "loss": 0.0872,
      "step": 4500
    },
    {
      "epoch": 16.612312811980033,
      "grad_norm": 0.9009350538253784,
      "learning_rate": 0.00025424745762711863,
      "loss": 0.0816,
      "step": 5000
    },
    {
      "epoch": 16.612312811980033,
      "eval_loss": 0.27693644165992737,
      "eval_runtime": 5.2922,
      "eval_samples_per_second": 19.085,
      "eval_steps_per_second": 2.456,
      "eval_wer": 0.278494623655914,
      "step": 5000
    },
    {
      "epoch": 18.27287853577371,
      "grad_norm": 1.4376908540725708,
      "learning_rate": 0.00024916271186440676,
      "loss": 0.0756,
      "step": 5500
    },
    {
      "epoch": 19.935108153078204,
      "grad_norm": 0.4128202795982361,
      "learning_rate": 0.0002440779661016949,
      "loss": 0.0689,
      "step": 6000
    },
    {
      "epoch": 19.935108153078204,
      "eval_loss": 0.24573881924152374,
      "eval_runtime": 5.2488,
      "eval_samples_per_second": 19.243,
      "eval_steps_per_second": 2.477,
      "eval_wer": 0.28817204301075267,
      "step": 6000
    },
    {
      "epoch": 21.59567387687188,
      "grad_norm": 0.9727014899253845,
      "learning_rate": 0.00023899322033898301,
      "loss": 0.0634,
      "step": 6500
    },
    {
      "epoch": 23.25623960066556,
      "grad_norm": 0.9723252058029175,
      "learning_rate": 0.00023390847457627117,
      "loss": 0.0642,
      "step": 7000
    },
    {
      "epoch": 23.25623960066556,
      "eval_loss": 0.26387491822242737,
      "eval_runtime": 5.2852,
      "eval_samples_per_second": 19.11,
      "eval_steps_per_second": 2.46,
      "eval_wer": 0.2913978494623656,
      "step": 7000
    },
    {
      "epoch": 24.91846921797005,
      "grad_norm": 0.8104386329650879,
      "learning_rate": 0.00022882372881355932,
      "loss": 0.0586,
      "step": 7500
    },
    {
      "epoch": 26.579034941763727,
      "grad_norm": 0.31280040740966797,
      "learning_rate": 0.00022373898305084742,
      "loss": 0.0566,
      "step": 8000
    },
    {
      "epoch": 26.579034941763727,
      "eval_loss": 0.2954213619232178,
      "eval_runtime": 5.2555,
      "eval_samples_per_second": 19.218,
      "eval_steps_per_second": 2.474,
      "eval_wer": 0.28279569892473116,
      "step": 8000
    },
    {
      "epoch": 28.239600665557404,
      "grad_norm": 0.31530410051345825,
      "learning_rate": 0.00021865423728813558,
      "loss": 0.0509,
      "step": 8500
    },
    {
      "epoch": 29.901830282861898,
      "grad_norm": 0.7447624802589417,
      "learning_rate": 0.0002135694915254237,
      "loss": 0.049,
      "step": 9000
    },
    {
      "epoch": 29.901830282861898,
      "eval_loss": 0.31719881296157837,
      "eval_runtime": 5.2417,
      "eval_samples_per_second": 19.268,
      "eval_steps_per_second": 2.48,
      "eval_wer": 0.2763440860215054,
      "step": 9000
    },
    {
      "epoch": 31.562396006655575,
      "grad_norm": 0.7232189774513245,
      "learning_rate": 0.00020848474576271186,
      "loss": 0.0464,
      "step": 9500
    },
    {
      "epoch": 33.22296173044925,
      "grad_norm": 0.7363786101341248,
      "learning_rate": 0.00020339999999999998,
      "loss": 0.0454,
      "step": 10000
    },
    {
      "epoch": 33.22296173044925,
      "eval_loss": 0.31861352920532227,
      "eval_runtime": 5.2622,
      "eval_samples_per_second": 19.194,
      "eval_steps_per_second": 2.47,
      "eval_wer": 0.28279569892473116,
      "step": 10000
    },
    {
      "epoch": 34.88519134775375,
      "grad_norm": 1.2368154525756836,
      "learning_rate": 0.0001983152542372881,
      "loss": 0.0419,
      "step": 10500
    },
    {
      "epoch": 36.54575707154742,
      "grad_norm": 0.20310941338539124,
      "learning_rate": 0.00019323050847457626,
      "loss": 0.0395,
      "step": 11000
    },
    {
      "epoch": 36.54575707154742,
      "eval_loss": 0.27824845910072327,
      "eval_runtime": 5.2266,
      "eval_samples_per_second": 19.324,
      "eval_steps_per_second": 2.487,
      "eval_wer": 0.2817204301075269,
      "step": 11000
    },
    {
      "epoch": 38.2063227953411,
      "grad_norm": 0.7990397214889526,
      "learning_rate": 0.0001881457627118644,
      "loss": 0.0379,
      "step": 11500
    },
    {
      "epoch": 39.86855241264559,
      "grad_norm": 1.0379022359848022,
      "learning_rate": 0.00018306101694915252,
      "loss": 0.0389,
      "step": 12000
    },
    {
      "epoch": 39.86855241264559,
      "eval_loss": 0.28572770953178406,
      "eval_runtime": 5.2471,
      "eval_samples_per_second": 19.249,
      "eval_steps_per_second": 2.478,
      "eval_wer": 0.28279569892473116,
      "step": 12000
    },
    {
      "epoch": 41.529118136439266,
      "grad_norm": 0.42599430680274963,
      "learning_rate": 0.00017797627118644067,
      "loss": 0.0338,
      "step": 12500
    },
    {
      "epoch": 43.18968386023295,
      "grad_norm": 0.8438450694084167,
      "learning_rate": 0.0001728915254237288,
      "loss": 0.0321,
      "step": 13000
    },
    {
      "epoch": 43.18968386023295,
      "eval_loss": 0.26923489570617676,
      "eval_runtime": 5.1894,
      "eval_samples_per_second": 19.463,
      "eval_steps_per_second": 2.505,
      "eval_wer": 0.25268817204301075,
      "step": 13000
    },
    {
      "epoch": 44.85191347753744,
      "grad_norm": 0.27244409918785095,
      "learning_rate": 0.00016780677966101695,
      "loss": 0.0307,
      "step": 13500
    },
    {
      "epoch": 46.51247920133112,
      "grad_norm": 0.5336557626724243,
      "learning_rate": 0.00016272203389830505,
      "loss": 0.0282,
      "step": 14000
    },
    {
      "epoch": 46.51247920133112,
      "eval_loss": 0.2570391595363617,
      "eval_runtime": 5.2068,
      "eval_samples_per_second": 19.398,
      "eval_steps_per_second": 2.497,
      "eval_wer": 0.25591397849462366,
      "step": 14000
    },
    {
      "epoch": 48.17304492512479,
      "grad_norm": 0.5201185941696167,
      "learning_rate": 0.0001576372881355932,
      "loss": 0.0276,
      "step": 14500
    },
    {
      "epoch": 49.83527454242928,
      "grad_norm": 0.42062297463417053,
      "learning_rate": 0.00015255254237288136,
      "loss": 0.0269,
      "step": 15000
    },
    {
      "epoch": 49.83527454242928,
      "eval_loss": 0.24461327493190765,
      "eval_runtime": 5.222,
      "eval_samples_per_second": 19.341,
      "eval_steps_per_second": 2.489,
      "eval_wer": 0.2623655913978495,
      "step": 15000
    },
    {
      "epoch": 51.49584026622296,
      "grad_norm": 1.2455600500106812,
      "learning_rate": 0.0001474677966101695,
      "loss": 0.0253,
      "step": 15500
    },
    {
      "epoch": 53.15640599001664,
      "grad_norm": 0.5616517066955566,
      "learning_rate": 0.00014238305084745761,
      "loss": 0.0233,
      "step": 16000
    },
    {
      "epoch": 53.15640599001664,
      "eval_loss": 0.23834320902824402,
      "eval_runtime": 5.2763,
      "eval_samples_per_second": 19.142,
      "eval_steps_per_second": 2.464,
      "eval_wer": 0.24731182795698925,
      "step": 16000
    },
    {
      "epoch": 54.818635607321134,
      "grad_norm": 0.6374333500862122,
      "learning_rate": 0.00013729830508474577,
      "loss": 0.023,
      "step": 16500
    },
    {
      "epoch": 56.47920133111481,
      "grad_norm": 0.599651575088501,
      "learning_rate": 0.0001322135593220339,
      "loss": 0.0224,
      "step": 17000
    },
    {
      "epoch": 56.47920133111481,
      "eval_loss": 0.28050878643989563,
      "eval_runtime": 5.2647,
      "eval_samples_per_second": 19.185,
      "eval_steps_per_second": 2.469,
      "eval_wer": 0.24731182795698925,
      "step": 17000
    },
    {
      "epoch": 58.13976705490849,
      "grad_norm": 0.5658329129219055,
      "learning_rate": 0.00012712881355932202,
      "loss": 0.0202,
      "step": 17500
    },
    {
      "epoch": 59.80199667221298,
      "grad_norm": 0.1811748892068863,
      "learning_rate": 0.00012204406779661016,
      "loss": 0.0198,
      "step": 18000
    },
    {
      "epoch": 59.80199667221298,
      "eval_loss": 0.25546789169311523,
      "eval_runtime": 5.2627,
      "eval_samples_per_second": 19.192,
      "eval_steps_per_second": 2.47,
      "eval_wer": 0.25161290322580643,
      "step": 18000
    },
    {
      "epoch": 61.46256239600665,
      "grad_norm": 0.3274936378002167,
      "learning_rate": 0.00011695932203389829,
      "loss": 0.0179,
      "step": 18500
    },
    {
      "epoch": 63.123128119800334,
      "grad_norm": 0.4713875353336334,
      "learning_rate": 0.00011187457627118644,
      "loss": 0.0159,
      "step": 19000
    },
    {
      "epoch": 63.123128119800334,
      "eval_loss": 0.20965830981731415,
      "eval_runtime": 5.2591,
      "eval_samples_per_second": 19.205,
      "eval_steps_per_second": 2.472,
      "eval_wer": 0.24086021505376345,
      "step": 19000
    },
    {
      "epoch": 64.78535773710483,
      "grad_norm": 0.07249698787927628,
      "learning_rate": 0.00010678983050847457,
      "loss": 0.0162,
      "step": 19500
    },
    {
      "epoch": 66.4459234608985,
      "grad_norm": 0.0907130241394043,
      "learning_rate": 0.00010170508474576271,
      "loss": 0.015,
      "step": 20000
    },
    {
      "epoch": 66.4459234608985,
      "eval_loss": 0.23673121631145477,
      "eval_runtime": 5.2393,
      "eval_samples_per_second": 19.277,
      "eval_steps_per_second": 2.481,
      "eval_wer": 0.25053763440860216,
      "step": 20000
    },
    {
      "epoch": 68.10648918469218,
      "grad_norm": 0.05233411118388176,
      "learning_rate": 9.662033898305084e-05,
      "loss": 0.0144,
      "step": 20500
    },
    {
      "epoch": 69.76871880199667,
      "grad_norm": 0.10925977677106857,
      "learning_rate": 9.153559322033896e-05,
      "loss": 0.015,
      "step": 21000
    },
    {
      "epoch": 69.76871880199667,
      "eval_loss": 0.24856378138065338,
      "eval_runtime": 5.3437,
      "eval_samples_per_second": 18.901,
      "eval_steps_per_second": 2.433,
      "eval_wer": 0.25268817204301075,
      "step": 21000
    },
    {
      "epoch": 71.42928452579035,
      "grad_norm": 0.267115980386734,
      "learning_rate": 8.64508474576271e-05,
      "loss": 0.0117,
      "step": 21500
    },
    {
      "epoch": 73.08985024958403,
      "grad_norm": 0.4500684440135956,
      "learning_rate": 8.136610169491526e-05,
      "loss": 0.0122,
      "step": 22000
    },
    {
      "epoch": 73.08985024958403,
      "eval_loss": 0.24751359224319458,
      "eval_runtime": 5.2943,
      "eval_samples_per_second": 19.077,
      "eval_steps_per_second": 2.455,
      "eval_wer": 0.25268817204301075,
      "step": 22000
    },
    {
      "epoch": 74.75207986688852,
      "grad_norm": 1.1715344190597534,
      "learning_rate": 7.628135593220339e-05,
      "loss": 0.0119,
      "step": 22500
    },
    {
      "epoch": 76.4126455906822,
      "grad_norm": 0.22268928587436676,
      "learning_rate": 7.119661016949153e-05,
      "loss": 0.0104,
      "step": 23000
    },
    {
      "epoch": 76.4126455906822,
      "eval_loss": 0.23766544461250305,
      "eval_runtime": 5.275,
      "eval_samples_per_second": 19.147,
      "eval_steps_per_second": 2.464,
      "eval_wer": 0.23440860215053763,
      "step": 23000
    },
    {
      "epoch": 78.07321131447587,
      "grad_norm": 0.3640448749065399,
      "learning_rate": 6.611186440677965e-05,
      "loss": 0.0097,
      "step": 23500
    },
    {
      "epoch": 79.73544093178036,
      "grad_norm": 0.13920682668685913,
      "learning_rate": 6.102711864406779e-05,
      "loss": 0.008,
      "step": 24000
    },
    {
      "epoch": 79.73544093178036,
      "eval_loss": 0.23628441989421844,
      "eval_runtime": 5.3789,
      "eval_samples_per_second": 18.777,
      "eval_steps_per_second": 2.417,
      "eval_wer": 0.24408602150537634,
      "step": 24000
    },
    {
      "epoch": 81.39600665557404,
      "grad_norm": 0.051646001636981964,
      "learning_rate": 5.594237288135593e-05,
      "loss": 0.0082,
      "step": 24500
    },
    {
      "epoch": 83.05657237936772,
      "grad_norm": 0.034305017441511154,
      "learning_rate": 5.085762711864406e-05,
      "loss": 0.0081,
      "step": 25000
    },
    {
      "epoch": 83.05657237936772,
      "eval_loss": 0.23471036553382874,
      "eval_runtime": 5.3686,
      "eval_samples_per_second": 18.813,
      "eval_steps_per_second": 2.422,
      "eval_wer": 0.23333333333333334,
      "step": 25000
    },
    {
      "epoch": 84.71880199667221,
      "grad_norm": 0.006502960808575153,
      "learning_rate": 4.57728813559322e-05,
      "loss": 0.0057,
      "step": 25500
    },
    {
      "epoch": 86.3793677204659,
      "grad_norm": 0.5881304144859314,
      "learning_rate": 4.0688135593220334e-05,
      "loss": 0.0072,
      "step": 26000
    },
    {
      "epoch": 86.3793677204659,
      "eval_loss": 0.22321127355098724,
      "eval_runtime": 5.3519,
      "eval_samples_per_second": 18.872,
      "eval_steps_per_second": 2.429,
      "eval_wer": 0.22903225806451613,
      "step": 26000
    },
    {
      "epoch": 88.03993344425957,
      "grad_norm": 0.2054450660943985,
      "learning_rate": 3.560338983050847e-05,
      "loss": 0.0063,
      "step": 26500
    },
    {
      "epoch": 89.70216306156406,
      "grad_norm": 0.25417467951774597,
      "learning_rate": 3.051864406779661e-05,
      "loss": 0.0064,
      "step": 27000
    },
    {
      "epoch": 89.70216306156406,
      "eval_loss": 0.22117015719413757,
      "eval_runtime": 5.3469,
      "eval_samples_per_second": 18.89,
      "eval_steps_per_second": 2.431,
      "eval_wer": 0.22795698924731184,
      "step": 27000
    },
    {
      "epoch": 91.36272878535773,
      "grad_norm": 0.26413634419441223,
      "learning_rate": 2.5433898305084745e-05,
      "loss": 0.0052,
      "step": 27500
    },
    {
      "epoch": 93.02329450915141,
      "grad_norm": 0.06111468747258186,
      "learning_rate": 2.034915254237288e-05,
      "loss": 0.0044,
      "step": 28000
    },
    {
      "epoch": 93.02329450915141,
      "eval_loss": 0.22874999046325684,
      "eval_runtime": 5.2229,
      "eval_samples_per_second": 19.338,
      "eval_steps_per_second": 2.489,
      "eval_wer": 0.22580645161290322,
      "step": 28000
    },
    {
      "epoch": 94.6855241264559,
      "grad_norm": 0.4941785931587219,
      "learning_rate": 1.5264406779661016e-05,
      "loss": 0.0041,
      "step": 28500
    },
    {
      "epoch": 96.34608985024958,
      "grad_norm": 0.19530624151229858,
      "learning_rate": 1.0179661016949151e-05,
      "loss": 0.004,
      "step": 29000
    },
    {
      "epoch": 96.34608985024958,
      "eval_loss": 0.22946567833423615,
      "eval_runtime": 5.3077,
      "eval_samples_per_second": 19.029,
      "eval_steps_per_second": 2.449,
      "eval_wer": 0.23440860215053763,
      "step": 29000
    },
    {
      "epoch": 98.00665557404326,
      "grad_norm": 0.19543957710266113,
      "learning_rate": 5.094915254237288e-06,
      "loss": 0.0042,
      "step": 29500
    },
    {
      "epoch": 99.66888519134775,
      "grad_norm": 0.6488747000694275,
      "learning_rate": 1.0169491525423728e-08,
      "loss": 0.0037,
      "step": 30000
    },
    {
      "epoch": 99.66888519134775,
      "eval_loss": 0.22431735694408417,
      "eval_runtime": 5.3048,
      "eval_samples_per_second": 19.039,
      "eval_steps_per_second": 2.451,
      "eval_wer": 0.22043010752688172,
      "step": 30000
    },
    {
      "epoch": 99.66888519134775,
      "step": 30000,
      "total_flos": 2.0486046325976072e+19,
      "train_loss": 0.08741244434913,
      "train_runtime": 29833.4209,
      "train_samples_per_second": 16.089,
      "train_steps_per_second": 1.006
    }
  ],
  "logging_steps": 500,
  "max_steps": 30000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 100,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.0486046325976072e+19,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}