{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 46, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.021739130434782608, "grad_norm": 1.1839052438735962, "learning_rate": 0.0, "loss": 2.5329, "step": 1 }, { "epoch": 0.043478260869565216, "grad_norm": 1.286145567893982, "learning_rate": 8e-05, "loss": 2.4878, "step": 2 }, { "epoch": 0.06521739130434782, "grad_norm": 1.2978639602661133, "learning_rate": 0.00016, "loss": 2.2947, "step": 3 }, { "epoch": 0.08695652173913043, "grad_norm": 1.4251940250396729, "learning_rate": 0.00024, "loss": 1.8883, "step": 4 }, { "epoch": 0.10869565217391304, "grad_norm": 1.7767548561096191, "learning_rate": 0.00032, "loss": 1.4114, "step": 5 }, { "epoch": 0.13043478260869565, "grad_norm": 2.304410696029663, "learning_rate": 0.0004, "loss": 0.9961, "step": 6 }, { "epoch": 0.15217391304347827, "grad_norm": 3.0359408855438232, "learning_rate": 0.00039941316023674807, "loss": 0.826, "step": 7 }, { "epoch": 0.17391304347826086, "grad_norm": 1.336562156677246, "learning_rate": 0.00039765608475606976, "loss": 0.8054, "step": 8 }, { "epoch": 0.1956521739130435, "grad_norm": 0.5087499618530273, "learning_rate": 0.00039473908477555584, "loss": 0.626, "step": 9 }, { "epoch": 0.21739130434782608, "grad_norm": 0.47382721304893494, "learning_rate": 0.0003906792784109861, "loss": 0.5957, "step": 10 }, { "epoch": 0.2391304347826087, "grad_norm": 0.8307927250862122, "learning_rate": 0.000385500490220419, "loss": 0.6326, "step": 11 }, { "epoch": 0.2608695652173913, "grad_norm": 0.7591253519058228, "learning_rate": 0.00037923311139221114, "loss": 0.5715, "step": 12 }, { "epoch": 0.2826086956521739, "grad_norm": 0.4196276068687439, "learning_rate": 0.00037191392139744025, "loss": 0.519, "step": 13 }, { "epoch": 0.30434782608695654, "grad_norm": 0.3430580496788025, "learning_rate": 0.00036358587215334356, "loss": 0.5277, "step": 14 }, { "epoch": 0.32608695652173914, "grad_norm": 0.39747822284698486, "learning_rate": 0.0003542978359643886, "loss": 0.5513, "step": 15 }, { "epoch": 0.34782608695652173, "grad_norm": 0.2966645359992981, "learning_rate": 0.0003441043187201574, "loss": 0.4966, "step": 16 }, { "epoch": 0.3695652173913043, "grad_norm": 0.3044649660587311, "learning_rate": 0.00033306514003311305, "loss": 0.5843, "step": 17 }, { "epoch": 0.391304347826087, "grad_norm": 0.29440030455589294, "learning_rate": 0.0003212450821933276, "loss": 0.5514, "step": 18 }, { "epoch": 0.41304347826086957, "grad_norm": 0.25709202885627747, "learning_rate": 0.00030871351000024425, "loss": 0.5554, "step": 19 }, { "epoch": 0.43478260869565216, "grad_norm": 0.2896120548248291, "learning_rate": 0.0002955439637024526, "loss": 0.5084, "step": 20 }, { "epoch": 0.45652173913043476, "grad_norm": 0.31083154678344727, "learning_rate": 0.00028181372743426805, "loss": 0.5945, "step": 21 }, { "epoch": 0.4782608695652174, "grad_norm": 0.2456878423690796, "learning_rate": 0.00026760337568170053, "loss": 0.413, "step": 22 }, { "epoch": 0.5, "grad_norm": 0.23984560370445251, "learning_rate": 0.0002529963004393324, "loss": 0.551, "step": 23 }, { "epoch": 0.5217391304347826, "grad_norm": 0.29197460412979126, "learning_rate": 0.0002380782218329337, "loss": 0.4758, "step": 24 }, { "epoch": 0.5434782608695652, "grad_norm": 0.24348799884319305, "learning_rate": 0.00022293668507968013, "loss": 0.4229, "step": 25 }, { "epoch": 0.5652173913043478, "grad_norm": 0.259126216173172, "learning_rate": 0.0002076605467380071, "loss": 0.5218, "step": 26 }, { "epoch": 0.5869565217391305, "grad_norm": 0.2982611060142517, "learning_rate": 0.00019233945326199294, "loss": 0.5754, "step": 27 }, { "epoch": 0.6086956521739131, "grad_norm": 0.27066999673843384, "learning_rate": 0.00017706331492031995, "loss": 0.4559, "step": 28 }, { "epoch": 0.6304347826086957, "grad_norm": 0.22570188343524933, "learning_rate": 0.0001619217781670663, "loss": 0.4743, "step": 29 }, { "epoch": 0.6521739130434783, "grad_norm": 0.23569968342781067, "learning_rate": 0.0001470036995606677, "loss": 0.478, "step": 30 }, { "epoch": 0.6739130434782609, "grad_norm": 0.29840192198753357, "learning_rate": 0.0001323966243182995, "loss": 0.4915, "step": 31 }, { "epoch": 0.6956521739130435, "grad_norm": 0.22925806045532227, "learning_rate": 0.00011818627256573203, "loss": 0.4438, "step": 32 }, { "epoch": 0.717391304347826, "grad_norm": 0.23691901564598083, "learning_rate": 0.00010445603629754738, "loss": 0.4096, "step": 33 }, { "epoch": 0.7391304347826086, "grad_norm": 0.2998674213886261, "learning_rate": 9.12864899997558e-05, "loss": 0.5025, "step": 34 }, { "epoch": 0.7608695652173914, "grad_norm": 0.21318762004375458, "learning_rate": 7.875491780667246e-05, "loss": 0.424, "step": 35 }, { "epoch": 0.782608695652174, "grad_norm": 0.2458743005990982, "learning_rate": 6.693485996688696e-05, "loss": 0.505, "step": 36 }, { "epoch": 0.8043478260869565, "grad_norm": 0.22771987318992615, "learning_rate": 5.589568127984262e-05, "loss": 0.4847, "step": 37 }, { "epoch": 0.8260869565217391, "grad_norm": 0.23360995948314667, "learning_rate": 4.5702164035611406e-05, "loss": 0.5909, "step": 38 }, { "epoch": 0.8478260869565217, "grad_norm": 0.23350945115089417, "learning_rate": 3.641412784665647e-05, "loss": 0.4595, "step": 39 }, { "epoch": 0.8695652173913043, "grad_norm": 0.24031572043895721, "learning_rate": 2.8086078602559806e-05, "loss": 0.531, "step": 40 }, { "epoch": 0.8913043478260869, "grad_norm": 0.21330903470516205, "learning_rate": 2.0766888607788904e-05, "loss": 0.4063, "step": 41 }, { "epoch": 0.9130434782608695, "grad_norm": 0.24683713912963867, "learning_rate": 1.4499509779581078e-05, "loss": 0.5228, "step": 42 }, { "epoch": 0.9347826086956522, "grad_norm": 0.21736790239810944, "learning_rate": 9.320721589013892e-06, "loss": 0.5032, "step": 43 }, { "epoch": 0.9565217391304348, "grad_norm": 0.2305678129196167, "learning_rate": 5.260915224444207e-06, "loss": 0.4663, "step": 44 }, { "epoch": 0.9782608695652174, "grad_norm": 0.23209130764007568, "learning_rate": 2.343915243930317e-06, "loss": 0.504, "step": 45 }, { "epoch": 1.0, "grad_norm": 0.23025843501091003, "learning_rate": 5.868397632519118e-07, "loss": 0.3984, "step": 46 } ], "logging_steps": 1, "max_steps": 46, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.779203528306688e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }