{ "best_global_step": 200, "best_metric": 0.2087530493736267, "best_model_checkpoint": "./adalora_weather_model/checkpoint-200", "epoch": 2.0411311053984575, "eval_steps": 20, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10282776349614396, "grad_norm": 3.2930359840393066, "learning_rate": 6.75e-05, "loss": 16.7516, "mean_token_accuracy": 0.5331788018345833, "num_tokens": 171254.0, "step": 10 }, { "epoch": 0.20565552699228792, "grad_norm": 5.34633731842041, "learning_rate": 0.0001425, "loss": 14.2345, "mean_token_accuracy": 0.5723872803151607, "num_tokens": 342816.0, "step": 20 }, { "epoch": 0.20565552699228792, "eval_loss": 1.4267879724502563, "eval_mean_token_accuracy": 0.6206505249708126, "eval_num_tokens": 342816.0, "eval_runtime": 103.2151, "eval_samples_per_second": 1.889, "eval_steps_per_second": 1.889, "step": 20 }, { "epoch": 0.30848329048843187, "grad_norm": 8.451922416687012, "learning_rate": 0.00021749999999999997, "loss": 8.4633, "mean_token_accuracy": 0.7056376278400421, "num_tokens": 514067.0, "step": 30 }, { "epoch": 0.41131105398457585, "grad_norm": 3.2435312271118164, "learning_rate": 0.00029249999999999995, "loss": 3.6174, "mean_token_accuracy": 0.8711350880563259, "num_tokens": 685570.0, "step": 40 }, { "epoch": 0.41131105398457585, "eval_loss": 0.34826213121414185, "eval_mean_token_accuracy": 0.8939384683584556, "eval_num_tokens": 685570.0, "eval_runtime": 103.283, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 40 }, { "epoch": 0.5141388174807198, "grad_norm": 3.4071648120880127, "learning_rate": 0.0002995163544683256, "loss": 2.9181, "mean_token_accuracy": 0.8942699111998081, "num_tokens": 856740.0, "step": 50 }, { "epoch": 0.6169665809768637, "grad_norm": 2.6412267684936523, "learning_rate": 0.00029784849709745616, "loss": 2.638, "mean_token_accuracy": 0.9004527874290943, "num_tokens": 1028000.0, "step": 60 }, { "epoch": 0.6169665809768637, "eval_loss": 0.29210129380226135, "eval_mean_token_accuracy": 0.9071287249907469, "eval_num_tokens": 1028000.0, "eval_runtime": 103.2512, "eval_samples_per_second": 1.889, "eval_steps_per_second": 1.889, "step": 60 }, { "epoch": 0.7197943444730077, "grad_norm": 10.460367202758789, "learning_rate": 0.0002950037303267096, "loss": 2.2428, "mean_token_accuracy": 0.9117808744311333, "num_tokens": 1199650.0, "step": 70 }, { "epoch": 0.8226221079691517, "grad_norm": 5.442368984222412, "learning_rate": 0.0002910046991800035, "loss": 2.0227, "mean_token_accuracy": 0.9168093383312226, "num_tokens": 1370524.0, "step": 80 }, { "epoch": 0.8226221079691517, "eval_loss": 0.2537098526954651, "eval_mean_token_accuracy": 0.9172265719144772, "eval_num_tokens": 1370524.0, "eval_runtime": 103.2432, "eval_samples_per_second": 1.889, "eval_steps_per_second": 1.889, "step": 80 }, { "epoch": 0.9254498714652957, "grad_norm": 2.3143043518066406, "learning_rate": 0.00028588323690176954, "loss": 1.9486, "mean_token_accuracy": 0.9203169830143452, "num_tokens": 1542159.0, "step": 90 }, { "epoch": 1.0205655526992288, "grad_norm": 2.387840986251831, "learning_rate": 0.0002796801115567139, "loss": 1.7171, "mean_token_accuracy": 0.9238405316262632, "num_tokens": 1700574.0, "step": 100 }, { "epoch": 1.0205655526992288, "eval_loss": 0.237007275223732, "eval_mean_token_accuracy": 0.9215406671548501, "eval_num_tokens": 1700574.0, "eval_runtime": 102.9604, "eval_samples_per_second": 1.894, "eval_steps_per_second": 1.894, "step": 100 }, { "epoch": 1.1233933161953726, "grad_norm": 2.3169972896575928, "learning_rate": 0.0002724447015062708, "loss": 1.7776, "mean_token_accuracy": 0.925829317420721, "num_tokens": 1871783.0, "step": 110 }, { "epoch": 1.2262210796915167, "grad_norm": 2.366626262664795, "learning_rate": 0.0002642346023450357, "loss": 1.7638, "mean_token_accuracy": 0.9251113034784794, "num_tokens": 2043203.0, "step": 120 }, { "epoch": 1.2262210796915167, "eval_loss": 0.2297067493200302, "eval_mean_token_accuracy": 0.9240646191132375, "eval_num_tokens": 2043203.0, "eval_runtime": 103.0662, "eval_samples_per_second": 1.892, "eval_steps_per_second": 1.892, "step": 120 }, { "epoch": 1.3290488431876606, "grad_norm": 2.324875593185425, "learning_rate": 0.0002551151684260553, "loss": 1.7129, "mean_token_accuracy": 0.9276402719318867, "num_tokens": 2214867.0, "step": 130 }, { "epoch": 1.4318766066838047, "grad_norm": 2.4916014671325684, "learning_rate": 0.0002451589926245468, "loss": 1.6328, "mean_token_accuracy": 0.9298155799508094, "num_tokens": 2385981.0, "step": 140 }, { "epoch": 1.4318766066838047, "eval_loss": 0.22466857731342316, "eval_mean_token_accuracy": 0.9257748848352677, "eval_num_tokens": 2385981.0, "eval_runtime": 103.2959, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 140 }, { "epoch": 1.5347043701799485, "grad_norm": 2.331782341003418, "learning_rate": 0.00023444532848124715, "loss": 1.6382, "mean_token_accuracy": 0.9296720393002034, "num_tokens": 2557432.0, "step": 150 }, { "epoch": 1.6375321336760926, "grad_norm": 2.2701163291931152, "learning_rate": 0.00022305945932527308, "loss": 1.6396, "mean_token_accuracy": 0.9298155024647713, "num_tokens": 2729083.0, "step": 160 }, { "epoch": 1.6375321336760926, "eval_loss": 0.21823178231716156, "eval_mean_token_accuracy": 0.92809411745805, "eval_num_tokens": 2729083.0, "eval_runtime": 103.3054, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 160 }, { "epoch": 1.7403598971722365, "grad_norm": 2.184347629547119, "learning_rate": 0.0002110920193984228, "loss": 1.667, "mean_token_accuracy": 0.928074149042368, "num_tokens": 2900445.0, "step": 170 }, { "epoch": 1.8431876606683804, "grad_norm": 2.0277137756347656, "learning_rate": 0.00019863827238493308, "loss": 1.5743, "mean_token_accuracy": 0.9325967490673065, "num_tokens": 3072258.0, "step": 180 }, { "epoch": 1.8431876606683804, "eval_loss": 0.2095421850681305, "eval_mean_token_accuracy": 0.9297601647866078, "eval_num_tokens": 3072258.0, "eval_runtime": 103.4026, "eval_samples_per_second": 1.886, "eval_steps_per_second": 1.886, "step": 180 }, { "epoch": 1.9460154241645244, "grad_norm": 2.0166707038879395, "learning_rate": 0.00018579735308976727, "loss": 1.5818, "mean_token_accuracy": 0.9324821837246418, "num_tokens": 3242706.0, "step": 190 }, { "epoch": 2.0411311053984575, "grad_norm": 2.155334949493408, "learning_rate": 0.00017267147830185608, "loss": 1.4363, "mean_token_accuracy": 0.9325833642804945, "num_tokens": 3401061.0, "step": 200 }, { "epoch": 2.0411311053984575, "eval_loss": 0.2087530493736267, "eval_mean_token_accuracy": 0.9303424829091781, "eval_num_tokens": 3401061.0, "eval_runtime": 103.3012, "eval_samples_per_second": 1.888, "eval_steps_per_second": 1.888, "step": 200 } ], "logging_steps": 10, "max_steps": 392, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 40, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.5822989364992717e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }