{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.025437201907790145,
  "eval_steps": 50,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0001271860095389507,
      "eval_loss": 1.2489664554595947,
      "eval_runtime": 59.8471,
      "eval_samples_per_second": 55.324,
      "eval_steps_per_second": 27.67,
      "step": 1
    },
    {
      "epoch": 0.0012718600953895071,
      "grad_norm": 2.501387119293213,
      "learning_rate": 0.00019967573081342103,
      "loss": 4.8851,
      "step": 10
    },
    {
      "epoch": 0.0025437201907790143,
      "grad_norm": 2.09289813041687,
      "learning_rate": 0.0001970941817426052,
      "loss": 3.8905,
      "step": 20
    },
    {
      "epoch": 0.0038155802861685214,
      "grad_norm": 1.562943935394287,
      "learning_rate": 0.00019199794436588243,
      "loss": 3.4798,
      "step": 30
    },
    {
      "epoch": 0.005087440381558029,
      "grad_norm": 1.8731768131256104,
      "learning_rate": 0.0001845190085543795,
      "loss": 3.2003,
      "step": 40
    },
    {
      "epoch": 0.006359300476947536,
      "grad_norm": 1.9606801271438599,
      "learning_rate": 0.00017485107481711012,
      "loss": 3.1954,
      "step": 50
    },
    {
      "epoch": 0.006359300476947536,
      "eval_loss": 0.7762054800987244,
      "eval_runtime": 58.3454,
      "eval_samples_per_second": 56.748,
      "eval_steps_per_second": 28.383,
      "step": 50
    },
    {
      "epoch": 0.007631160572337043,
      "grad_norm": 1.9391154050827026,
      "learning_rate": 0.00016324453755953773,
      "loss": 3.1999,
      "step": 60
    },
    {
      "epoch": 0.00890302066772655,
      "grad_norm": 2.0354959964752197,
      "learning_rate": 0.00015000000000000001,
      "loss": 3.117,
      "step": 70
    },
    {
      "epoch": 0.010174880763116057,
      "grad_norm": 1.97865629196167,
      "learning_rate": 0.00013546048870425356,
      "loss": 2.9997,
      "step": 80
    },
    {
      "epoch": 0.011446740858505564,
      "grad_norm": 2.2046570777893066,
      "learning_rate": 0.00012000256937760445,
      "loss": 3.1402,
      "step": 90
    },
    {
      "epoch": 0.012718600953895072,
      "grad_norm": 2.166163444519043,
      "learning_rate": 0.00010402659401094152,
      "loss": 2.9453,
      "step": 100
    },
    {
      "epoch": 0.012718600953895072,
      "eval_loss": 0.7418835759162903,
      "eval_runtime": 58.4925,
      "eval_samples_per_second": 56.606,
      "eval_steps_per_second": 28.311,
      "step": 100
    },
    {
      "epoch": 0.013990461049284579,
      "grad_norm": 2.079094171524048,
      "learning_rate": 8.79463319744677e-05,
      "loss": 2.9943,
      "step": 110
    },
    {
      "epoch": 0.015262321144674086,
      "grad_norm": 2.177687406539917,
      "learning_rate": 7.217825360835473e-05,
      "loss": 3.009,
      "step": 120
    },
    {
      "epoch": 0.016534181240063592,
      "grad_norm": 2.1857900619506836,
      "learning_rate": 5.713074385969457e-05,
      "loss": 3.069,
      "step": 130
    },
    {
      "epoch": 0.0178060413354531,
      "grad_norm": 2.200139045715332,
      "learning_rate": 4.3193525326884435e-05,
      "loss": 2.9511,
      "step": 140
    },
    {
      "epoch": 0.019077901430842606,
      "grad_norm": 2.269382953643799,
      "learning_rate": 3.072756464904006e-05,
      "loss": 2.9891,
      "step": 150
    },
    {
      "epoch": 0.019077901430842606,
      "eval_loss": 0.7284408807754517,
      "eval_runtime": 58.5829,
      "eval_samples_per_second": 56.518,
      "eval_steps_per_second": 28.268,
      "step": 150
    },
    {
      "epoch": 0.020349761526232114,
      "grad_norm": 2.212369918823242,
      "learning_rate": 2.0055723659649904e-05,
      "loss": 2.9499,
      "step": 160
    },
    {
      "epoch": 0.021621621621621623,
      "grad_norm": 2.320812940597534,
      "learning_rate": 1.1454397434679021e-05,
      "loss": 2.9569,
      "step": 170
    },
    {
      "epoch": 0.022893481717011128,
      "grad_norm": 2.3446998596191406,
      "learning_rate": 5.146355805285452e-06,
      "loss": 3.0559,
      "step": 180
    },
    {
      "epoch": 0.024165341812400636,
      "grad_norm": 2.276193380355835,
      "learning_rate": 1.2949737362087156e-06,
      "loss": 3.0383,
      "step": 190
    },
    {
      "epoch": 0.025437201907790145,
      "grad_norm": 2.3682680130004883,
      "learning_rate": 0.0,
      "loss": 2.9477,
      "step": 200
    },
    {
      "epoch": 0.025437201907790145,
      "eval_loss": 0.7256292700767517,
      "eval_runtime": 58.5463,
      "eval_samples_per_second": 56.554,
      "eval_steps_per_second": 28.285,
      "step": 200
    }
  ],
  "logging_steps": 10,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 50,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.1943156564099072e+16,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}