| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0, | |
| "eval_steps": 500, | |
| "global_step": 148, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.033783783783783786, | |
| "grad_norm": 4.081285861070036, | |
| "learning_rate": 3.125e-05, | |
| "loss": 1.343, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.06756756756756757, | |
| "grad_norm": 2.4767980475317306, | |
| "learning_rate": 4.9977343997179584e-05, | |
| "loss": 1.0898, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.10135135135135136, | |
| "grad_norm": 1.75418160634778, | |
| "learning_rate": 4.97229876633906e-05, | |
| "loss": 0.9965, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.13513513513513514, | |
| "grad_norm": 1.581608017901505, | |
| "learning_rate": 4.91891643656567e-05, | |
| "loss": 0.9496, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.16891891891891891, | |
| "grad_norm": 1.457452333355659, | |
| "learning_rate": 4.838258724167838e-05, | |
| "loss": 0.9404, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.20270270270270271, | |
| "grad_norm": 1.6831247945129768, | |
| "learning_rate": 4.731339946677661e-05, | |
| "loss": 0.8779, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.23648648648648649, | |
| "grad_norm": 1.538797370066566, | |
| "learning_rate": 4.599504669757798e-05, | |
| "loss": 0.87, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.2702702702702703, | |
| "grad_norm": 1.3334974862534241, | |
| "learning_rate": 4.444410798508125e-05, | |
| "loss": 0.8516, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.30405405405405406, | |
| "grad_norm": 1.282523609947544, | |
| "learning_rate": 4.268008728347168e-05, | |
| "loss": 0.8224, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.33783783783783783, | |
| "grad_norm": 1.3334225837945703, | |
| "learning_rate": 4.072516817658065e-05, | |
| "loss": 0.8235, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.3716216216216216, | |
| "grad_norm": 1.6171426981850356, | |
| "learning_rate": 3.860393490644781e-05, | |
| "loss": 0.8258, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.40540540540540543, | |
| "grad_norm": 1.211882443831182, | |
| "learning_rate": 3.634306321221328e-05, | |
| "loss": 0.8052, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.4391891891891892, | |
| "grad_norm": 1.2960251498072217, | |
| "learning_rate": 3.397098486722039e-05, | |
| "loss": 0.8011, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.47297297297297297, | |
| "grad_norm": 1.2312853794344398, | |
| "learning_rate": 3.1517530132969326e-05, | |
| "loss": 0.7848, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.5067567567567568, | |
| "grad_norm": 1.3787021580163523, | |
| "learning_rate": 2.9013552626270894e-05, | |
| "loss": 0.8097, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.5405405405405406, | |
| "grad_norm": 1.1958489309242977, | |
| "learning_rate": 2.6490541317113427e-05, | |
| "loss": 0.7474, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.5743243243243243, | |
| "grad_norm": 1.2265018406929975, | |
| "learning_rate": 2.3980224536594803e-05, | |
| "loss": 0.7643, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.6081081081081081, | |
| "grad_norm": 1.2553476359659121, | |
| "learning_rate": 2.1514170974749814e-05, | |
| "loss": 0.752, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.6418918918918919, | |
| "grad_norm": 1.1960996698324164, | |
| "learning_rate": 1.9123392685956238e-05, | |
| "loss": 0.7377, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.6756756756756757, | |
| "grad_norm": 1.1286279557451986, | |
| "learning_rate": 1.6837955094357533e-05, | |
| "loss": 0.728, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7094594594594594, | |
| "grad_norm": 1.1422328938911805, | |
| "learning_rate": 1.468659890370983e-05, | |
| "loss": 0.7374, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.7432432432432432, | |
| "grad_norm": 1.1484788185826784, | |
| "learning_rate": 1.2696378666356468e-05, | |
| "loss": 0.7218, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.777027027027027, | |
| "grad_norm": 1.132142821163549, | |
| "learning_rate": 1.0892322556534839e-05, | |
| "loss": 0.7063, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.8108108108108109, | |
| "grad_norm": 1.073342872082975, | |
| "learning_rate": 9.297117626563687e-06, | |
| "loss": 0.7104, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.8445945945945946, | |
| "grad_norm": 1.108976138426985, | |
| "learning_rate": 7.930824503996856e-06, | |
| "loss": 0.7268, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.8783783783783784, | |
| "grad_norm": 1.1335555277239915, | |
| "learning_rate": 6.810625117592363e-06, | |
| "loss": 0.7072, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.9121621621621622, | |
| "grad_norm": 1.020215002734917, | |
| "learning_rate": 5.950606624589065e-06, | |
| "loss": 0.7096, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.9459459459459459, | |
| "grad_norm": 1.028950150275939, | |
| "learning_rate": 5.361584256530833e-06, | |
| "loss": 0.693, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.9797297297297297, | |
| "grad_norm": 1.071472930368092, | |
| "learning_rate": 5.050965311454739e-06, | |
| "loss": 0.6905, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 148, | |
| "total_flos": 161717585444864.0, | |
| "train_loss": 0.8149667881630562, | |
| "train_runtime": 2632.7258, | |
| "train_samples_per_second": 3.593, | |
| "train_steps_per_second": 0.056 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 148, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 161717585444864.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |