{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 289,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03460207612456748,
      "grad_norm": 18.75143814086914,
      "learning_rate": 9.653979238754326e-06,
      "loss": 0.5271,
      "step": 10
    },
    {
      "epoch": 0.06920415224913495,
      "grad_norm": 15.585368156433105,
      "learning_rate": 9.307958477508652e-06,
      "loss": 0.2558,
      "step": 20
    },
    {
      "epoch": 0.10380622837370242,
      "grad_norm": 6.447887420654297,
      "learning_rate": 8.961937716262975e-06,
      "loss": 0.1715,
      "step": 30
    },
    {
      "epoch": 0.1384083044982699,
      "grad_norm": 10.45551872253418,
      "learning_rate": 8.615916955017302e-06,
      "loss": 0.1847,
      "step": 40
    },
    {
      "epoch": 0.17301038062283736,
      "grad_norm": 5.732431888580322,
      "learning_rate": 8.269896193771627e-06,
      "loss": 0.0818,
      "step": 50
    },
    {
      "epoch": 0.20761245674740483,
      "grad_norm": 6.817206382751465,
      "learning_rate": 7.923875432525952e-06,
      "loss": 0.0961,
      "step": 60
    },
    {
      "epoch": 0.2422145328719723,
      "grad_norm": 12.547711372375488,
      "learning_rate": 7.577854671280277e-06,
      "loss": 0.2854,
      "step": 70
    },
    {
      "epoch": 0.2768166089965398,
      "grad_norm": 5.625271797180176,
      "learning_rate": 7.2318339100346025e-06,
      "loss": 0.1504,
      "step": 80
    },
    {
      "epoch": 0.31141868512110726,
      "grad_norm": 15.37494945526123,
      "learning_rate": 6.885813148788928e-06,
      "loss": 0.1155,
      "step": 90
    },
    {
      "epoch": 0.3460207612456747,
      "grad_norm": 6.263580799102783,
      "learning_rate": 6.539792387543253e-06,
      "loss": 0.0832,
      "step": 100
    },
    {
      "epoch": 0.3806228373702422,
      "grad_norm": 3.973139762878418,
      "learning_rate": 6.193771626297579e-06,
      "loss": 0.0647,
      "step": 110
    },
    {
      "epoch": 0.41522491349480967,
      "grad_norm": 10.100776672363281,
      "learning_rate": 5.847750865051903e-06,
      "loss": 0.1117,
      "step": 120
    },
    {
      "epoch": 0.44982698961937717,
      "grad_norm": 5.387557506561279,
      "learning_rate": 5.501730103806229e-06,
      "loss": 0.0759,
      "step": 130
    },
    {
      "epoch": 0.4844290657439446,
      "grad_norm": 16.064565658569336,
      "learning_rate": 5.155709342560554e-06,
      "loss": 0.1313,
      "step": 140
    },
    {
      "epoch": 0.5190311418685121,
      "grad_norm": 5.043946743011475,
      "learning_rate": 4.809688581314879e-06,
      "loss": 0.1236,
      "step": 150
    },
    {
      "epoch": 0.5536332179930796,
      "grad_norm": 6.548528671264648,
      "learning_rate": 4.463667820069205e-06,
      "loss": 0.0558,
      "step": 160
    },
    {
      "epoch": 0.5882352941176471,
      "grad_norm": 1.170918345451355,
      "learning_rate": 4.11764705882353e-06,
      "loss": 0.11,
      "step": 170
    },
    {
      "epoch": 0.6228373702422145,
      "grad_norm": 9.746258735656738,
      "learning_rate": 3.7716262975778552e-06,
      "loss": 0.2231,
      "step": 180
    },
    {
      "epoch": 0.657439446366782,
      "grad_norm": 4.314220905303955,
      "learning_rate": 3.42560553633218e-06,
      "loss": 0.1057,
      "step": 190
    },
    {
      "epoch": 0.6920415224913494,
      "grad_norm": 7.863876819610596,
      "learning_rate": 3.0795847750865054e-06,
      "loss": 0.0963,
      "step": 200
    },
    {
      "epoch": 0.726643598615917,
      "grad_norm": 4.094608783721924,
      "learning_rate": 2.7335640138408307e-06,
      "loss": 0.0487,
      "step": 210
    },
    {
      "epoch": 0.7612456747404844,
      "grad_norm": 0.8956738710403442,
      "learning_rate": 2.387543252595156e-06,
      "loss": 0.0774,
      "step": 220
    },
    {
      "epoch": 0.7958477508650519,
      "grad_norm": 4.65871000289917,
      "learning_rate": 2.041522491349481e-06,
      "loss": 0.0781,
      "step": 230
    },
    {
      "epoch": 0.8304498269896193,
      "grad_norm": 6.274026870727539,
      "learning_rate": 1.6955017301038063e-06,
      "loss": 0.057,
      "step": 240
    },
    {
      "epoch": 0.8650519031141869,
      "grad_norm": 0.8215273022651672,
      "learning_rate": 1.3494809688581318e-06,
      "loss": 0.0854,
      "step": 250
    },
    {
      "epoch": 0.8996539792387543,
      "grad_norm": 12.408058166503906,
      "learning_rate": 1.0034602076124569e-06,
      "loss": 0.0743,
      "step": 260
    },
    {
      "epoch": 0.9342560553633218,
      "grad_norm": 4.427896022796631,
      "learning_rate": 6.57439446366782e-07,
      "loss": 0.0519,
      "step": 270
    },
    {
      "epoch": 0.9688581314878892,
      "grad_norm": 9.057334899902344,
      "learning_rate": 3.114186851211073e-07,
      "loss": 0.0727,
      "step": 280
    }
  ],
  "logging_steps": 10,
  "max_steps": 289,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}