{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 289, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03460207612456748, "grad_norm": 18.75143814086914, "learning_rate": 9.653979238754326e-06, "loss": 0.5271, "step": 10 }, { "epoch": 0.06920415224913495, "grad_norm": 15.585368156433105, "learning_rate": 9.307958477508652e-06, "loss": 0.2558, "step": 20 }, { "epoch": 0.10380622837370242, "grad_norm": 6.447887420654297, "learning_rate": 8.961937716262975e-06, "loss": 0.1715, "step": 30 }, { "epoch": 0.1384083044982699, "grad_norm": 10.45551872253418, "learning_rate": 8.615916955017302e-06, "loss": 0.1847, "step": 40 }, { "epoch": 0.17301038062283736, "grad_norm": 5.732431888580322, "learning_rate": 8.269896193771627e-06, "loss": 0.0818, "step": 50 }, { "epoch": 0.20761245674740483, "grad_norm": 6.817206382751465, "learning_rate": 7.923875432525952e-06, "loss": 0.0961, "step": 60 }, { "epoch": 0.2422145328719723, "grad_norm": 12.547711372375488, "learning_rate": 7.577854671280277e-06, "loss": 0.2854, "step": 70 }, { "epoch": 0.2768166089965398, "grad_norm": 5.625271797180176, "learning_rate": 7.2318339100346025e-06, "loss": 0.1504, "step": 80 }, { "epoch": 0.31141868512110726, "grad_norm": 15.37494945526123, "learning_rate": 6.885813148788928e-06, "loss": 0.1155, "step": 90 }, { "epoch": 0.3460207612456747, "grad_norm": 6.263580799102783, "learning_rate": 6.539792387543253e-06, "loss": 0.0832, "step": 100 }, { "epoch": 0.3806228373702422, "grad_norm": 3.973139762878418, "learning_rate": 6.193771626297579e-06, "loss": 0.0647, "step": 110 }, { "epoch": 0.41522491349480967, "grad_norm": 10.100776672363281, "learning_rate": 5.847750865051903e-06, "loss": 0.1117, "step": 120 }, { "epoch": 0.44982698961937717, "grad_norm": 5.387557506561279, "learning_rate": 5.501730103806229e-06, "loss": 0.0759, "step": 130 }, { "epoch": 0.4844290657439446, "grad_norm": 16.064565658569336, "learning_rate": 5.155709342560554e-06, "loss": 0.1313, "step": 140 }, { "epoch": 0.5190311418685121, "grad_norm": 5.043946743011475, "learning_rate": 4.809688581314879e-06, "loss": 0.1236, "step": 150 }, { "epoch": 0.5536332179930796, "grad_norm": 6.548528671264648, "learning_rate": 4.463667820069205e-06, "loss": 0.0558, "step": 160 }, { "epoch": 0.5882352941176471, "grad_norm": 1.170918345451355, "learning_rate": 4.11764705882353e-06, "loss": 0.11, "step": 170 }, { "epoch": 0.6228373702422145, "grad_norm": 9.746258735656738, "learning_rate": 3.7716262975778552e-06, "loss": 0.2231, "step": 180 }, { "epoch": 0.657439446366782, "grad_norm": 4.314220905303955, "learning_rate": 3.42560553633218e-06, "loss": 0.1057, "step": 190 }, { "epoch": 0.6920415224913494, "grad_norm": 7.863876819610596, "learning_rate": 3.0795847750865054e-06, "loss": 0.0963, "step": 200 }, { "epoch": 0.726643598615917, "grad_norm": 4.094608783721924, "learning_rate": 2.7335640138408307e-06, "loss": 0.0487, "step": 210 }, { "epoch": 0.7612456747404844, "grad_norm": 0.8956738710403442, "learning_rate": 2.387543252595156e-06, "loss": 0.0774, "step": 220 }, { "epoch": 0.7958477508650519, "grad_norm": 4.65871000289917, "learning_rate": 2.041522491349481e-06, "loss": 0.0781, "step": 230 }, { "epoch": 0.8304498269896193, "grad_norm": 6.274026870727539, "learning_rate": 1.6955017301038063e-06, "loss": 0.057, "step": 240 }, { "epoch": 0.8650519031141869, "grad_norm": 0.8215273022651672, "learning_rate": 1.3494809688581318e-06, "loss": 0.0854, "step": 250 }, { "epoch": 0.8996539792387543, "grad_norm": 12.408058166503906, "learning_rate": 1.0034602076124569e-06, "loss": 0.0743, "step": 260 }, { "epoch": 0.9342560553633218, "grad_norm": 4.427896022796631, "learning_rate": 6.57439446366782e-07, "loss": 0.0519, "step": 270 }, { "epoch": 0.9688581314878892, "grad_norm": 9.057334899902344, "learning_rate": 3.114186851211073e-07, "loss": 0.0727, "step": 280 } ], "logging_steps": 10, "max_steps": 289, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }