{
  "best_global_step": null,
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "eval_steps": 10,
  "global_step": 30,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.8366013071895425,
      "grad_norm": 23.30156092439309,
      "learning_rate": 5.000000000000001e-07,
      "loss": 1.2058446407318115,
      "memory(GiB)": 70.12,
      "step": 1,
      "token_acc": 0.69863896716209,
      "train_speed(iter/s)": 0.003918
    },
    {
      "epoch": 1.0,
      "grad_norm": 23.30156092439309,
      "learning_rate": 1.0000000000000002e-06,
      "loss": 1.2926650047302246,
      "memory(GiB)": 70.12,
      "step": 2,
      "token_acc": 0.7003531649008422,
      "train_speed(iter/s)": 0.006658
    },
    {
      "epoch": 1.8366013071895426,
      "grad_norm": 46.28285753049827,
      "learning_rate": 1.5e-06,
      "loss": 1.2248008251190186,
      "memory(GiB)": 73.88,
      "step": 3,
      "token_acc": 0.7076059411516261,
      "train_speed(iter/s)": 0.00554
    },
    {
      "epoch": 2.0,
      "grad_norm": 46.28285753049827,
      "learning_rate": 2.0000000000000003e-06,
      "loss": 1.1912627220153809,
      "memory(GiB)": 73.88,
      "step": 4,
      "token_acc": 0.7437572032270457,
      "train_speed(iter/s)": 0.006783
    },
    {
      "epoch": 2.8366013071895426,
      "grad_norm": 26.053497827470103,
      "learning_rate": 2.5e-06,
      "loss": 1.2252217531204224,
      "memory(GiB)": 73.88,
      "step": 5,
      "token_acc": 0.7153137946908087,
      "train_speed(iter/s)": 0.006016
    },
    {
      "epoch": 3.0,
      "grad_norm": 26.053497827470103,
      "learning_rate": 3e-06,
      "loss": 1.1733195781707764,
      "memory(GiB)": 73.88,
      "step": 6,
      "token_acc": 0.7316524437548487,
      "train_speed(iter/s)": 0.006842
    },
    {
      "epoch": 3.8366013071895426,
      "grad_norm": 25.113187350964623,
      "learning_rate": 3.5e-06,
      "loss": 1.193232536315918,
      "memory(GiB)": 73.88,
      "step": 7,
      "token_acc": 0.7223855851346701,
      "train_speed(iter/s)": 0.006249
    },
    {
      "epoch": 4.0,
      "grad_norm": 25.113187350964623,
      "learning_rate": 4.000000000000001e-06,
      "loss": 1.2005245685577393,
      "memory(GiB)": 73.88,
      "step": 8,
      "token_acc": 0.6989509456264775,
      "train_speed(iter/s)": 0.006854
    },
    {
      "epoch": 4.836601307189542,
      "grad_norm": 26.790508556858097,
      "learning_rate": 4.5e-06,
      "loss": 1.1337487697601318,
      "memory(GiB)": 73.88,
      "step": 9,
      "token_acc": 0.726628274308862,
      "train_speed(iter/s)": 0.006383
    },
    {
      "epoch": 5.0,
      "grad_norm": 26.790508556858097,
      "learning_rate": 5e-06,
      "loss": 1.1454615592956543,
      "memory(GiB)": 73.88,
      "step": 10,
      "token_acc": 0.7347552821610056,
      "train_speed(iter/s)": 0.006871
    },
    {
      "epoch": 5.836601307189542,
      "grad_norm": 16.77492733077185,
      "learning_rate": 4.99847706754774e-06,
      "loss": 1.109296441078186,
      "memory(GiB)": 73.88,
      "step": 11,
      "token_acc": 0.7136196961760084,
      "train_speed(iter/s)": 0.006387
    },
    {
      "epoch": 6.0,
      "grad_norm": 8.395208960618595,
      "learning_rate": 4.993910125649561e-06,
      "loss": 0.9854133129119873,
      "memory(GiB)": 73.88,
      "step": 12,
      "token_acc": 0.73675,
      "train_speed(iter/s)": 0.006775
    },
    {
      "epoch": 6.836601307189542,
      "grad_norm": 22.01001902702143,
      "learning_rate": 4.986304738420684e-06,
      "loss": 0.9927579760551453,
      "memory(GiB)": 73.88,
      "step": 13,
      "token_acc": 0.7237443757544719,
      "train_speed(iter/s)": 0.006457
    },
    {
      "epoch": 7.0,
      "grad_norm": 22.01001902702143,
      "learning_rate": 4.975670171853926e-06,
      "loss": 0.9843835234642029,
      "memory(GiB)": 73.88,
      "step": 14,
      "token_acc": 0.7270637408568443,
      "train_speed(iter/s)": 0.006802
    },
    {
      "epoch": 7.836601307189542,
      "grad_norm": 10.16419255564952,
      "learning_rate": 4.962019382530521e-06,
      "loss": 0.9618018865585327,
      "memory(GiB)": 73.88,
      "step": 15,
      "token_acc": 0.7322586331851213,
      "train_speed(iter/s)": 0.006517
    },
    {
      "epoch": 8.0,
      "grad_norm": 10.16419255564952,
      "learning_rate": 4.9453690018345144e-06,
      "loss": 0.9794554710388184,
      "memory(GiB)": 73.88,
      "step": 16,
      "token_acc": 0.7209527498063517,
      "train_speed(iter/s)": 0.006819
    },
    {
      "epoch": 8.836601307189543,
      "grad_norm": 8.936598159145898,
      "learning_rate": 4.925739315689991e-06,
      "loss": 0.884042501449585,
      "memory(GiB)": 73.88,
      "step": 17,
      "token_acc": 0.765604456673489,
      "train_speed(iter/s)": 0.006566
    },
    {
      "epoch": 9.0,
      "grad_norm": 8.936598159145898,
      "learning_rate": 4.903154239845798e-06,
      "loss": 0.8467985391616821,
      "memory(GiB)": 73.88,
      "step": 18,
      "token_acc": 0.7641219569841645,
      "train_speed(iter/s)": 0.006832
    },
    {
      "epoch": 9.836601307189543,
      "grad_norm": 9.489897315620743,
      "learning_rate": 4.8776412907378845e-06,
      "loss": 0.8125187754631042,
      "memory(GiB)": 73.88,
      "step": 19,
      "token_acc": 0.7775819474572186,
      "train_speed(iter/s)": 0.006605
    },
    {
      "epoch": 10.0,
      "grad_norm": 9.489897315620743,
      "learning_rate": 4.849231551964771e-06,
      "loss": 0.8234744071960449,
      "memory(GiB)": 73.88,
      "step": 20,
      "token_acc": 0.7693094048159927,
      "train_speed(iter/s)": 0.006841
    },
    {
      "epoch": 10.836601307189543,
      "grad_norm": 6.615671368004909,
      "learning_rate": 4.817959636416969e-06,
      "loss": 0.7924225330352783,
      "memory(GiB)": 73.88,
      "step": 21,
      "token_acc": 0.7783994898772517,
      "train_speed(iter/s)": 0.00658
    },
    {
      "epoch": 11.0,
      "grad_norm": 3.336986560168757,
      "learning_rate": 4.783863644106502e-06,
      "loss": 0.7326895594596863,
      "memory(GiB)": 73.88,
      "step": 22,
      "token_acc": 0.8017241379310345,
      "train_speed(iter/s)": 0.00679
    },
    {
      "epoch": 11.836601307189543,
      "grad_norm": 3.94940129971638,
      "learning_rate": 4.746985115747918e-06,
      "loss": 0.7615460753440857,
      "memory(GiB)": 73.88,
      "step": 23,
      "token_acc": 0.7804574209245743,
      "train_speed(iter/s)": 0.006604
    },
    {
      "epoch": 12.0,
      "grad_norm": 3.94940129971638,
      "learning_rate": 4.707368982147318e-06,
      "loss": 0.7170370817184448,
      "memory(GiB)": 73.88,
      "step": 24,
      "token_acc": 0.7875809693991512,
      "train_speed(iter/s)": 0.006799
    },
    {
      "epoch": 12.836601307189543,
      "grad_norm": 2.997641659787212,
      "learning_rate": 4.665063509461098e-06,
      "loss": 0.7290819883346558,
      "memory(GiB)": 73.88,
      "step": 25,
      "token_acc": 0.792799681401832,
      "train_speed(iter/s)": 0.006626
    },
    {
      "epoch": 13.0,
      "grad_norm": 2.997641659787212,
      "learning_rate": 4.620120240391065e-06,
      "loss": 0.7414878606796265,
      "memory(GiB)": 73.88,
      "step": 26,
      "token_acc": 0.7857853872187935,
      "train_speed(iter/s)": 0.006808
    },
    {
      "epoch": 13.836601307189543,
      "grad_norm": 3.139383133735181,
      "learning_rate": 4.572593931387604e-06,
      "loss": 0.7059791684150696,
      "memory(GiB)": 73.88,
      "step": 27,
      "token_acc": 0.7931544508930725,
      "train_speed(iter/s)": 0.006643
    },
    {
      "epoch": 14.0,
      "grad_norm": 3.139383133735181,
      "learning_rate": 4.522542485937369e-06,
      "loss": 0.7277886271476746,
      "memory(GiB)": 73.88,
      "step": 28,
      "token_acc": 0.7770681265206812,
      "train_speed(iter/s)": 0.006815
    },
    {
      "epoch": 14.836601307189543,
      "grad_norm": 2.9819864085495738,
      "learning_rate": 4.470026884016805e-06,
      "loss": 0.6715853214263916,
      "memory(GiB)": 73.88,
      "step": 29,
      "token_acc": 0.7992920128203084,
      "train_speed(iter/s)": 0.006666
    },
    {
      "epoch": 15.0,
      "grad_norm": 2.9819864085495738,
      "learning_rate": 4.415111107797445e-06,
      "loss": 0.6904126405715942,
      "memory(GiB)": 73.88,
      "step": 30,
      "token_acc": 0.8231940711784347,
      "train_speed(iter/s)": 0.006823
    }
  ],
  "logging_steps": 1,
  "max_steps": 100,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 50,
  "save_steps": 10,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 23690363387904.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}