{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"eval_steps": 10,
"global_step": 30,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.8366013071895425,
"grad_norm": 23.30156092439309,
"learning_rate": 5.000000000000001e-07,
"loss": 1.2058446407318115,
"memory(GiB)": 70.12,
"step": 1,
"token_acc": 0.69863896716209,
"train_speed(iter/s)": 0.003918
},
{
"epoch": 1.0,
"grad_norm": 23.30156092439309,
"learning_rate": 1.0000000000000002e-06,
"loss": 1.2926650047302246,
"memory(GiB)": 70.12,
"step": 2,
"token_acc": 0.7003531649008422,
"train_speed(iter/s)": 0.006658
},
{
"epoch": 1.8366013071895426,
"grad_norm": 46.28285753049827,
"learning_rate": 1.5e-06,
"loss": 1.2248008251190186,
"memory(GiB)": 73.88,
"step": 3,
"token_acc": 0.7076059411516261,
"train_speed(iter/s)": 0.00554
},
{
"epoch": 2.0,
"grad_norm": 46.28285753049827,
"learning_rate": 2.0000000000000003e-06,
"loss": 1.1912627220153809,
"memory(GiB)": 73.88,
"step": 4,
"token_acc": 0.7437572032270457,
"train_speed(iter/s)": 0.006783
},
{
"epoch": 2.8366013071895426,
"grad_norm": 26.053497827470103,
"learning_rate": 2.5e-06,
"loss": 1.2252217531204224,
"memory(GiB)": 73.88,
"step": 5,
"token_acc": 0.7153137946908087,
"train_speed(iter/s)": 0.006016
},
{
"epoch": 3.0,
"grad_norm": 26.053497827470103,
"learning_rate": 3e-06,
"loss": 1.1733195781707764,
"memory(GiB)": 73.88,
"step": 6,
"token_acc": 0.7316524437548487,
"train_speed(iter/s)": 0.006842
},
{
"epoch": 3.8366013071895426,
"grad_norm": 25.113187350964623,
"learning_rate": 3.5e-06,
"loss": 1.193232536315918,
"memory(GiB)": 73.88,
"step": 7,
"token_acc": 0.7223855851346701,
"train_speed(iter/s)": 0.006249
},
{
"epoch": 4.0,
"grad_norm": 25.113187350964623,
"learning_rate": 4.000000000000001e-06,
"loss": 1.2005245685577393,
"memory(GiB)": 73.88,
"step": 8,
"token_acc": 0.6989509456264775,
"train_speed(iter/s)": 0.006854
},
{
"epoch": 4.836601307189542,
"grad_norm": 26.790508556858097,
"learning_rate": 4.5e-06,
"loss": 1.1337487697601318,
"memory(GiB)": 73.88,
"step": 9,
"token_acc": 0.726628274308862,
"train_speed(iter/s)": 0.006383
},
{
"epoch": 5.0,
"grad_norm": 26.790508556858097,
"learning_rate": 5e-06,
"loss": 1.1454615592956543,
"memory(GiB)": 73.88,
"step": 10,
"token_acc": 0.7347552821610056,
"train_speed(iter/s)": 0.006871
},
{
"epoch": 5.836601307189542,
"grad_norm": 16.77492733077185,
"learning_rate": 4.99847706754774e-06,
"loss": 1.109296441078186,
"memory(GiB)": 73.88,
"step": 11,
"token_acc": 0.7136196961760084,
"train_speed(iter/s)": 0.006387
},
{
"epoch": 6.0,
"grad_norm": 8.395208960618595,
"learning_rate": 4.993910125649561e-06,
"loss": 0.9854133129119873,
"memory(GiB)": 73.88,
"step": 12,
"token_acc": 0.73675,
"train_speed(iter/s)": 0.006775
},
{
"epoch": 6.836601307189542,
"grad_norm": 22.01001902702143,
"learning_rate": 4.986304738420684e-06,
"loss": 0.9927579760551453,
"memory(GiB)": 73.88,
"step": 13,
"token_acc": 0.7237443757544719,
"train_speed(iter/s)": 0.006457
},
{
"epoch": 7.0,
"grad_norm": 22.01001902702143,
"learning_rate": 4.975670171853926e-06,
"loss": 0.9843835234642029,
"memory(GiB)": 73.88,
"step": 14,
"token_acc": 0.7270637408568443,
"train_speed(iter/s)": 0.006802
},
{
"epoch": 7.836601307189542,
"grad_norm": 10.16419255564952,
"learning_rate": 4.962019382530521e-06,
"loss": 0.9618018865585327,
"memory(GiB)": 73.88,
"step": 15,
"token_acc": 0.7322586331851213,
"train_speed(iter/s)": 0.006517
},
{
"epoch": 8.0,
"grad_norm": 10.16419255564952,
"learning_rate": 4.9453690018345144e-06,
"loss": 0.9794554710388184,
"memory(GiB)": 73.88,
"step": 16,
"token_acc": 0.7209527498063517,
"train_speed(iter/s)": 0.006819
},
{
"epoch": 8.836601307189543,
"grad_norm": 8.936598159145898,
"learning_rate": 4.925739315689991e-06,
"loss": 0.884042501449585,
"memory(GiB)": 73.88,
"step": 17,
"token_acc": 0.765604456673489,
"train_speed(iter/s)": 0.006566
},
{
"epoch": 9.0,
"grad_norm": 8.936598159145898,
"learning_rate": 4.903154239845798e-06,
"loss": 0.8467985391616821,
"memory(GiB)": 73.88,
"step": 18,
"token_acc": 0.7641219569841645,
"train_speed(iter/s)": 0.006832
},
{
"epoch": 9.836601307189543,
"grad_norm": 9.489897315620743,
"learning_rate": 4.8776412907378845e-06,
"loss": 0.8125187754631042,
"memory(GiB)": 73.88,
"step": 19,
"token_acc": 0.7775819474572186,
"train_speed(iter/s)": 0.006605
},
{
"epoch": 10.0,
"grad_norm": 9.489897315620743,
"learning_rate": 4.849231551964771e-06,
"loss": 0.8234744071960449,
"memory(GiB)": 73.88,
"step": 20,
"token_acc": 0.7693094048159927,
"train_speed(iter/s)": 0.006841
},
{
"epoch": 10.836601307189543,
"grad_norm": 6.615671368004909,
"learning_rate": 4.817959636416969e-06,
"loss": 0.7924225330352783,
"memory(GiB)": 73.88,
"step": 21,
"token_acc": 0.7783994898772517,
"train_speed(iter/s)": 0.00658
},
{
"epoch": 11.0,
"grad_norm": 3.336986560168757,
"learning_rate": 4.783863644106502e-06,
"loss": 0.7326895594596863,
"memory(GiB)": 73.88,
"step": 22,
"token_acc": 0.8017241379310345,
"train_speed(iter/s)": 0.00679
},
{
"epoch": 11.836601307189543,
"grad_norm": 3.94940129971638,
"learning_rate": 4.746985115747918e-06,
"loss": 0.7615460753440857,
"memory(GiB)": 73.88,
"step": 23,
"token_acc": 0.7804574209245743,
"train_speed(iter/s)": 0.006604
},
{
"epoch": 12.0,
"grad_norm": 3.94940129971638,
"learning_rate": 4.707368982147318e-06,
"loss": 0.7170370817184448,
"memory(GiB)": 73.88,
"step": 24,
"token_acc": 0.7875809693991512,
"train_speed(iter/s)": 0.006799
},
{
"epoch": 12.836601307189543,
"grad_norm": 2.997641659787212,
"learning_rate": 4.665063509461098e-06,
"loss": 0.7290819883346558,
"memory(GiB)": 73.88,
"step": 25,
"token_acc": 0.792799681401832,
"train_speed(iter/s)": 0.006626
},
{
"epoch": 13.0,
"grad_norm": 2.997641659787212,
"learning_rate": 4.620120240391065e-06,
"loss": 0.7414878606796265,
"memory(GiB)": 73.88,
"step": 26,
"token_acc": 0.7857853872187935,
"train_speed(iter/s)": 0.006808
},
{
"epoch": 13.836601307189543,
"grad_norm": 3.139383133735181,
"learning_rate": 4.572593931387604e-06,
"loss": 0.7059791684150696,
"memory(GiB)": 73.88,
"step": 27,
"token_acc": 0.7931544508930725,
"train_speed(iter/s)": 0.006643
},
{
"epoch": 14.0,
"grad_norm": 3.139383133735181,
"learning_rate": 4.522542485937369e-06,
"loss": 0.7277886271476746,
"memory(GiB)": 73.88,
"step": 28,
"token_acc": 0.7770681265206812,
"train_speed(iter/s)": 0.006815
},
{
"epoch": 14.836601307189543,
"grad_norm": 2.9819864085495738,
"learning_rate": 4.470026884016805e-06,
"loss": 0.6715853214263916,
"memory(GiB)": 73.88,
"step": 29,
"token_acc": 0.7992920128203084,
"train_speed(iter/s)": 0.006666
},
{
"epoch": 15.0,
"grad_norm": 2.9819864085495738,
"learning_rate": 4.415111107797445e-06,
"loss": 0.6904126405715942,
"memory(GiB)": 73.88,
"step": 30,
"token_acc": 0.8231940711784347,
"train_speed(iter/s)": 0.006823
}
],
"logging_steps": 1,
"max_steps": 100,
"num_input_tokens_seen": 0,
"num_train_epochs": 50,
"save_steps": 10,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 23690363387904.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}