qwen2.5_sft_it_7b_translate_epochs3 / trainer_state.json

Upload folder using huggingface_hub

1fc80ce verified 11 months ago

2.47 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 3.0,
	"eval_steps": 500,
	"global_step": 207,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.2898550724637681,
	"grad_norm": 0.7885370850563049,
	"learning_rate": 9.523809523809524e-05,
	"loss": 1.6677,
	"step": 20
	},
	{
	"epoch": 0.5797101449275363,
	"grad_norm": 0.2611646056175232,
	"learning_rate": 9.744735364682346e-05,
	"loss": 0.9644,
	"step": 40
	},
	{
	"epoch": 0.8695652173913043,
	"grad_norm": 0.3123443126678467,
	"learning_rate": 8.953878684688493e-05,
	"loss": 0.9091,
	"step": 60
	},
	{
	"epoch": 1.1594202898550725,
	"grad_norm": 0.3900753855705261,
	"learning_rate": 7.716108602402094e-05,
	"loss": 0.8599,
	"step": 80
	},
	{
	"epoch": 1.4492753623188406,
	"grad_norm": 0.3543001115322113,
	"learning_rate": 6.171332303360411e-05,
	"loss": 0.8032,
	"step": 100
	},
	{
	"epoch": 1.7391304347826086,
	"grad_norm": 0.42595645785331726,
	"learning_rate": 4.49415839006284e-05,
	"loss": 0.8319,
	"step": 120
	},
	{
	"epoch": 2.028985507246377,
	"grad_norm": 0.41535866260528564,
	"learning_rate": 2.874160584821798e-05,
	"loss": 0.8275,
	"step": 140
	},
	{
	"epoch": 2.318840579710145,
	"grad_norm": 0.45032790303230286,
	"learning_rate": 1.4944499005397371e-05,
	"loss": 0.7486,
	"step": 160
	},
	{
	"epoch": 2.608695652173913,
	"grad_norm": 0.4935797154903412,
	"learning_rate": 5.109773021462921e-06,
	"loss": 0.7324,
	"step": 180
	},
	{
	"epoch": 2.898550724637681,
	"grad_norm": 0.4422053396701813,
	"learning_rate": 3.490631287033197e-07,
	"loss": 0.7303,
	"step": 200
	}
	],
	"logging_steps": 20,
	"max_steps": 207,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 9.449028359530906e+16,
	"train_batch_size": 16,
	"trial_name": null,
	"trial_params": null
	}