Training in progress, step 5040, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 295488936
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2878ca186bfef3cce49ceaeb1b14a21f38a01c4b9b0ca7d8293f2ef04ec4f279
|
| 3 |
size 295488936
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 150487412
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8c977421cd4d1446425ef50b917af56a1e510a6d0dd97f67a30c9cbcdaa5bc9d
|
| 3 |
size 150487412
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0390e6f5919a7dede5c4b10e6ca69080410c973c54dff99d225ab4d891b78151
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ac3156db05f675ed073eccf0318853daf91582237d8a27d125ff8a6598249c53
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.9663305282592773,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-4950",
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -34929,6 +34929,636 @@
|
|
| 34929 |
"eval_samples_per_second": 27.95,
|
| 34930 |
"eval_steps_per_second": 13.975,
|
| 34931 |
"step": 4950
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34932 |
}
|
| 34933 |
],
|
| 34934 |
"logging_steps": 1,
|
|
@@ -34952,12 +35582,12 @@
|
|
| 34952 |
"should_evaluate": false,
|
| 34953 |
"should_log": false,
|
| 34954 |
"should_save": true,
|
| 34955 |
-
"should_training_stop":
|
| 34956 |
},
|
| 34957 |
"attributes": {}
|
| 34958 |
}
|
| 34959 |
},
|
| 34960 |
-
"total_flos": 3.
|
| 34961 |
"train_batch_size": 2,
|
| 34962 |
"trial_name": null,
|
| 34963 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 1.9663305282592773,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-4950",
|
| 4 |
+
"epoch": 0.3772949300993768,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 5040,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 34929 |
"eval_samples_per_second": 27.95,
|
| 34930 |
"eval_steps_per_second": 13.975,
|
| 34931 |
"step": 4950
|
| 34932 |
+
},
|
| 34933 |
+
{
|
| 34934 |
+
"epoch": 0.3706323807384949,
|
| 34935 |
+
"grad_norm": 1.2411137819290161,
|
| 34936 |
+
"learning_rate": 1.544549968628295e-07,
|
| 34937 |
+
"loss": 2.1719,
|
| 34938 |
+
"step": 4951
|
| 34939 |
+
},
|
| 34940 |
+
{
|
| 34941 |
+
"epoch": 0.3707072408436734,
|
| 34942 |
+
"grad_norm": 1.2201836109161377,
|
| 34943 |
+
"learning_rate": 1.5100446638173228e-07,
|
| 34944 |
+
"loss": 1.5187,
|
| 34945 |
+
"step": 4952
|
| 34946 |
+
},
|
| 34947 |
+
{
|
| 34948 |
+
"epoch": 0.37078210094885183,
|
| 34949 |
+
"grad_norm": 1.0959951877593994,
|
| 34950 |
+
"learning_rate": 1.4759288590058263e-07,
|
| 34951 |
+
"loss": 1.2851,
|
| 34952 |
+
"step": 4953
|
| 34953 |
+
},
|
| 34954 |
+
{
|
| 34955 |
+
"epoch": 0.37085696105403027,
|
| 34956 |
+
"grad_norm": 1.2576202154159546,
|
| 34957 |
+
"learning_rate": 1.4422025675020488e-07,
|
| 34958 |
+
"loss": 1.8899,
|
| 34959 |
+
"step": 4954
|
| 34960 |
+
},
|
| 34961 |
+
{
|
| 34962 |
+
"epoch": 0.37093182115920875,
|
| 34963 |
+
"grad_norm": 1.198002576828003,
|
| 34964 |
+
"learning_rate": 1.4088658024622448e-07,
|
| 34965 |
+
"loss": 2.171,
|
| 34966 |
+
"step": 4955
|
| 34967 |
+
},
|
| 34968 |
+
{
|
| 34969 |
+
"epoch": 0.3710066812643872,
|
| 34970 |
+
"grad_norm": 1.283689022064209,
|
| 34971 |
+
"learning_rate": 1.375918576890678e-07,
|
| 34972 |
+
"loss": 2.2446,
|
| 34973 |
+
"step": 4956
|
| 34974 |
+
},
|
| 34975 |
+
{
|
| 34976 |
+
"epoch": 0.3710815413695656,
|
| 34977 |
+
"grad_norm": 1.3751139640808105,
|
| 34978 |
+
"learning_rate": 1.3433609036397342e-07,
|
| 34979 |
+
"loss": 1.8505,
|
| 34980 |
+
"step": 4957
|
| 34981 |
+
},
|
| 34982 |
+
{
|
| 34983 |
+
"epoch": 0.37115640147474405,
|
| 34984 |
+
"grad_norm": 1.064444661140442,
|
| 34985 |
+
"learning_rate": 1.3111927954098102e-07,
|
| 34986 |
+
"loss": 1.7843,
|
| 34987 |
+
"step": 4958
|
| 34988 |
+
},
|
| 34989 |
+
{
|
| 34990 |
+
"epoch": 0.37123126157992253,
|
| 34991 |
+
"grad_norm": 1.0398433208465576,
|
| 34992 |
+
"learning_rate": 1.2794142647492013e-07,
|
| 34993 |
+
"loss": 1.9175,
|
| 34994 |
+
"step": 4959
|
| 34995 |
+
},
|
| 34996 |
+
{
|
| 34997 |
+
"epoch": 0.37130612168510096,
|
| 34998 |
+
"grad_norm": 1.3033219575881958,
|
| 34999 |
+
"learning_rate": 1.248025324054658e-07,
|
| 35000 |
+
"loss": 2.3778,
|
| 35001 |
+
"step": 4960
|
| 35002 |
+
},
|
| 35003 |
+
{
|
| 35004 |
+
"epoch": 0.3713809817902794,
|
| 35005 |
+
"grad_norm": 1.0516605377197266,
|
| 35006 |
+
"learning_rate": 1.2170259855703858e-07,
|
| 35007 |
+
"loss": 2.3922,
|
| 35008 |
+
"step": 4961
|
| 35009 |
+
},
|
| 35010 |
+
{
|
| 35011 |
+
"epoch": 0.3714558418954579,
|
| 35012 |
+
"grad_norm": 1.3150949478149414,
|
| 35013 |
+
"learning_rate": 1.186416261389045e-07,
|
| 35014 |
+
"loss": 2.2298,
|
| 35015 |
+
"step": 4962
|
| 35016 |
+
},
|
| 35017 |
+
{
|
| 35018 |
+
"epoch": 0.3715307020006363,
|
| 35019 |
+
"grad_norm": 1.3337782621383667,
|
| 35020 |
+
"learning_rate": 1.1561961634510843e-07,
|
| 35021 |
+
"loss": 2.1364,
|
| 35022 |
+
"step": 4963
|
| 35023 |
+
},
|
| 35024 |
+
{
|
| 35025 |
+
"epoch": 0.37160556210581475,
|
| 35026 |
+
"grad_norm": 1.1191245317459106,
|
| 35027 |
+
"learning_rate": 1.1263657035449627e-07,
|
| 35028 |
+
"loss": 1.8137,
|
| 35029 |
+
"step": 4964
|
| 35030 |
+
},
|
| 35031 |
+
{
|
| 35032 |
+
"epoch": 0.37168042221099323,
|
| 35033 |
+
"grad_norm": 1.032953143119812,
|
| 35034 |
+
"learning_rate": 1.0969248933073717e-07,
|
| 35035 |
+
"loss": 1.5611,
|
| 35036 |
+
"step": 4965
|
| 35037 |
+
},
|
| 35038 |
+
{
|
| 35039 |
+
"epoch": 0.37175528231617166,
|
| 35040 |
+
"grad_norm": 1.4030370712280273,
|
| 35041 |
+
"learning_rate": 1.0678737442227915e-07,
|
| 35042 |
+
"loss": 1.856,
|
| 35043 |
+
"step": 4966
|
| 35044 |
+
},
|
| 35045 |
+
{
|
| 35046 |
+
"epoch": 0.3718301424213501,
|
| 35047 |
+
"grad_norm": 1.1899338960647583,
|
| 35048 |
+
"learning_rate": 1.0392122676237126e-07,
|
| 35049 |
+
"loss": 1.4475,
|
| 35050 |
+
"step": 4967
|
| 35051 |
+
},
|
| 35052 |
+
{
|
| 35053 |
+
"epoch": 0.3719050025265285,
|
| 35054 |
+
"grad_norm": 1.318682312965393,
|
| 35055 |
+
"learning_rate": 1.0109404746907469e-07,
|
| 35056 |
+
"loss": 2.1785,
|
| 35057 |
+
"step": 4968
|
| 35058 |
+
},
|
| 35059 |
+
{
|
| 35060 |
+
"epoch": 0.371979862631707,
|
| 35061 |
+
"grad_norm": 1.1499639749526978,
|
| 35062 |
+
"learning_rate": 9.830583764522949e-08,
|
| 35063 |
+
"loss": 1.7568,
|
| 35064 |
+
"step": 4969
|
| 35065 |
+
},
|
| 35066 |
+
{
|
| 35067 |
+
"epoch": 0.37205472273688545,
|
| 35068 |
+
"grad_norm": 1.2958184480667114,
|
| 35069 |
+
"learning_rate": 9.555659837849895e-08,
|
| 35070 |
+
"loss": 1.8185,
|
| 35071 |
+
"step": 4970
|
| 35072 |
+
},
|
| 35073 |
+
{
|
| 35074 |
+
"epoch": 0.3721295828420639,
|
| 35075 |
+
"grad_norm": 1.2534544467926025,
|
| 35076 |
+
"learning_rate": 9.28463307413141e-08,
|
| 35077 |
+
"loss": 2.1521,
|
| 35078 |
+
"step": 4971
|
| 35079 |
+
},
|
| 35080 |
+
{
|
| 35081 |
+
"epoch": 0.37220444294724236,
|
| 35082 |
+
"grad_norm": 1.4329583644866943,
|
| 35083 |
+
"learning_rate": 9.017503579094033e-08,
|
| 35084 |
+
"loss": 2.2751,
|
| 35085 |
+
"step": 4972
|
| 35086 |
+
},
|
| 35087 |
+
{
|
| 35088 |
+
"epoch": 0.3722793030524208,
|
| 35089 |
+
"grad_norm": 1.1662907600402832,
|
| 35090 |
+
"learning_rate": 8.754271456941077e-08,
|
| 35091 |
+
"loss": 1.9949,
|
| 35092 |
+
"step": 4973
|
| 35093 |
+
},
|
| 35094 |
+
{
|
| 35095 |
+
"epoch": 0.3723541631575992,
|
| 35096 |
+
"grad_norm": 1.1763770580291748,
|
| 35097 |
+
"learning_rate": 8.494936810355958e-08,
|
| 35098 |
+
"loss": 2.207,
|
| 35099 |
+
"step": 4974
|
| 35100 |
+
},
|
| 35101 |
+
{
|
| 35102 |
+
"epoch": 0.37242902326277766,
|
| 35103 |
+
"grad_norm": 1.3006685972213745,
|
| 35104 |
+
"learning_rate": 8.23949974050331e-08,
|
| 35105 |
+
"loss": 1.8742,
|
| 35106 |
+
"step": 4975
|
| 35107 |
+
},
|
| 35108 |
+
{
|
| 35109 |
+
"epoch": 0.37250388336795615,
|
| 35110 |
+
"grad_norm": 1.51710844039917,
|
| 35111 |
+
"learning_rate": 7.987960347025647e-08,
|
| 35112 |
+
"loss": 2.1904,
|
| 35113 |
+
"step": 4976
|
| 35114 |
+
},
|
| 35115 |
+
{
|
| 35116 |
+
"epoch": 0.3725787434731346,
|
| 35117 |
+
"grad_norm": 1.4190149307250977,
|
| 35118 |
+
"learning_rate": 7.740318728045593e-08,
|
| 35119 |
+
"loss": 2.177,
|
| 35120 |
+
"step": 4977
|
| 35121 |
+
},
|
| 35122 |
+
{
|
| 35123 |
+
"epoch": 0.372653603578313,
|
| 35124 |
+
"grad_norm": 1.3400824069976807,
|
| 35125 |
+
"learning_rate": 7.496574980166982e-08,
|
| 35126 |
+
"loss": 1.7044,
|
| 35127 |
+
"step": 4978
|
| 35128 |
+
},
|
| 35129 |
+
{
|
| 35130 |
+
"epoch": 0.3727284636834915,
|
| 35131 |
+
"grad_norm": 1.1763372421264648,
|
| 35132 |
+
"learning_rate": 7.256729198469314e-08,
|
| 35133 |
+
"loss": 1.7179,
|
| 35134 |
+
"step": 4979
|
| 35135 |
+
},
|
| 35136 |
+
{
|
| 35137 |
+
"epoch": 0.3728033237886699,
|
| 35138 |
+
"grad_norm": 1.2219486236572266,
|
| 35139 |
+
"learning_rate": 7.020781476515525e-08,
|
| 35140 |
+
"loss": 1.726,
|
| 35141 |
+
"step": 4980
|
| 35142 |
+
},
|
| 35143 |
+
{
|
| 35144 |
+
"epoch": 0.37287818389384836,
|
| 35145 |
+
"grad_norm": 1.2846757173538208,
|
| 35146 |
+
"learning_rate": 6.788731906345325e-08,
|
| 35147 |
+
"loss": 2.4991,
|
| 35148 |
+
"step": 4981
|
| 35149 |
+
},
|
| 35150 |
+
{
|
| 35151 |
+
"epoch": 0.37295304399902685,
|
| 35152 |
+
"grad_norm": 1.2243740558624268,
|
| 35153 |
+
"learning_rate": 6.560580578479636e-08,
|
| 35154 |
+
"loss": 1.6773,
|
| 35155 |
+
"step": 4982
|
| 35156 |
+
},
|
| 35157 |
+
{
|
| 35158 |
+
"epoch": 0.3730279041042053,
|
| 35159 |
+
"grad_norm": 1.3469129800796509,
|
| 35160 |
+
"learning_rate": 6.336327581916157e-08,
|
| 35161 |
+
"loss": 1.9793,
|
| 35162 |
+
"step": 4983
|
| 35163 |
+
},
|
| 35164 |
+
{
|
| 35165 |
+
"epoch": 0.3731027642093837,
|
| 35166 |
+
"grad_norm": 1.2938308715820312,
|
| 35167 |
+
"learning_rate": 6.115973004134912e-08,
|
| 35168 |
+
"loss": 2.1317,
|
| 35169 |
+
"step": 4984
|
| 35170 |
+
},
|
| 35171 |
+
{
|
| 35172 |
+
"epoch": 0.37317762431456214,
|
| 35173 |
+
"grad_norm": 1.362736463546753,
|
| 35174 |
+
"learning_rate": 5.899516931093807e-08,
|
| 35175 |
+
"loss": 2.272,
|
| 35176 |
+
"step": 4985
|
| 35177 |
+
},
|
| 35178 |
+
{
|
| 35179 |
+
"epoch": 0.3732524844197406,
|
| 35180 |
+
"grad_norm": 1.1034882068634033,
|
| 35181 |
+
"learning_rate": 5.686959447229745e-08,
|
| 35182 |
+
"loss": 1.6906,
|
| 35183 |
+
"step": 4986
|
| 35184 |
+
},
|
| 35185 |
+
{
|
| 35186 |
+
"epoch": 0.37332734452491906,
|
| 35187 |
+
"grad_norm": 1.1260019540786743,
|
| 35188 |
+
"learning_rate": 5.478300635458622e-08,
|
| 35189 |
+
"loss": 2.0264,
|
| 35190 |
+
"step": 4987
|
| 35191 |
+
},
|
| 35192 |
+
{
|
| 35193 |
+
"epoch": 0.3734022046300975,
|
| 35194 |
+
"grad_norm": 1.3279393911361694,
|
| 35195 |
+
"learning_rate": 5.273540577176439e-08,
|
| 35196 |
+
"loss": 1.6167,
|
| 35197 |
+
"step": 4988
|
| 35198 |
+
},
|
| 35199 |
+
{
|
| 35200 |
+
"epoch": 0.373477064735276,
|
| 35201 |
+
"grad_norm": 1.314508318901062,
|
| 35202 |
+
"learning_rate": 5.0726793522570814e-08,
|
| 35203 |
+
"loss": 1.5518,
|
| 35204 |
+
"step": 4989
|
| 35205 |
+
},
|
| 35206 |
+
{
|
| 35207 |
+
"epoch": 0.3735519248404544,
|
| 35208 |
+
"grad_norm": 1.2558382749557495,
|
| 35209 |
+
"learning_rate": 4.8757170390556495e-08,
|
| 35210 |
+
"loss": 1.9004,
|
| 35211 |
+
"step": 4990
|
| 35212 |
+
},
|
| 35213 |
+
{
|
| 35214 |
+
"epoch": 0.37362678494563284,
|
| 35215 |
+
"grad_norm": 1.2909865379333496,
|
| 35216 |
+
"learning_rate": 4.682653714404017e-08,
|
| 35217 |
+
"loss": 1.7055,
|
| 35218 |
+
"step": 4991
|
| 35219 |
+
},
|
| 35220 |
+
{
|
| 35221 |
+
"epoch": 0.37370164505081127,
|
| 35222 |
+
"grad_norm": 1.1747167110443115,
|
| 35223 |
+
"learning_rate": 4.493489453614164e-08,
|
| 35224 |
+
"loss": 2.4907,
|
| 35225 |
+
"step": 4992
|
| 35226 |
+
},
|
| 35227 |
+
{
|
| 35228 |
+
"epoch": 0.37377650515598976,
|
| 35229 |
+
"grad_norm": 1.4869656562805176,
|
| 35230 |
+
"learning_rate": 4.3082243304770617e-08,
|
| 35231 |
+
"loss": 2.4387,
|
| 35232 |
+
"step": 4993
|
| 35233 |
+
},
|
| 35234 |
+
{
|
| 35235 |
+
"epoch": 0.3738513652611682,
|
| 35236 |
+
"grad_norm": 1.2070189714431763,
|
| 35237 |
+
"learning_rate": 4.12685841726268e-08,
|
| 35238 |
+
"loss": 1.8671,
|
| 35239 |
+
"step": 4994
|
| 35240 |
+
},
|
| 35241 |
+
{
|
| 35242 |
+
"epoch": 0.3739262253663466,
|
| 35243 |
+
"grad_norm": 1.234904408454895,
|
| 35244 |
+
"learning_rate": 3.94939178471998e-08,
|
| 35245 |
+
"loss": 1.9867,
|
| 35246 |
+
"step": 4995
|
| 35247 |
+
},
|
| 35248 |
+
{
|
| 35249 |
+
"epoch": 0.3740010854715251,
|
| 35250 |
+
"grad_norm": 1.1703590154647827,
|
| 35251 |
+
"learning_rate": 3.775824502076919e-08,
|
| 35252 |
+
"loss": 2.0854,
|
| 35253 |
+
"step": 4996
|
| 35254 |
+
},
|
| 35255 |
+
{
|
| 35256 |
+
"epoch": 0.37407594557670354,
|
| 35257 |
+
"grad_norm": 1.4773565530776978,
|
| 35258 |
+
"learning_rate": 3.6061566370393376e-08,
|
| 35259 |
+
"loss": 2.1288,
|
| 35260 |
+
"step": 4997
|
| 35261 |
+
},
|
| 35262 |
+
{
|
| 35263 |
+
"epoch": 0.37415080568188197,
|
| 35264 |
+
"grad_norm": 1.5366137027740479,
|
| 35265 |
+
"learning_rate": 3.4403882557942915e-08,
|
| 35266 |
+
"loss": 2.2701,
|
| 35267 |
+
"step": 4998
|
| 35268 |
+
},
|
| 35269 |
+
{
|
| 35270 |
+
"epoch": 0.37422566578706046,
|
| 35271 |
+
"grad_norm": 1.0518525838851929,
|
| 35272 |
+
"learning_rate": 3.2785194230045004e-08,
|
| 35273 |
+
"loss": 1.7945,
|
| 35274 |
+
"step": 4999
|
| 35275 |
+
},
|
| 35276 |
+
{
|
| 35277 |
+
"epoch": 0.3743005258922389,
|
| 35278 |
+
"grad_norm": 1.3006261587142944,
|
| 35279 |
+
"learning_rate": 3.120550201815009e-08,
|
| 35280 |
+
"loss": 1.9247,
|
| 35281 |
+
"step": 5000
|
| 35282 |
+
},
|
| 35283 |
+
{
|
| 35284 |
+
"epoch": 0.3743753859974173,
|
| 35285 |
+
"grad_norm": 1.5011438131332397,
|
| 35286 |
+
"learning_rate": 2.9664806538465262e-08,
|
| 35287 |
+
"loss": 2.2498,
|
| 35288 |
+
"step": 5001
|
| 35289 |
+
},
|
| 35290 |
+
{
|
| 35291 |
+
"epoch": 0.37445024610259575,
|
| 35292 |
+
"grad_norm": 1.3992769718170166,
|
| 35293 |
+
"learning_rate": 2.816310839199865e-08,
|
| 35294 |
+
"loss": 1.8977,
|
| 35295 |
+
"step": 5002
|
| 35296 |
+
},
|
| 35297 |
+
{
|
| 35298 |
+
"epoch": 0.37452510620777424,
|
| 35299 |
+
"grad_norm": 1.1042829751968384,
|
| 35300 |
+
"learning_rate": 2.6700408164548328e-08,
|
| 35301 |
+
"loss": 1.2875,
|
| 35302 |
+
"step": 5003
|
| 35303 |
+
},
|
| 35304 |
+
{
|
| 35305 |
+
"epoch": 0.37459996631295267,
|
| 35306 |
+
"grad_norm": 1.28590726852417,
|
| 35307 |
+
"learning_rate": 2.5276706426713425e-08,
|
| 35308 |
+
"loss": 1.9616,
|
| 35309 |
+
"step": 5004
|
| 35310 |
+
},
|
| 35311 |
+
{
|
| 35312 |
+
"epoch": 0.3746748264181311,
|
| 35313 |
+
"grad_norm": 1.1898860931396484,
|
| 35314 |
+
"learning_rate": 2.3892003733838598e-08,
|
| 35315 |
+
"loss": 1.7639,
|
| 35316 |
+
"step": 5005
|
| 35317 |
+
},
|
| 35318 |
+
{
|
| 35319 |
+
"epoch": 0.3747496865233096,
|
| 35320 |
+
"grad_norm": 1.2147506475448608,
|
| 35321 |
+
"learning_rate": 2.2546300626091753e-08,
|
| 35322 |
+
"loss": 1.849,
|
| 35323 |
+
"step": 5006
|
| 35324 |
+
},
|
| 35325 |
+
{
|
| 35326 |
+
"epoch": 0.374824546628488,
|
| 35327 |
+
"grad_norm": 1.2294347286224365,
|
| 35328 |
+
"learning_rate": 2.123959762843075e-08,
|
| 35329 |
+
"loss": 2.1053,
|
| 35330 |
+
"step": 5007
|
| 35331 |
+
},
|
| 35332 |
+
{
|
| 35333 |
+
"epoch": 0.37489940673366645,
|
| 35334 |
+
"grad_norm": 1.2910950183868408,
|
| 35335 |
+
"learning_rate": 1.997189525055898e-08,
|
| 35336 |
+
"loss": 2.2129,
|
| 35337 |
+
"step": 5008
|
| 35338 |
+
},
|
| 35339 |
+
{
|
| 35340 |
+
"epoch": 0.3749742668388449,
|
| 35341 |
+
"grad_norm": 1.4780951738357544,
|
| 35342 |
+
"learning_rate": 1.874319398702529e-08,
|
| 35343 |
+
"loss": 1.9618,
|
| 35344 |
+
"step": 5009
|
| 35345 |
+
},
|
| 35346 |
+
{
|
| 35347 |
+
"epoch": 0.37504912694402337,
|
| 35348 |
+
"grad_norm": 1.4901782274246216,
|
| 35349 |
+
"learning_rate": 1.755349431710185e-08,
|
| 35350 |
+
"loss": 2.0023,
|
| 35351 |
+
"step": 5010
|
| 35352 |
+
},
|
| 35353 |
+
{
|
| 35354 |
+
"epoch": 0.3751239870492018,
|
| 35355 |
+
"grad_norm": 1.3150880336761475,
|
| 35356 |
+
"learning_rate": 1.6402796704895196e-08,
|
| 35357 |
+
"loss": 1.5646,
|
| 35358 |
+
"step": 5011
|
| 35359 |
+
},
|
| 35360 |
+
{
|
| 35361 |
+
"epoch": 0.37519884715438023,
|
| 35362 |
+
"grad_norm": 1.3421481847763062,
|
| 35363 |
+
"learning_rate": 1.52911015992796e-08,
|
| 35364 |
+
"loss": 2.0128,
|
| 35365 |
+
"step": 5012
|
| 35366 |
+
},
|
| 35367 |
+
{
|
| 35368 |
+
"epoch": 0.3752737072595587,
|
| 35369 |
+
"grad_norm": 1.2243260145187378,
|
| 35370 |
+
"learning_rate": 1.4218409433908175e-08,
|
| 35371 |
+
"loss": 2.037,
|
| 35372 |
+
"step": 5013
|
| 35373 |
+
},
|
| 35374 |
+
{
|
| 35375 |
+
"epoch": 0.37534856736473715,
|
| 35376 |
+
"grad_norm": 1.2326892614364624,
|
| 35377 |
+
"learning_rate": 1.3184720627235081e-08,
|
| 35378 |
+
"loss": 1.8788,
|
| 35379 |
+
"step": 5014
|
| 35380 |
+
},
|
| 35381 |
+
{
|
| 35382 |
+
"epoch": 0.3754234274699156,
|
| 35383 |
+
"grad_norm": 1.4980422258377075,
|
| 35384 |
+
"learning_rate": 1.2190035582471115e-08,
|
| 35385 |
+
"loss": 2.3279,
|
| 35386 |
+
"step": 5015
|
| 35387 |
+
},
|
| 35388 |
+
{
|
| 35389 |
+
"epoch": 0.37549828757509407,
|
| 35390 |
+
"grad_norm": 1.1286108493804932,
|
| 35391 |
+
"learning_rate": 1.123435468766143e-08,
|
| 35392 |
+
"loss": 1.798,
|
| 35393 |
+
"step": 5016
|
| 35394 |
+
},
|
| 35395 |
+
{
|
| 35396 |
+
"epoch": 0.3755731476802725,
|
| 35397 |
+
"grad_norm": 1.1455106735229492,
|
| 35398 |
+
"learning_rate": 1.031767831558561e-08,
|
| 35399 |
+
"loss": 1.6733,
|
| 35400 |
+
"step": 5017
|
| 35401 |
+
},
|
| 35402 |
+
{
|
| 35403 |
+
"epoch": 0.37564800778545093,
|
| 35404 |
+
"grad_norm": 1.0452263355255127,
|
| 35405 |
+
"learning_rate": 9.44000682383539e-09,
|
| 35406 |
+
"loss": 1.8773,
|
| 35407 |
+
"step": 5018
|
| 35408 |
+
},
|
| 35409 |
+
{
|
| 35410 |
+
"epoch": 0.37572286789062936,
|
| 35411 |
+
"grad_norm": 1.4218225479125977,
|
| 35412 |
+
"learning_rate": 8.601340554781346e-09,
|
| 35413 |
+
"loss": 2.0528,
|
| 35414 |
+
"step": 5019
|
| 35415 |
+
},
|
| 35416 |
+
{
|
| 35417 |
+
"epoch": 0.37579772799580785,
|
| 35418 |
+
"grad_norm": 1.6470973491668701,
|
| 35419 |
+
"learning_rate": 7.801679835572895e-09,
|
| 35420 |
+
"loss": 1.7786,
|
| 35421 |
+
"step": 5020
|
| 35422 |
+
},
|
| 35423 |
+
{
|
| 35424 |
+
"epoch": 0.3758725881009863,
|
| 35425 |
+
"grad_norm": 1.2469727993011475,
|
| 35426 |
+
"learning_rate": 7.041024978160504e-09,
|
| 35427 |
+
"loss": 2.051,
|
| 35428 |
+
"step": 5021
|
| 35429 |
+
},
|
| 35430 |
+
{
|
| 35431 |
+
"epoch": 0.3759474482061647,
|
| 35432 |
+
"grad_norm": 1.224454641342163,
|
| 35433 |
+
"learning_rate": 6.319376279262379e-09,
|
| 35434 |
+
"loss": 2.2662,
|
| 35435 |
+
"step": 5022
|
| 35436 |
+
},
|
| 35437 |
+
{
|
| 35438 |
+
"epoch": 0.3760223083113432,
|
| 35439 |
+
"grad_norm": 1.1871840953826904,
|
| 35440 |
+
"learning_rate": 5.636734020375567e-09,
|
| 35441 |
+
"loss": 1.5023,
|
| 35442 |
+
"step": 5023
|
| 35443 |
+
},
|
| 35444 |
+
{
|
| 35445 |
+
"epoch": 0.37609716841652163,
|
| 35446 |
+
"grad_norm": 1.135377049446106,
|
| 35447 |
+
"learning_rate": 4.993098467798163e-09,
|
| 35448 |
+
"loss": 1.3566,
|
| 35449 |
+
"step": 5024
|
| 35450 |
+
},
|
| 35451 |
+
{
|
| 35452 |
+
"epoch": 0.37617202852170006,
|
| 35453 |
+
"grad_norm": 1.3303520679473877,
|
| 35454 |
+
"learning_rate": 4.388469872618206e-09,
|
| 35455 |
+
"loss": 1.3843,
|
| 35456 |
+
"step": 5025
|
| 35457 |
+
},
|
| 35458 |
+
{
|
| 35459 |
+
"epoch": 0.3762468886268785,
|
| 35460 |
+
"grad_norm": 1.462297797203064,
|
| 35461 |
+
"learning_rate": 3.822848470669272e-09,
|
| 35462 |
+
"loss": 2.2968,
|
| 35463 |
+
"step": 5026
|
| 35464 |
+
},
|
| 35465 |
+
{
|
| 35466 |
+
"epoch": 0.376321748732057,
|
| 35467 |
+
"grad_norm": 1.2747176885604858,
|
| 35468 |
+
"learning_rate": 3.296234482619287e-09,
|
| 35469 |
+
"loss": 1.7962,
|
| 35470 |
+
"step": 5027
|
| 35471 |
+
},
|
| 35472 |
+
{
|
| 35473 |
+
"epoch": 0.3763966088372354,
|
| 35474 |
+
"grad_norm": 1.284083604812622,
|
| 35475 |
+
"learning_rate": 2.8086281138706148e-09,
|
| 35476 |
+
"loss": 2.1965,
|
| 35477 |
+
"step": 5028
|
| 35478 |
+
},
|
| 35479 |
+
{
|
| 35480 |
+
"epoch": 0.37647146894241384,
|
| 35481 |
+
"grad_norm": 1.3260568380355835,
|
| 35482 |
+
"learning_rate": 2.3600295546599704e-09,
|
| 35483 |
+
"loss": 1.9976,
|
| 35484 |
+
"step": 5029
|
| 35485 |
+
},
|
| 35486 |
+
{
|
| 35487 |
+
"epoch": 0.37654632904759233,
|
| 35488 |
+
"grad_norm": 1.0794789791107178,
|
| 35489 |
+
"learning_rate": 1.950438979958502e-09,
|
| 35490 |
+
"loss": 1.7483,
|
| 35491 |
+
"step": 5030
|
| 35492 |
+
},
|
| 35493 |
+
{
|
| 35494 |
+
"epoch": 0.37662118915277076,
|
| 35495 |
+
"grad_norm": 1.2783838510513306,
|
| 35496 |
+
"learning_rate": 1.5798565495495076e-09,
|
| 35497 |
+
"loss": 1.5004,
|
| 35498 |
+
"step": 5031
|
| 35499 |
+
},
|
| 35500 |
+
{
|
| 35501 |
+
"epoch": 0.3766960492579492,
|
| 35502 |
+
"grad_norm": 1.3007869720458984,
|
| 35503 |
+
"learning_rate": 1.2482824079951271e-09,
|
| 35504 |
+
"loss": 2.0624,
|
| 35505 |
+
"step": 5032
|
| 35506 |
+
},
|
| 35507 |
+
{
|
| 35508 |
+
"epoch": 0.3767709093631277,
|
| 35509 |
+
"grad_norm": 1.372740387916565,
|
| 35510 |
+
"learning_rate": 9.55716684636343e-10,
|
| 35511 |
+
"loss": 1.866,
|
| 35512 |
+
"step": 5033
|
| 35513 |
+
},
|
| 35514 |
+
{
|
| 35515 |
+
"epoch": 0.3768457694683061,
|
| 35516 |
+
"grad_norm": 1.371090054512024,
|
| 35517 |
+
"learning_rate": 7.021594936040821e-10,
|
| 35518 |
+
"loss": 1.9814,
|
| 35519 |
+
"step": 5034
|
| 35520 |
+
},
|
| 35521 |
+
{
|
| 35522 |
+
"epoch": 0.37692062957348454,
|
| 35523 |
+
"grad_norm": 1.1047563552856445,
|
| 35524 |
+
"learning_rate": 4.876109338081137e-10,
|
| 35525 |
+
"loss": 1.2821,
|
| 35526 |
+
"step": 5035
|
| 35527 |
+
},
|
| 35528 |
+
{
|
| 35529 |
+
"epoch": 0.376995489678663,
|
| 35530 |
+
"grad_norm": 1.2861955165863037,
|
| 35531 |
+
"learning_rate": 3.1207108893704927e-10,
|
| 35532 |
+
"loss": 1.6591,
|
| 35533 |
+
"step": 5036
|
| 35534 |
+
},
|
| 35535 |
+
{
|
| 35536 |
+
"epoch": 0.37707034978384146,
|
| 35537 |
+
"grad_norm": 1.1517539024353027,
|
| 35538 |
+
"learning_rate": 1.755400274694452e-10,
|
| 35539 |
+
"loss": 1.8742,
|
| 35540 |
+
"step": 5037
|
| 35541 |
+
},
|
| 35542 |
+
{
|
| 35543 |
+
"epoch": 0.3771452098890199,
|
| 35544 |
+
"grad_norm": 1.1889746189117432,
|
| 35545 |
+
"learning_rate": 7.80178026738021e-11,
|
| 35546 |
+
"loss": 1.9279,
|
| 35547 |
+
"step": 5038
|
| 35548 |
+
},
|
| 35549 |
+
{
|
| 35550 |
+
"epoch": 0.3772200699941983,
|
| 35551 |
+
"grad_norm": 1.262064814567566,
|
| 35552 |
+
"learning_rate": 1.950445256415634e-11,
|
| 35553 |
+
"loss": 1.9269,
|
| 35554 |
+
"step": 5039
|
| 35555 |
+
},
|
| 35556 |
+
{
|
| 35557 |
+
"epoch": 0.3772949300993768,
|
| 35558 |
+
"grad_norm": 1.4935128688812256,
|
| 35559 |
+
"learning_rate": 0.0,
|
| 35560 |
+
"loss": 1.9802,
|
| 35561 |
+
"step": 5040
|
| 35562 |
}
|
| 35563 |
],
|
| 35564 |
"logging_steps": 1,
|
|
|
|
| 35582 |
"should_evaluate": false,
|
| 35583 |
"should_log": false,
|
| 35584 |
"should_save": true,
|
| 35585 |
+
"should_training_stop": true
|
| 35586 |
},
|
| 35587 |
"attributes": {}
|
| 35588 |
}
|
| 35589 |
},
|
| 35590 |
+
"total_flos": 3.431234782573363e+17,
|
| 35591 |
"train_batch_size": 2,
|
| 35592 |
"trial_name": null,
|
| 35593 |
"trial_params": null
|