Training in progress, step 1632, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:606d941368e5e908892cdca2bf257777d105ccc15776c27e030aec94010ad52d
 size 1001465824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ef54f481cfc55831d617d32ccd8c24e189c729922ed51bd0537d3a277e97f15c
 size 509177556
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:376696ebf418aee8f471368726d03dcc0387f0576fac91edfc5c11f9c75144b0
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:b37b8068d6bd2408ba2f4eb1ccb714af64ff240faeeda5f3beb7fcbb8af5f1bd
 size 1064
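All four files above are Git LFS pointers: the repository tracks only the spec version, the sha256 of the blob, and its byte size, while the checkpoint tensors themselves live in LFS storage (the old-side oids were truncated in this view and are left as-is). Below is a minimal sketch of validating a downloaded blob against such a pointer; the helper names and file paths are illustrative assumptions, not part of this commit:

import hashlib

def parse_lfs_pointer(path):
    # A pointer file holds lines like "oid sha256:<hex>" and "size <bytes>".
    fields = {}
    with open(path) as f:
        for line in f:
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

def verify_blob(pointer_path, blob_path):
    # Stream the blob, then compare its digest and size to the pointer's records.
    fields = parse_lfs_pointer(pointer_path)
    expected = fields["oid"].removeprefix("sha256:")
    digest, size = hashlib.sha256(), 0
    with open(blob_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
            size += len(chunk)
    return digest.hexdigest() == expected and size == int(fields["size"])

# e.g. verify_blob("adapter_model.safetensors", "downloads/adapter_model.safetensors")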
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 0.5886463522911072,
   "best_model_checkpoint": "miner_id_24/checkpoint-1500",
-  "epoch": 0.
+  "epoch": 0.5580918184149782,
   "eval_steps": 150,
-  "global_step":
+  "global_step": 1632,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
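The updated header is self-consistent: epoch 0.5580918184149782 at global_step 1632 works out to roughly 2,924 optimizer steps per epoch, which matches the per-step epoch increment (about 0.000342) visible between consecutive log entries in the hunk below. A quick sketch of that arithmetic; the steps-per-epoch figure is inferred from these two fields, not stored in the file:

epoch, global_step = 0.5580918184149782, 1632   # from the + header lines above

steps_per_epoch = global_step / epoch        # about 2924.25
per_step_increment = epoch / global_step     # about 0.000342 per optimizer step
print(round(steps_per_epoch, 2), round(per_step_increment, 6))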
@@ -10595,6 +10595,930 @@
       "eval_samples_per_second": 5.325,
       "eval_steps_per_second": 2.663,
       "step": 1500
+    },
+    {
+      "epoch": 0.5132940070103446,
+      "grad_norm": 0.3836262822151184,
+      "learning_rate": 3.2016912977318236e-06,
+      "loss": 0.669,
+      "step": 1501
+    },
+    {
+      "epoch": 0.5136359750363341,
+      "grad_norm": 0.4739679992198944,
+      "learning_rate": 3.1532546324110957e-06,
+      "loss": 0.712,
+      "step": 1502
+    },
+    {
+      "epoch": 0.5139779430623237,
+      "grad_norm": 0.45204055309295654,
+      "learning_rate": 3.10518128176539e-06,
+      "loss": 0.6022,
+      "step": 1503
+    },
+    {
+      "epoch": 0.5143199110883132,
+      "grad_norm": 0.30763939023017883,
+      "learning_rate": 3.057471426138958e-06,
+      "loss": 0.5936,
+      "step": 1504
+    },
+    {
+      "epoch": 0.5146618791143028,
+      "grad_norm": 0.3767003118991852,
+      "learning_rate": 3.0101252445124404e-06,
+      "loss": 0.6348,
+      "step": 1505
+    },
+    {
+      "epoch": 0.5150038471402923,
+      "grad_norm": 0.41161495447158813,
+      "learning_rate": 2.9631429145021373e-06,
+      "loss": 0.5398,
+      "step": 1506
+    },
+    {
+      "epoch": 0.515345815166282,
+      "grad_norm": 0.3816090226173401,
+      "learning_rate": 2.9165246123594057e-06,
+      "loss": 0.5449,
+      "step": 1507
+    },
+    {
+      "epoch": 0.5156877831922715,
+      "grad_norm": 0.4627479612827301,
+      "learning_rate": 2.8702705129699858e-06,
+      "loss": 0.5231,
+      "step": 1508
+    },
+    {
+      "epoch": 0.5160297512182611,
+      "grad_norm": 0.4261241853237152,
+      "learning_rate": 2.824380789853287e-06,
+      "loss": 0.5188,
+      "step": 1509
+    },
+    {
+      "epoch": 0.5163717192442506,
+      "grad_norm": 0.2956761419773102,
+      "learning_rate": 2.778855615161846e-06,
+      "loss": 0.4516,
+      "step": 1510
+    },
+    {
+      "epoch": 0.5167136872702403,
+      "grad_norm": 0.4120533764362335,
+      "learning_rate": 2.7336951596805826e-06,
+      "loss": 0.3968,
+      "step": 1511
+    },
+    {
+      "epoch": 0.5170556552962298,
+      "grad_norm": 0.49912378191947937,
+      "learning_rate": 2.6888995928261993e-06,
+      "loss": 0.5095,
+      "step": 1512
+    },
+    {
+      "epoch": 0.5173976233222194,
+      "grad_norm": 0.37485402822494507,
+      "learning_rate": 2.6444690826465167e-06,
+      "loss": 0.6082,
+      "step": 1513
+    },
+    {
+      "epoch": 0.5177395913482089,
+      "grad_norm": 0.4066256880760193,
+      "learning_rate": 2.600403795819917e-06,
+      "loss": 0.5236,
+      "step": 1514
+    },
+    {
+      "epoch": 0.5180815593741985,
+      "grad_norm": 0.31456810235977173,
+      "learning_rate": 2.556703897654622e-06,
+      "loss": 0.5165,
+      "step": 1515
+    },
+    {
+      "epoch": 0.518423527400188,
+      "grad_norm": 0.6235669851303101,
+      "learning_rate": 2.5133695520881516e-06,
+      "loss": 0.6441,
+      "step": 1516
+    },
+    {
+      "epoch": 0.5187654954261777,
+      "grad_norm": 0.29173654317855835,
+      "learning_rate": 2.4704009216866887e-06,
+      "loss": 0.5327,
+      "step": 1517
+    },
+    {
+      "epoch": 0.5191074634521672,
+      "grad_norm": 0.4295627176761627,
+      "learning_rate": 2.4277981676444016e-06,
+      "loss": 0.5674,
+      "step": 1518
+    },
+    {
+      "epoch": 0.5194494314781568,
+      "grad_norm": 0.6146750450134277,
+      "learning_rate": 2.385561449782958e-06,
+      "loss": 0.5313,
+      "step": 1519
+    },
+    {
+      "epoch": 0.5197913995041463,
+      "grad_norm": 0.30539125204086304,
+      "learning_rate": 2.3436909265508234e-06,
+      "loss": 0.4175,
+      "step": 1520
+    },
+    {
+      "epoch": 0.520133367530136,
+      "grad_norm": 0.3613576889038086,
+      "learning_rate": 2.302186755022728e-06,
+      "loss": 0.4297,
+      "step": 1521
+    },
+    {
+      "epoch": 0.5204753355561255,
+      "grad_norm": 0.45236149430274963,
+      "learning_rate": 2.2610490908990366e-06,
+      "loss": 0.593,
+      "step": 1522
+    },
+    {
+      "epoch": 0.5208173035821151,
+      "grad_norm": 0.4872044622898102,
+      "learning_rate": 2.2202780885052012e-06,
+      "loss": 0.572,
+      "step": 1523
+    },
+    {
+      "epoch": 0.5211592716081046,
+      "grad_norm": 0.35270437598228455,
+      "learning_rate": 2.179873900791152e-06,
+      "loss": 0.5829,
+      "step": 1524
+    },
+    {
+      "epoch": 0.5215012396340942,
+      "grad_norm": 0.36061957478523254,
+      "learning_rate": 2.1398366793307312e-06,
+      "loss": 0.469,
+      "step": 1525
+    },
+    {
+      "epoch": 0.5218432076600837,
+      "grad_norm": 0.5612218976020813,
+      "learning_rate": 2.100166574321172e-06,
+      "loss": 0.6602,
+      "step": 1526
+    },
+    {
+      "epoch": 0.5221851756860734,
+      "grad_norm": 0.36930084228515625,
+      "learning_rate": 2.060863734582441e-06,
+      "loss": 0.4228,
+      "step": 1527
+    },
+    {
+      "epoch": 0.5225271437120629,
+      "grad_norm": 0.360594779253006,
+      "learning_rate": 2.021928307556742e-06,
+      "loss": 0.4865,
+      "step": 1528
+    },
+    {
+      "epoch": 0.5228691117380525,
+      "grad_norm": 0.3428085148334503,
+      "learning_rate": 1.9833604393079796e-06,
+      "loss": 0.4919,
+      "step": 1529
+    },
+    {
+      "epoch": 0.523211079764042,
+      "grad_norm": 0.4224722385406494,
+      "learning_rate": 1.9451602745211515e-06,
+      "loss": 0.5716,
+      "step": 1530
+    },
+    {
+      "epoch": 0.5235530477900316,
+      "grad_norm": 0.44536998867988586,
+      "learning_rate": 1.90732795650187e-06,
+      "loss": 0.5473,
+      "step": 1531
+    },
+    {
+      "epoch": 0.5238950158160212,
+      "grad_norm": 0.4344530701637268,
+      "learning_rate": 1.8698636271757608e-06,
+      "loss": 0.6324,
+      "step": 1532
+    },
+    {
+      "epoch": 0.5242369838420108,
+      "grad_norm": 0.3958171308040619,
+      "learning_rate": 1.8327674270879669e-06,
+      "loss": 0.5902,
+      "step": 1533
+    },
+    {
+      "epoch": 0.5245789518680003,
+      "grad_norm": 0.32088443636894226,
+      "learning_rate": 1.796039495402646e-06,
+      "loss": 0.4433,
+      "step": 1534
+    },
+    {
+      "epoch": 0.5249209198939899,
+      "grad_norm": 0.36691734194755554,
+      "learning_rate": 1.7596799699023947e-06,
+      "loss": 0.5477,
+      "step": 1535
+    },
+    {
+      "epoch": 0.5252628879199794,
+      "grad_norm": 0.38840222358703613,
+      "learning_rate": 1.7236889869877592e-06,
+      "loss": 0.5328,
+      "step": 1536
+    },
+    {
+      "epoch": 0.5256048559459691,
+      "grad_norm": 0.4019063711166382,
+      "learning_rate": 1.6880666816767255e-06,
+      "loss": 0.4749,
+      "step": 1537
+    },
+    {
+      "epoch": 0.5259468239719586,
+      "grad_norm": 0.44313663244247437,
+      "learning_rate": 1.6528131876042186e-06,
+      "loss": 0.5415,
+      "step": 1538
+    },
+    {
+      "epoch": 0.5262887919979482,
+      "grad_norm": 0.40111416578292847,
+      "learning_rate": 1.6179286370215485e-06,
+      "loss": 0.586,
+      "step": 1539
+    },
+    {
+      "epoch": 0.5266307600239377,
+      "grad_norm": 0.4282727539539337,
+      "learning_rate": 1.583413160795999e-06,
+      "loss": 0.7398,
+      "step": 1540
+    },
+    {
+      "epoch": 0.5269727280499273,
+      "grad_norm": 0.44338613748550415,
+      "learning_rate": 1.5492668884102834e-06,
+      "loss": 0.6117,
+      "step": 1541
+    },
+    {
+      "epoch": 0.5273146960759169,
+      "grad_norm": 0.4959453344345093,
+      "learning_rate": 1.5154899479620676e-06,
+      "loss": 0.8058,
+      "step": 1542
+    },
+    {
+      "epoch": 0.5276566641019065,
+      "grad_norm": 0.49569228291511536,
+      "learning_rate": 1.4820824661634813e-06,
+      "loss": 0.6977,
+      "step": 1543
+    },
+    {
+      "epoch": 0.527998632127896,
+      "grad_norm": 0.38664352893829346,
+      "learning_rate": 1.4490445683406628e-06,
+      "loss": 0.5547,
+      "step": 1544
+    },
+    {
+      "epoch": 0.5283406001538856,
+      "grad_norm": 0.29265400767326355,
+      "learning_rate": 1.4163763784332817e-06,
+      "loss": 0.3879,
+      "step": 1545
+    },
+    {
+      "epoch": 0.5286825681798751,
+      "grad_norm": 0.3284684419631958,
+      "learning_rate": 1.3840780189940504e-06,
+      "loss": 0.4904,
+      "step": 1546
+    },
+    {
+      "epoch": 0.5290245362058648,
+      "grad_norm": 0.38930264115333557,
+      "learning_rate": 1.3521496111883136e-06,
+      "loss": 0.5106,
+      "step": 1547
+    },
+    {
+      "epoch": 0.5293665042318543,
+      "grad_norm": 0.323243647813797,
+      "learning_rate": 1.320591274793559e-06,
+      "loss": 0.4943,
+      "step": 1548
+    },
+    {
+      "epoch": 0.5297084722578439,
+      "grad_norm": 0.35708367824554443,
+      "learning_rate": 1.2894031281989515e-06,
+      "loss": 0.6117,
+      "step": 1549
+    },
+    {
+      "epoch": 0.5300504402838334,
+      "grad_norm": 0.3199447989463806,
+      "learning_rate": 1.2585852884049344e-06,
+      "loss": 0.4818,
+      "step": 1550
+    },
+    {
+      "epoch": 0.530392408309823,
+      "grad_norm": 0.4739510715007782,
+      "learning_rate": 1.2281378710227608e-06,
+      "loss": 0.5472,
+      "step": 1551
+    },
+    {
+      "epoch": 0.5307343763358126,
+      "grad_norm": 0.30855345726013184,
+      "learning_rate": 1.1980609902740414e-06,
+      "loss": 0.4238,
+      "step": 1552
+    },
+    {
+      "epoch": 0.5310763443618022,
+      "grad_norm": 0.32831913232803345,
+      "learning_rate": 1.1683547589903976e-06,
+      "loss": 0.5575,
+      "step": 1553
+    },
+    {
+      "epoch": 0.5314183123877917,
+      "grad_norm": 0.3389270305633545,
+      "learning_rate": 1.1390192886129303e-06,
+      "loss": 0.6786,
+      "step": 1554
+    },
+    {
+      "epoch": 0.5317602804137813,
+      "grad_norm": 0.3983975052833557,
+      "learning_rate": 1.110054689191875e-06,
+      "loss": 0.6757,
+      "step": 1555
+    },
+    {
+      "epoch": 0.5321022484397708,
+      "grad_norm": 0.41553863883018494,
+      "learning_rate": 1.0814610693861694e-06,
+      "loss": 0.5852,
+      "step": 1556
+    },
+    {
+      "epoch": 0.5324442164657605,
+      "grad_norm": 0.40326887369155884,
+      "learning_rate": 1.0532385364630415e-06,
+      "loss": 0.5035,
+      "step": 1557
+    },
+    {
+      "epoch": 0.53278618449175,
+      "grad_norm": 0.5164569616317749,
+      "learning_rate": 1.0253871962976003e-06,
+      "loss": 0.6062,
+      "step": 1558
+    },
+    {
+      "epoch": 0.5331281525177396,
+      "grad_norm": 0.3135557472705841,
+      "learning_rate": 9.979071533724681e-07,
+      "loss": 0.586,
+      "step": 1559
+    },
+    {
+      "epoch": 0.5334701205437291,
+      "grad_norm": 0.41096481680870056,
+      "learning_rate": 9.707985107773266e-07,
+      "loss": 0.4761,
+      "step": 1560
+    },
+    {
+      "epoch": 0.5338120885697187,
+      "grad_norm": 0.3477085828781128,
+      "learning_rate": 9.440613702086265e-07,
+      "loss": 0.4954,
+      "step": 1561
+    },
+    {
+      "epoch": 0.5341540565957082,
+      "grad_norm": 0.40936562418937683,
+      "learning_rate": 9.176958319691009e-07,
+      "loss": 0.5037,
+      "step": 1562
+    },
+    {
+      "epoch": 0.5344960246216979,
+      "grad_norm": 0.4185855984687805,
+      "learning_rate": 8.917019949674977e-07,
+      "loss": 0.5665,
+      "step": 1563
+    },
+    {
+      "epoch": 0.5348379926476874,
+      "grad_norm": 0.43649426102638245,
+      "learning_rate": 8.6607995671808e-07,
+      "loss": 0.525,
+      "step": 1564
+    },
+    {
+      "epoch": 0.535179960673677,
+      "grad_norm": 0.3299812972545624,
+      "learning_rate": 8.408298133403824e-07,
+      "loss": 0.5086,
+      "step": 1565
+    },
+    {
+      "epoch": 0.5355219286996665,
+      "grad_norm": 0.6007355451583862,
+      "learning_rate": 8.15951659558778e-07,
+      "loss": 0.5678,
+      "step": 1566
+    },
+    {
+      "epoch": 0.5358638967256562,
+      "grad_norm": 0.3821653425693512,
+      "learning_rate": 7.914455887021554e-07,
+      "loss": 0.4984,
+      "step": 1567
+    },
+    {
+      "epoch": 0.5362058647516457,
+      "grad_norm": 0.39136579632759094,
+      "learning_rate": 7.673116927035318e-07,
+      "loss": 0.4864,
+      "step": 1568
+    },
+    {
+      "epoch": 0.5365478327776353,
+      "grad_norm": 0.382460355758667,
+      "learning_rate": 7.435500620997738e-07,
+      "loss": 0.566,
+      "step": 1569
+    },
+    {
+      "epoch": 0.5368898008036248,
+      "grad_norm": 0.4509314298629761,
+      "learning_rate": 7.201607860311876e-07,
+      "loss": 0.5198,
+      "step": 1570
+    },
+    {
+      "epoch": 0.5372317688296144,
+      "grad_norm": 0.3990001380443573,
+      "learning_rate": 6.971439522411971e-07,
+      "loss": 0.61,
+      "step": 1571
+    },
+    {
+      "epoch": 0.5375737368556041,
+      "grad_norm": 0.43781065940856934,
+      "learning_rate": 6.744996470760767e-07,
+      "loss": 0.5947,
+      "step": 1572
+    },
+    {
+      "epoch": 0.5379157048815936,
+      "grad_norm": 0.33890876173973083,
+      "learning_rate": 6.522279554845522e-07,
+      "loss": 0.4811,
+      "step": 1573
+    },
+    {
+      "epoch": 0.5382576729075832,
+      "grad_norm": 0.38705286383628845,
+      "learning_rate": 6.303289610175234e-07,
+      "loss": 0.6389,
+      "step": 1574
+    },
+    {
+      "epoch": 0.5385996409335727,
+      "grad_norm": 0.35131290555000305,
+      "learning_rate": 6.088027458277412e-07,
+      "loss": 0.4819,
+      "step": 1575
+    },
+    {
+      "epoch": 0.5389416089595623,
+      "grad_norm": 0.4599623382091522,
+      "learning_rate": 5.876493906694758e-07,
+      "loss": 0.5589,
+      "step": 1576
+    },
+    {
+      "epoch": 0.5392835769855518,
+      "grad_norm": 0.39687713980674744,
+      "learning_rate": 5.668689748982714e-07,
+      "loss": 0.5568,
+      "step": 1577
+    },
+    {
+      "epoch": 0.5396255450115415,
+      "grad_norm": 0.3937240242958069,
+      "learning_rate": 5.464615764705805e-07,
+      "loss": 0.5183,
+      "step": 1578
+    },
+    {
+      "epoch": 0.539967513037531,
+      "grad_norm": 0.4357829689979553,
+      "learning_rate": 5.264272719435193e-07,
+      "loss": 0.4666,
+      "step": 1579
+    },
+    {
+      "epoch": 0.5403094810635206,
+      "grad_norm": 0.5023972392082214,
+      "learning_rate": 5.06766136474579e-07,
+      "loss": 0.5945,
+      "step": 1580
+    },
+    {
+      "epoch": 0.5406514490895101,
+      "grad_norm": 0.43519163131713867,
+      "learning_rate": 4.874782438212821e-07,
+      "loss": 0.4115,
+      "step": 1581
+    },
+    {
+      "epoch": 0.5409934171154998,
+      "grad_norm": 0.33164143562316895,
+      "learning_rate": 4.685636663410153e-07,
+      "loss": 0.3942,
+      "step": 1582
+    },
+    {
+      "epoch": 0.5413353851414893,
+      "grad_norm": 0.3539201319217682,
+      "learning_rate": 4.500224749906412e-07,
+      "loss": 0.4733,
+      "step": 1583
+    },
+    {
+      "epoch": 0.5416773531674789,
+      "grad_norm": 0.39287203550338745,
+      "learning_rate": 4.318547393263317e-07,
+      "loss": 0.5512,
+      "step": 1584
+    },
+    {
+      "epoch": 0.5420193211934684,
+      "grad_norm": 0.3794853985309601,
+      "learning_rate": 4.14060527503235e-07,
+      "loss": 0.6805,
+      "step": 1585
+    },
+    {
+      "epoch": 0.542361289219458,
+      "grad_norm": 0.483906626701355,
+      "learning_rate": 3.966399062752535e-07,
+      "loss": 0.4676,
+      "step": 1586
+    },
+    {
+      "epoch": 0.5427032572454475,
+      "grad_norm": 0.3124162554740906,
+      "learning_rate": 3.7959294099477735e-07,
+      "loss": 0.362,
+      "step": 1587
+    },
+    {
+      "epoch": 0.5430452252714372,
+      "grad_norm": 0.33538541197776794,
+      "learning_rate": 3.6291969561248474e-07,
+      "loss": 0.5323,
+      "step": 1588
+    },
+    {
+      "epoch": 0.5433871932974267,
+      "grad_norm": 0.6162832975387573,
+      "learning_rate": 3.466202326770196e-07,
+      "loss": 0.4226,
+      "step": 1589
+    },
+    {
+      "epoch": 0.5437291613234163,
+      "grad_norm": 0.4205836057662964,
+      "learning_rate": 3.3069461333483653e-07,
+      "loss": 0.6923,
+      "step": 1590
+    },
+    {
+      "epoch": 0.5440711293494058,
+      "grad_norm": 0.41717442870140076,
+      "learning_rate": 3.1514289732992307e-07,
+      "loss": 0.6059,
+      "step": 1591
+    },
+    {
+      "epoch": 0.5444130973753954,
+      "grad_norm": 0.5087236166000366,
+      "learning_rate": 2.999651430035999e-07,
+      "loss": 0.6956,
+      "step": 1592
+    },
+    {
+      "epoch": 0.544755065401385,
+      "grad_norm": 0.5553721785545349,
+      "learning_rate": 2.8516140729428764e-07,
+      "loss": 0.7199,
+      "step": 1593
+    },
+    {
+      "epoch": 0.5450970334273746,
+      "grad_norm": 0.30246275663375854,
+      "learning_rate": 2.70731745737296e-07,
+      "loss": 0.5014,
+      "step": 1594
+    },
+    {
+      "epoch": 0.5454390014533641,
+      "grad_norm": 0.36332184076309204,
+      "learning_rate": 2.566762124646238e-07,
+      "loss": 0.5579,
+      "step": 1595
+    },
+    {
+      "epoch": 0.5457809694793537,
+      "grad_norm": 0.3601282238960266,
+      "learning_rate": 2.4299486020475934e-07,
+      "loss": 0.7086,
+      "step": 1596
+    },
+    {
+      "epoch": 0.5461229375053432,
+      "grad_norm": 0.29977715015411377,
+      "learning_rate": 2.2968774028245822e-07,
+      "loss": 0.4198,
+      "step": 1597
+    },
+    {
+      "epoch": 0.5464649055313329,
+      "grad_norm": 0.38223379850387573,
+      "learning_rate": 2.1675490261856558e-07,
+      "loss": 0.6874,
+      "step": 1598
+    },
+    {
+      "epoch": 0.5468068735573224,
+      "grad_norm": 0.3903549015522003,
+      "learning_rate": 2.0419639572983873e-07,
+      "loss": 0.4237,
+      "step": 1599
+    },
+    {
+      "epoch": 0.547148841583312,
+      "grad_norm": 0.34848782420158386,
+      "learning_rate": 1.9201226672875827e-07,
+      "loss": 0.4992,
+      "step": 1600
+    },
+    {
+      "epoch": 0.5474908096093015,
+      "grad_norm": 0.4629409611225128,
+      "learning_rate": 1.8020256132333935e-07,
+      "loss": 0.5838,
+      "step": 1601
+    },
+    {
+      "epoch": 0.5478327776352911,
+      "grad_norm": 0.3451687693595886,
+      "learning_rate": 1.6876732381696513e-07,
+      "loss": 0.7396,
+      "step": 1602
+    },
+    {
+      "epoch": 0.5481747456612807,
+      "grad_norm": 0.46268150210380554,
+      "learning_rate": 1.5770659710824254e-07,
+      "loss": 0.5532,
+      "step": 1603
+    },
+    {
+      "epoch": 0.5485167136872703,
+      "grad_norm": 0.38035228848457336,
+      "learning_rate": 1.470204226908134e-07,
+      "loss": 0.4954,
+      "step": 1604
+    },
+    {
+      "epoch": 0.5488586817132598,
+      "grad_norm": 0.37964892387390137,
+      "learning_rate": 1.3670884065321022e-07,
+      "loss": 0.5787,
+      "step": 1605
+    },
+    {
+      "epoch": 0.5492006497392494,
+      "grad_norm": 0.42854440212249756,
+      "learning_rate": 1.267718896787118e-07,
+      "loss": 0.597,
+      "step": 1606
+    },
+    {
+      "epoch": 0.5495426177652389,
+      "grad_norm": 0.36185067892074585,
+      "learning_rate": 1.1720960704517669e-07,
+      "loss": 0.6276,
+      "step": 1607
+    },
+    {
+      "epoch": 0.5498845857912286,
+      "grad_norm": 0.35879606008529663,
+      "learning_rate": 1.0802202862492117e-07,
+      "loss": 0.4173,
+      "step": 1608
+    },
+    {
+      "epoch": 0.5502265538172181,
+      "grad_norm": 0.38417312502861023,
+      "learning_rate": 9.920918888460806e-08,
+      "loss": 0.6,
+      "step": 1609
+    },
+    {
+      "epoch": 0.5505685218432077,
+      "grad_norm": 0.3673582673072815,
+      "learning_rate": 9.077112088505812e-08,
+      "loss": 0.4043,
+      "step": 1610
+    },
+    {
+      "epoch": 0.5509104898691972,
+      "grad_norm": 0.3992306888103485,
+      "learning_rate": 8.270785628116117e-08,
+      "loss": 0.6587,
+      "step": 1611
+    },
+    {
+      "epoch": 0.5512524578951868,
+      "grad_norm": 0.468791127204895,
+      "learning_rate": 7.50194253217762e-08,
+      "loss": 0.4541,
+      "step": 1612
+    },
+    {
+      "epoch": 0.5515944259211764,
+      "grad_norm": 0.3831529915332794,
+      "learning_rate": 6.770585684958697e-08,
+      "loss": 0.6585,
+      "step": 1613
+    },
+    {
+      "epoch": 0.551936393947166,
+      "grad_norm": 0.326048880815506,
+      "learning_rate": 6.076717830098e-08,
+      "loss": 0.7454,
+      "step": 1614
+    },
+    {
+      "epoch": 0.5522783619731555,
+      "grad_norm": 0.3421213626861572,
+      "learning_rate": 5.4203415706011175e-08,
+      "loss": 0.5836,
+      "step": 1615
+    },
+    {
+      "epoch": 0.5526203299991451,
+      "grad_norm": 0.3982676863670349,
+      "learning_rate": 4.8014593688228136e-08,
+      "loss": 0.4041,
+      "step": 1616
+    },
+    {
+      "epoch": 0.5529622980251346,
+      "grad_norm": 0.32384833693504333,
+      "learning_rate": 4.220073546460368e-08,
+      "loss": 0.4742,
+      "step": 1617
+    },
+    {
+      "epoch": 0.5533042660511243,
+      "grad_norm": 0.42056000232696533,
+      "learning_rate": 3.676186284550243e-08,
+      "loss": 0.7041,
+      "step": 1618
+    },
+    {
+      "epoch": 0.5536462340771138,
+      "grad_norm": 0.31218430399894714,
+      "learning_rate": 3.1697996234503205e-08,
+      "loss": 0.3984,
+      "step": 1619
+    },
+    {
+      "epoch": 0.5539882021031034,
+      "grad_norm": 0.4027038514614105,
+      "learning_rate": 2.7009154628399036e-08,
+      "loss": 0.4685,
+      "step": 1620
+    },
+    {
+      "epoch": 0.5543301701290929,
+      "grad_norm": 0.6776359677314758,
+      "learning_rate": 2.2695355617097238e-08,
+      "loss": 0.623,
+      "step": 1621
+    },
+    {
+      "epoch": 0.5546721381550825,
+      "grad_norm": 0.4552357792854309,
+      "learning_rate": 1.8756615383552778e-08,
+      "loss": 0.5327,
+      "step": 1622
+    },
+    {
+      "epoch": 0.555014106181072,
+      "grad_norm": 0.3813455402851105,
+      "learning_rate": 1.5192948703701694e-08,
+      "loss": 0.4048,
+      "step": 1623
+    },
+    {
+      "epoch": 0.5553560742070617,
+      "grad_norm": 0.3392491042613983,
+      "learning_rate": 1.2004368946427758e-08,
+      "loss": 0.5924,
+      "step": 1624
+    },
+    {
+      "epoch": 0.5556980422330512,
+      "grad_norm": 0.3731992244720459,
+      "learning_rate": 9.190888073495884e-09,
+      "loss": 0.5303,
+      "step": 1625
+    },
+    {
+      "epoch": 0.5560400102590408,
+      "grad_norm": 0.4253707826137543,
+      "learning_rate": 6.752516639507711e-09,
+      "loss": 0.491,
+      "step": 1626
+    },
+    {
+      "epoch": 0.5563819782850303,
+      "grad_norm": 0.3752436935901642,
+      "learning_rate": 4.689263791857191e-09,
+      "loss": 0.5629,
+      "step": 1627
+    },
+    {
+      "epoch": 0.55672394631102,
+      "grad_norm": 0.6946560740470886,
+      "learning_rate": 3.001137270730592e-09,
+      "loss": 0.6352,
+      "step": 1628
+    },
+    {
+      "epoch": 0.5570659143370095,
+      "grad_norm": 0.4490332305431366,
+      "learning_rate": 1.688143409017684e-09,
+      "loss": 0.6046,
+      "step": 1629
+    },
+    {
+      "epoch": 0.5574078823629991,
+      "grad_norm": 0.4461636245250702,
+      "learning_rate": 7.502871323339378e-10,
+      "loss": 0.5054,
+      "step": 1630
+    },
+    {
+      "epoch": 0.5577498503889886,
+      "grad_norm": 0.3044528663158417,
+      "learning_rate": 1.875719589983227e-10,
+      "loss": 0.4627,
+      "step": 1631
+    },
+    {
+      "epoch": 0.5580918184149782,
+      "grad_norm": 0.3551361560821533,
+      "learning_rate": 0.0,
+      "loss": 0.5083,
+      "step": 1632
     }
   ],
   "logging_steps": 1,
@@ -10618,12 +11542,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop":
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 1.
+  "total_flos": 1.441611980388434e+18,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null
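Two details in the final hunks show this is the last checkpoint of the run rather than an intermediate one: the learning rate reaches exactly 0.0 at step 1632, and should_training_stop flips to true, so the configured max_steps was hit. The logged learning-rate tail falls off with the square of the distance to step 1632 (1.69e-09, 7.50e-10, 1.88e-10 at steps 1629-1631), which is the signature of cosine decay. A sketch that reproduces those tail values; the peak learning rate (about 2.02e-4) is back-solved from the logs under a no-warmup assumption and is not recorded anywhere in this checkpoint:

import math

TOTAL_STEPS = 1632   # final "global_step" above
PEAK_LR = 2.02e-4    # assumption, inferred from the logged tail; not stored in trainer_state.json

def cosine_lr(step, peak=PEAK_LR, total=TOTAL_STEPS):
    # Cosine decay from peak to zero over `total` steps, no warmup assumed.
    return peak * 0.5 * (1.0 + math.cos(math.pi * step / total))

for step, logged in [(1629, 1.688143409017684e-09),
                     (1630, 7.502871323339378e-10),
                     (1631, 1.875719589983227e-10),
                     (1632, 0.0)]:
    print(step, f"model={cosine_lr(step):.3e}", f"logged={logged:.3e}")

The modeled values agree with the logged ones to within about 1%, and both hit zero at step 1632. Note that best_model_checkpoint still points at miner_id_24/checkpoint-1500 (best_metric 0.5886463522911072), so this final checkpoint-1632 is not the best-scoring one.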