Training in progress, step 1500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1001465824
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9e4effa8bbc0ed474583ba3f2837281343afc09606c2ccb38becde90989e4c46
|
| 3 |
size 1001465824
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 509177556
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c9b5fa21c522699eddafd48526976af6455055539ca8a6ef38f2e4198cf9d572
|
| 3 |
size 509177556
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:801e94b1ec798c0825658670c62086f2c0cee7dce743b57ecfc35839c1add960
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9162f2e18a7003bc5be2bf4c68f833fcf607f8f6f9d53d603787f1bed5f4ce08
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 0.
|
| 3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
| 4 |
-
"epoch": 0.
|
| 5 |
"eval_steps": 150,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -9537,6 +9537,1064 @@
|
|
| 9537 |
"eval_samples_per_second": 5.323,
|
| 9538 |
"eval_steps_per_second": 2.662,
|
| 9539 |
"step": 1350
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 9540 |
}
|
| 9541 |
],
|
| 9542 |
"logging_steps": 1,
|
|
@@ -9565,7 +10623,7 @@
|
|
| 9565 |
"attributes": {}
|
| 9566 |
}
|
| 9567 |
},
|
| 9568 |
-
"total_flos": 1.
|
| 9569 |
"train_batch_size": 2,
|
| 9570 |
"trial_name": null,
|
| 9571 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 0.5886463522911072,
|
| 3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-1500",
|
| 4 |
+
"epoch": 0.5129520389843549,
|
| 5 |
"eval_steps": 150,
|
| 6 |
+
"global_step": 1500,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 9537 |
"eval_samples_per_second": 5.323,
|
| 9538 |
"eval_steps_per_second": 2.662,
|
| 9539 |
"step": 1350
|
| 9540 |
+
},
|
| 9541 |
+
{
|
| 9542 |
+
"epoch": 0.46199880311190905,
|
| 9543 |
+
"grad_norm": 0.46458661556243896,
|
| 9544 |
+
"learning_rate": 1.4448861670281588e-05,
|
| 9545 |
+
"loss": 0.5774,
|
| 9546 |
+
"step": 1351
|
| 9547 |
+
},
|
| 9548 |
+
{
|
| 9549 |
+
"epoch": 0.4623407711378986,
|
| 9550 |
+
"grad_norm": 0.42334747314453125,
|
| 9551 |
+
"learning_rate": 1.4348734507629114e-05,
|
| 9552 |
+
"loss": 0.7102,
|
| 9553 |
+
"step": 1352
|
| 9554 |
+
},
|
| 9555 |
+
{
|
| 9556 |
+
"epoch": 0.4626827391638882,
|
| 9557 |
+
"grad_norm": 0.39918336272239685,
|
| 9558 |
+
"learning_rate": 1.4248928660489846e-05,
|
| 9559 |
+
"loss": 0.5146,
|
| 9560 |
+
"step": 1353
|
| 9561 |
+
},
|
| 9562 |
+
{
|
| 9563 |
+
"epoch": 0.46302470718987776,
|
| 9564 |
+
"grad_norm": 0.36537671089172363,
|
| 9565 |
+
"learning_rate": 1.4149444503279297e-05,
|
| 9566 |
+
"loss": 0.5339,
|
| 9567 |
+
"step": 1354
|
| 9568 |
+
},
|
| 9569 |
+
{
|
| 9570 |
+
"epoch": 0.46336667521586733,
|
| 9571 |
+
"grad_norm": 0.4066685438156128,
|
| 9572 |
+
"learning_rate": 1.4050282409206273e-05,
|
| 9573 |
+
"loss": 0.6559,
|
| 9574 |
+
"step": 1355
|
| 9575 |
+
},
|
| 9576 |
+
{
|
| 9577 |
+
"epoch": 0.4637086432418569,
|
| 9578 |
+
"grad_norm": 0.2991009056568146,
|
| 9579 |
+
"learning_rate": 1.3951442750271349e-05,
|
| 9580 |
+
"loss": 0.4116,
|
| 9581 |
+
"step": 1356
|
| 9582 |
+
},
|
| 9583 |
+
{
|
| 9584 |
+
"epoch": 0.46405061126784647,
|
| 9585 |
+
"grad_norm": 0.37864071130752563,
|
| 9586 |
+
"learning_rate": 1.3852925897265456e-05,
|
| 9587 |
+
"loss": 0.4171,
|
| 9588 |
+
"step": 1357
|
| 9589 |
+
},
|
| 9590 |
+
{
|
| 9591 |
+
"epoch": 0.46439257929383604,
|
| 9592 |
+
"grad_norm": 0.6100689768791199,
|
| 9593 |
+
"learning_rate": 1.3754732219768619e-05,
|
| 9594 |
+
"loss": 0.658,
|
| 9595 |
+
"step": 1358
|
| 9596 |
+
},
|
| 9597 |
+
{
|
| 9598 |
+
"epoch": 0.4647345473198256,
|
| 9599 |
+
"grad_norm": 0.38960012793540955,
|
| 9600 |
+
"learning_rate": 1.3656862086148391e-05,
|
| 9601 |
+
"loss": 0.4975,
|
| 9602 |
+
"step": 1359
|
| 9603 |
+
},
|
| 9604 |
+
{
|
| 9605 |
+
"epoch": 0.4650765153458152,
|
| 9606 |
+
"grad_norm": 0.5630696415901184,
|
| 9607 |
+
"learning_rate": 1.3559315863558697e-05,
|
| 9608 |
+
"loss": 0.6081,
|
| 9609 |
+
"step": 1360
|
| 9610 |
+
},
|
| 9611 |
+
{
|
| 9612 |
+
"epoch": 0.46541848337180475,
|
| 9613 |
+
"grad_norm": 0.39844122529029846,
|
| 9614 |
+
"learning_rate": 1.3462093917938179e-05,
|
| 9615 |
+
"loss": 0.5646,
|
| 9616 |
+
"step": 1361
|
| 9617 |
+
},
|
| 9618 |
+
{
|
| 9619 |
+
"epoch": 0.4657604513977943,
|
| 9620 |
+
"grad_norm": 0.344722718000412,
|
| 9621 |
+
"learning_rate": 1.33651966140091e-05,
|
| 9622 |
+
"loss": 0.4201,
|
| 9623 |
+
"step": 1362
|
| 9624 |
+
},
|
| 9625 |
+
{
|
| 9626 |
+
"epoch": 0.4661024194237839,
|
| 9627 |
+
"grad_norm": 0.28442439436912537,
|
| 9628 |
+
"learning_rate": 1.3268624315275823e-05,
|
| 9629 |
+
"loss": 0.5606,
|
| 9630 |
+
"step": 1363
|
| 9631 |
+
},
|
| 9632 |
+
{
|
| 9633 |
+
"epoch": 0.46644438744977346,
|
| 9634 |
+
"grad_norm": 0.4574268162250519,
|
| 9635 |
+
"learning_rate": 1.3172377384023393e-05,
|
| 9636 |
+
"loss": 0.5332,
|
| 9637 |
+
"step": 1364
|
| 9638 |
+
},
|
| 9639 |
+
{
|
| 9640 |
+
"epoch": 0.466786355475763,
|
| 9641 |
+
"grad_norm": 0.3338494598865509,
|
| 9642 |
+
"learning_rate": 1.3076456181316354e-05,
|
| 9643 |
+
"loss": 0.4792,
|
| 9644 |
+
"step": 1365
|
| 9645 |
+
},
|
| 9646 |
+
{
|
| 9647 |
+
"epoch": 0.4671283235017526,
|
| 9648 |
+
"grad_norm": 0.4591316878795624,
|
| 9649 |
+
"learning_rate": 1.2980861066997297e-05,
|
| 9650 |
+
"loss": 0.5861,
|
| 9651 |
+
"step": 1366
|
| 9652 |
+
},
|
| 9653 |
+
{
|
| 9654 |
+
"epoch": 0.46747029152774217,
|
| 9655 |
+
"grad_norm": 0.3527957797050476,
|
| 9656 |
+
"learning_rate": 1.2885592399685431e-05,
|
| 9657 |
+
"loss": 0.5774,
|
| 9658 |
+
"step": 1367
|
| 9659 |
+
},
|
| 9660 |
+
{
|
| 9661 |
+
"epoch": 0.46781225955373174,
|
| 9662 |
+
"grad_norm": 0.3617342412471771,
|
| 9663 |
+
"learning_rate": 1.279065053677536e-05,
|
| 9664 |
+
"loss": 0.5277,
|
| 9665 |
+
"step": 1368
|
| 9666 |
+
},
|
| 9667 |
+
{
|
| 9668 |
+
"epoch": 0.4681542275797213,
|
| 9669 |
+
"grad_norm": 0.3831739127635956,
|
| 9670 |
+
"learning_rate": 1.2696035834435749e-05,
|
| 9671 |
+
"loss": 0.6971,
|
| 9672 |
+
"step": 1369
|
| 9673 |
+
},
|
| 9674 |
+
{
|
| 9675 |
+
"epoch": 0.4684961956057109,
|
| 9676 |
+
"grad_norm": 0.4345672130584717,
|
| 9677 |
+
"learning_rate": 1.2601748647607859e-05,
|
| 9678 |
+
"loss": 0.5492,
|
| 9679 |
+
"step": 1370
|
| 9680 |
+
},
|
| 9681 |
+
{
|
| 9682 |
+
"epoch": 0.46883816363170044,
|
| 9683 |
+
"grad_norm": 0.36282750964164734,
|
| 9684 |
+
"learning_rate": 1.2507789330004349e-05,
|
| 9685 |
+
"loss": 0.5571,
|
| 9686 |
+
"step": 1371
|
| 9687 |
+
},
|
| 9688 |
+
{
|
| 9689 |
+
"epoch": 0.46918013165769,
|
| 9690 |
+
"grad_norm": 0.4751930832862854,
|
| 9691 |
+
"learning_rate": 1.241415823410792e-05,
|
| 9692 |
+
"loss": 0.626,
|
| 9693 |
+
"step": 1372
|
| 9694 |
+
},
|
| 9695 |
+
{
|
| 9696 |
+
"epoch": 0.4695220996836796,
|
| 9697 |
+
"grad_norm": 0.3625487685203552,
|
| 9698 |
+
"learning_rate": 1.2320855711169887e-05,
|
| 9699 |
+
"loss": 0.627,
|
| 9700 |
+
"step": 1373
|
| 9701 |
+
},
|
| 9702 |
+
{
|
| 9703 |
+
"epoch": 0.46986406770966915,
|
| 9704 |
+
"grad_norm": 0.3447592854499817,
|
| 9705 |
+
"learning_rate": 1.222788211120901e-05,
|
| 9706 |
+
"loss": 0.5964,
|
| 9707 |
+
"step": 1374
|
| 9708 |
+
},
|
| 9709 |
+
{
|
| 9710 |
+
"epoch": 0.4702060357356587,
|
| 9711 |
+
"grad_norm": 0.39788079261779785,
|
| 9712 |
+
"learning_rate": 1.21352377830101e-05,
|
| 9713 |
+
"loss": 0.4275,
|
| 9714 |
+
"step": 1375
|
| 9715 |
+
},
|
| 9716 |
+
{
|
| 9717 |
+
"epoch": 0.4705480037616483,
|
| 9718 |
+
"grad_norm": 0.33655157685279846,
|
| 9719 |
+
"learning_rate": 1.2042923074122702e-05,
|
| 9720 |
+
"loss": 0.49,
|
| 9721 |
+
"step": 1376
|
| 9722 |
+
},
|
| 9723 |
+
{
|
| 9724 |
+
"epoch": 0.47088997178763786,
|
| 9725 |
+
"grad_norm": 0.5279715061187744,
|
| 9726 |
+
"learning_rate": 1.1950938330859861e-05,
|
| 9727 |
+
"loss": 0.6838,
|
| 9728 |
+
"step": 1377
|
| 9729 |
+
},
|
| 9730 |
+
{
|
| 9731 |
+
"epoch": 0.47123193981362743,
|
| 9732 |
+
"grad_norm": 0.5920473337173462,
|
| 9733 |
+
"learning_rate": 1.1859283898296735e-05,
|
| 9734 |
+
"loss": 0.6467,
|
| 9735 |
+
"step": 1378
|
| 9736 |
+
},
|
| 9737 |
+
{
|
| 9738 |
+
"epoch": 0.471573907839617,
|
| 9739 |
+
"grad_norm": 0.2907107472419739,
|
| 9740 |
+
"learning_rate": 1.1767960120269328e-05,
|
| 9741 |
+
"loss": 0.4565,
|
| 9742 |
+
"step": 1379
|
| 9743 |
+
},
|
| 9744 |
+
{
|
| 9745 |
+
"epoch": 0.47191587586560657,
|
| 9746 |
+
"grad_norm": 0.37272509932518005,
|
| 9747 |
+
"learning_rate": 1.1676967339373245e-05,
|
| 9748 |
+
"loss": 0.5862,
|
| 9749 |
+
"step": 1380
|
| 9750 |
+
},
|
| 9751 |
+
{
|
| 9752 |
+
"epoch": 0.47225784389159614,
|
| 9753 |
+
"grad_norm": 0.479973167181015,
|
| 9754 |
+
"learning_rate": 1.1586305896962412e-05,
|
| 9755 |
+
"loss": 0.7466,
|
| 9756 |
+
"step": 1381
|
| 9757 |
+
},
|
| 9758 |
+
{
|
| 9759 |
+
"epoch": 0.4725998119175857,
|
| 9760 |
+
"grad_norm": 0.33694228529930115,
|
| 9761 |
+
"learning_rate": 1.1495976133147668e-05,
|
| 9762 |
+
"loss": 0.456,
|
| 9763 |
+
"step": 1382
|
| 9764 |
+
},
|
| 9765 |
+
{
|
| 9766 |
+
"epoch": 0.4729417799435753,
|
| 9767 |
+
"grad_norm": 0.4428293704986572,
|
| 9768 |
+
"learning_rate": 1.1405978386795636e-05,
|
| 9769 |
+
"loss": 0.5808,
|
| 9770 |
+
"step": 1383
|
| 9771 |
+
},
|
| 9772 |
+
{
|
| 9773 |
+
"epoch": 0.47328374796956485,
|
| 9774 |
+
"grad_norm": 0.49617043137550354,
|
| 9775 |
+
"learning_rate": 1.1316312995527423e-05,
|
| 9776 |
+
"loss": 0.5857,
|
| 9777 |
+
"step": 1384
|
| 9778 |
+
},
|
| 9779 |
+
{
|
| 9780 |
+
"epoch": 0.4736257159955544,
|
| 9781 |
+
"grad_norm": 0.37273940443992615,
|
| 9782 |
+
"learning_rate": 1.1226980295717248e-05,
|
| 9783 |
+
"loss": 0.4798,
|
| 9784 |
+
"step": 1385
|
| 9785 |
+
},
|
| 9786 |
+
{
|
| 9787 |
+
"epoch": 0.473967684021544,
|
| 9788 |
+
"grad_norm": 0.3818013668060303,
|
| 9789 |
+
"learning_rate": 1.113798062249134e-05,
|
| 9790 |
+
"loss": 0.5763,
|
| 9791 |
+
"step": 1386
|
| 9792 |
+
},
|
| 9793 |
+
{
|
| 9794 |
+
"epoch": 0.47430965204753356,
|
| 9795 |
+
"grad_norm": 0.31985703110694885,
|
| 9796 |
+
"learning_rate": 1.1049314309726533e-05,
|
| 9797 |
+
"loss": 0.4325,
|
| 9798 |
+
"step": 1387
|
| 9799 |
+
},
|
| 9800 |
+
{
|
| 9801 |
+
"epoch": 0.47465162007352313,
|
| 9802 |
+
"grad_norm": 0.4047496020793915,
|
| 9803 |
+
"learning_rate": 1.0960981690049099e-05,
|
| 9804 |
+
"loss": 0.6111,
|
| 9805 |
+
"step": 1388
|
| 9806 |
+
},
|
| 9807 |
+
{
|
| 9808 |
+
"epoch": 0.4749935880995127,
|
| 9809 |
+
"grad_norm": 0.4212690591812134,
|
| 9810 |
+
"learning_rate": 1.08729830948335e-05,
|
| 9811 |
+
"loss": 0.5258,
|
| 9812 |
+
"step": 1389
|
| 9813 |
+
},
|
| 9814 |
+
{
|
| 9815 |
+
"epoch": 0.47533555612550227,
|
| 9816 |
+
"grad_norm": 0.34776535630226135,
|
| 9817 |
+
"learning_rate": 1.0785318854201142e-05,
|
| 9818 |
+
"loss": 0.5997,
|
| 9819 |
+
"step": 1390
|
| 9820 |
+
},
|
| 9821 |
+
{
|
| 9822 |
+
"epoch": 0.47567752415149184,
|
| 9823 |
+
"grad_norm": 0.5136944055557251,
|
| 9824 |
+
"learning_rate": 1.069798929701904e-05,
|
| 9825 |
+
"loss": 0.5356,
|
| 9826 |
+
"step": 1391
|
| 9827 |
+
},
|
| 9828 |
+
{
|
| 9829 |
+
"epoch": 0.4760194921774814,
|
| 9830 |
+
"grad_norm": 0.3962900936603546,
|
| 9831 |
+
"learning_rate": 1.0610994750898739e-05,
|
| 9832 |
+
"loss": 0.5057,
|
| 9833 |
+
"step": 1392
|
| 9834 |
+
},
|
| 9835 |
+
{
|
| 9836 |
+
"epoch": 0.476361460203471,
|
| 9837 |
+
"grad_norm": 0.49814558029174805,
|
| 9838 |
+
"learning_rate": 1.0524335542194996e-05,
|
| 9839 |
+
"loss": 0.4861,
|
| 9840 |
+
"step": 1393
|
| 9841 |
+
},
|
| 9842 |
+
{
|
| 9843 |
+
"epoch": 0.47670342822946055,
|
| 9844 |
+
"grad_norm": 0.3843591511249542,
|
| 9845 |
+
"learning_rate": 1.0438011996004581e-05,
|
| 9846 |
+
"loss": 0.5785,
|
| 9847 |
+
"step": 1394
|
| 9848 |
+
},
|
| 9849 |
+
{
|
| 9850 |
+
"epoch": 0.4770453962554501,
|
| 9851 |
+
"grad_norm": 0.32255926728248596,
|
| 9852 |
+
"learning_rate": 1.0352024436164975e-05,
|
| 9853 |
+
"loss": 0.5117,
|
| 9854 |
+
"step": 1395
|
| 9855 |
+
},
|
| 9856 |
+
{
|
| 9857 |
+
"epoch": 0.4773873642814397,
|
| 9858 |
+
"grad_norm": 0.4465799927711487,
|
| 9859 |
+
"learning_rate": 1.026637318525333e-05,
|
| 9860 |
+
"loss": 0.68,
|
| 9861 |
+
"step": 1396
|
| 9862 |
+
},
|
| 9863 |
+
{
|
| 9864 |
+
"epoch": 0.47772933230742926,
|
| 9865 |
+
"grad_norm": 0.3948836922645569,
|
| 9866 |
+
"learning_rate": 1.0181058564585088e-05,
|
| 9867 |
+
"loss": 0.7162,
|
| 9868 |
+
"step": 1397
|
| 9869 |
+
},
|
| 9870 |
+
{
|
| 9871 |
+
"epoch": 0.4780713003334188,
|
| 9872 |
+
"grad_norm": 0.7754755616188049,
|
| 9873 |
+
"learning_rate": 1.0096080894212833e-05,
|
| 9874 |
+
"loss": 0.6089,
|
| 9875 |
+
"step": 1398
|
| 9876 |
+
},
|
| 9877 |
+
{
|
| 9878 |
+
"epoch": 0.4784132683594084,
|
| 9879 |
+
"grad_norm": 0.37649857997894287,
|
| 9880 |
+
"learning_rate": 1.001144049292514e-05,
|
| 9881 |
+
"loss": 0.5256,
|
| 9882 |
+
"step": 1399
|
| 9883 |
+
},
|
| 9884 |
+
{
|
| 9885 |
+
"epoch": 0.47875523638539796,
|
| 9886 |
+
"grad_norm": 0.4943767786026001,
|
| 9887 |
+
"learning_rate": 9.927137678245357e-06,
|
| 9888 |
+
"loss": 0.4086,
|
| 9889 |
+
"step": 1400
|
| 9890 |
+
},
|
| 9891 |
+
{
|
| 9892 |
+
"epoch": 0.47909720441138753,
|
| 9893 |
+
"grad_norm": 0.6237683892250061,
|
| 9894 |
+
"learning_rate": 9.843172766430331e-06,
|
| 9895 |
+
"loss": 0.6381,
|
| 9896 |
+
"step": 1401
|
| 9897 |
+
},
|
| 9898 |
+
{
|
| 9899 |
+
"epoch": 0.4794391724373771,
|
| 9900 |
+
"grad_norm": 0.4005008935928345,
|
| 9901 |
+
"learning_rate": 9.759546072469328e-06,
|
| 9902 |
+
"loss": 0.5005,
|
| 9903 |
+
"step": 1402
|
| 9904 |
+
},
|
| 9905 |
+
{
|
| 9906 |
+
"epoch": 0.4797811404633667,
|
| 9907 |
+
"grad_norm": 0.3757031559944153,
|
| 9908 |
+
"learning_rate": 9.67625791008283e-06,
|
| 9909 |
+
"loss": 0.507,
|
| 9910 |
+
"step": 1403
|
| 9911 |
+
},
|
| 9912 |
+
{
|
| 9913 |
+
"epoch": 0.48012310848935624,
|
| 9914 |
+
"grad_norm": 0.32949626445770264,
|
| 9915 |
+
"learning_rate": 9.593308591721273e-06,
|
| 9916 |
+
"loss": 0.4857,
|
| 9917 |
+
"step": 1404
|
| 9918 |
+
},
|
| 9919 |
+
{
|
| 9920 |
+
"epoch": 0.4804650765153458,
|
| 9921 |
+
"grad_norm": 0.31370505690574646,
|
| 9922 |
+
"learning_rate": 9.510698428564014e-06,
|
| 9923 |
+
"loss": 0.5032,
|
| 9924 |
+
"step": 1405
|
| 9925 |
+
},
|
| 9926 |
+
{
|
| 9927 |
+
"epoch": 0.4808070445413354,
|
| 9928 |
+
"grad_norm": 0.45027247071266174,
|
| 9929 |
+
"learning_rate": 9.428427730518053e-06,
|
| 9930 |
+
"loss": 0.7439,
|
| 9931 |
+
"step": 1406
|
| 9932 |
+
},
|
| 9933 |
+
{
|
| 9934 |
+
"epoch": 0.48114901256732495,
|
| 9935 |
+
"grad_norm": 0.28915834426879883,
|
| 9936 |
+
"learning_rate": 9.346496806216864e-06,
|
| 9937 |
+
"loss": 0.4361,
|
| 9938 |
+
"step": 1407
|
| 9939 |
+
},
|
| 9940 |
+
{
|
| 9941 |
+
"epoch": 0.4814909805933145,
|
| 9942 |
+
"grad_norm": 0.3978510797023773,
|
| 9943 |
+
"learning_rate": 9.264905963019376e-06,
|
| 9944 |
+
"loss": 0.566,
|
| 9945 |
+
"step": 1408
|
| 9946 |
+
},
|
| 9947 |
+
{
|
| 9948 |
+
"epoch": 0.4818329486193041,
|
| 9949 |
+
"grad_norm": 0.6509686708450317,
|
| 9950 |
+
"learning_rate": 9.18365550700867e-06,
|
| 9951 |
+
"loss": 0.6359,
|
| 9952 |
+
"step": 1409
|
| 9953 |
+
},
|
| 9954 |
+
{
|
| 9955 |
+
"epoch": 0.48217491664529366,
|
| 9956 |
+
"grad_norm": 0.34217292070388794,
|
| 9957 |
+
"learning_rate": 9.10274574299087e-06,
|
| 9958 |
+
"loss": 0.5832,
|
| 9959 |
+
"step": 1410
|
| 9960 |
+
},
|
| 9961 |
+
{
|
| 9962 |
+
"epoch": 0.48251688467128323,
|
| 9963 |
+
"grad_norm": 0.36280882358551025,
|
| 9964 |
+
"learning_rate": 9.022176974494034e-06,
|
| 9965 |
+
"loss": 0.6527,
|
| 9966 |
+
"step": 1411
|
| 9967 |
+
},
|
| 9968 |
+
{
|
| 9969 |
+
"epoch": 0.4828588526972728,
|
| 9970 |
+
"grad_norm": 0.31427791714668274,
|
| 9971 |
+
"learning_rate": 8.941949503767034e-06,
|
| 9972 |
+
"loss": 0.4786,
|
| 9973 |
+
"step": 1412
|
| 9974 |
+
},
|
| 9975 |
+
{
|
| 9976 |
+
"epoch": 0.48320082072326237,
|
| 9977 |
+
"grad_norm": 0.2932129204273224,
|
| 9978 |
+
"learning_rate": 8.862063631778306e-06,
|
| 9979 |
+
"loss": 0.3879,
|
| 9980 |
+
"step": 1413
|
| 9981 |
+
},
|
| 9982 |
+
{
|
| 9983 |
+
"epoch": 0.48354278874925194,
|
| 9984 |
+
"grad_norm": 0.3400621712207794,
|
| 9985 |
+
"learning_rate": 8.78251965821485e-06,
|
| 9986 |
+
"loss": 0.3863,
|
| 9987 |
+
"step": 1414
|
| 9988 |
+
},
|
| 9989 |
+
{
|
| 9990 |
+
"epoch": 0.4838847567752415,
|
| 9991 |
+
"grad_norm": 0.4857083559036255,
|
| 9992 |
+
"learning_rate": 8.703317881481066e-06,
|
| 9993 |
+
"loss": 0.4673,
|
| 9994 |
+
"step": 1415
|
| 9995 |
+
},
|
| 9996 |
+
{
|
| 9997 |
+
"epoch": 0.4842267248012311,
|
| 9998 |
+
"grad_norm": 0.480949342250824,
|
| 9999 |
+
"learning_rate": 8.624458598697582e-06,
|
| 10000 |
+
"loss": 0.493,
|
| 10001 |
+
"step": 1416
|
| 10002 |
+
},
|
| 10003 |
+
{
|
| 10004 |
+
"epoch": 0.48456869282722065,
|
| 10005 |
+
"grad_norm": 0.42186641693115234,
|
| 10006 |
+
"learning_rate": 8.545942105700188e-06,
|
| 10007 |
+
"loss": 0.4314,
|
| 10008 |
+
"step": 1417
|
| 10009 |
+
},
|
| 10010 |
+
{
|
| 10011 |
+
"epoch": 0.4849106608532102,
|
| 10012 |
+
"grad_norm": 0.3342297077178955,
|
| 10013 |
+
"learning_rate": 8.467768697038769e-06,
|
| 10014 |
+
"loss": 0.6254,
|
| 10015 |
+
"step": 1418
|
| 10016 |
+
},
|
| 10017 |
+
{
|
| 10018 |
+
"epoch": 0.4852526288791998,
|
| 10019 |
+
"grad_norm": 0.33367452025413513,
|
| 10020 |
+
"learning_rate": 8.389938665976083e-06,
|
| 10021 |
+
"loss": 0.6116,
|
| 10022 |
+
"step": 1419
|
| 10023 |
+
},
|
| 10024 |
+
{
|
| 10025 |
+
"epoch": 0.48559459690518936,
|
| 10026 |
+
"grad_norm": 0.3980502784252167,
|
| 10027 |
+
"learning_rate": 8.312452304486761e-06,
|
| 10028 |
+
"loss": 0.5527,
|
| 10029 |
+
"step": 1420
|
| 10030 |
+
},
|
| 10031 |
+
{
|
| 10032 |
+
"epoch": 0.4859365649311789,
|
| 10033 |
+
"grad_norm": 0.3666388690471649,
|
| 10034 |
+
"learning_rate": 8.235309903256206e-06,
|
| 10035 |
+
"loss": 0.4088,
|
| 10036 |
+
"step": 1421
|
| 10037 |
+
},
|
| 10038 |
+
{
|
| 10039 |
+
"epoch": 0.4862785329571685,
|
| 10040 |
+
"grad_norm": 0.45284703373908997,
|
| 10041 |
+
"learning_rate": 8.158511751679399e-06,
|
| 10042 |
+
"loss": 0.5529,
|
| 10043 |
+
"step": 1422
|
| 10044 |
+
},
|
| 10045 |
+
{
|
| 10046 |
+
"epoch": 0.48662050098315807,
|
| 10047 |
+
"grad_norm": 0.385841965675354,
|
| 10048 |
+
"learning_rate": 8.08205813785996e-06,
|
| 10049 |
+
"loss": 0.4688,
|
| 10050 |
+
"step": 1423
|
| 10051 |
+
},
|
| 10052 |
+
{
|
| 10053 |
+
"epoch": 0.48696246900914764,
|
| 10054 |
+
"grad_norm": 0.4751913547515869,
|
| 10055 |
+
"learning_rate": 8.005949348608976e-06,
|
| 10056 |
+
"loss": 0.5193,
|
| 10057 |
+
"step": 1424
|
| 10058 |
+
},
|
| 10059 |
+
{
|
| 10060 |
+
"epoch": 0.4873044370351372,
|
| 10061 |
+
"grad_norm": 0.41884270310401917,
|
| 10062 |
+
"learning_rate": 7.930185669443946e-06,
|
| 10063 |
+
"loss": 0.6109,
|
| 10064 |
+
"step": 1425
|
| 10065 |
+
},
|
| 10066 |
+
{
|
| 10067 |
+
"epoch": 0.4876464050611268,
|
| 10068 |
+
"grad_norm": 0.32201844453811646,
|
| 10069 |
+
"learning_rate": 7.854767384587669e-06,
|
| 10070 |
+
"loss": 0.598,
|
| 10071 |
+
"step": 1426
|
| 10072 |
+
},
|
| 10073 |
+
{
|
| 10074 |
+
"epoch": 0.48798837308711634,
|
| 10075 |
+
"grad_norm": 0.41289442777633667,
|
| 10076 |
+
"learning_rate": 7.779694776967295e-06,
|
| 10077 |
+
"loss": 0.5104,
|
| 10078 |
+
"step": 1427
|
| 10079 |
+
},
|
| 10080 |
+
{
|
| 10081 |
+
"epoch": 0.4883303411131059,
|
| 10082 |
+
"grad_norm": 0.3158438801765442,
|
| 10083 |
+
"learning_rate": 7.704968128213108e-06,
|
| 10084 |
+
"loss": 0.5495,
|
| 10085 |
+
"step": 1428
|
| 10086 |
+
},
|
| 10087 |
+
{
|
| 10088 |
+
"epoch": 0.4886723091390955,
|
| 10089 |
+
"grad_norm": 0.5073384642601013,
|
| 10090 |
+
"learning_rate": 7.6305877186576e-06,
|
| 10091 |
+
"loss": 0.4147,
|
| 10092 |
+
"step": 1429
|
| 10093 |
+
},
|
| 10094 |
+
{
|
| 10095 |
+
"epoch": 0.48901427716508505,
|
| 10096 |
+
"grad_norm": 0.33985650539398193,
|
| 10097 |
+
"learning_rate": 7.556553827334367e-06,
|
| 10098 |
+
"loss": 0.6625,
|
| 10099 |
+
"step": 1430
|
| 10100 |
+
},
|
| 10101 |
+
{
|
| 10102 |
+
"epoch": 0.4893562451910746,
|
| 10103 |
+
"grad_norm": 0.3775363266468048,
|
| 10104 |
+
"learning_rate": 7.482866731977056e-06,
|
| 10105 |
+
"loss": 0.6464,
|
| 10106 |
+
"step": 1431
|
| 10107 |
+
},
|
| 10108 |
+
{
|
| 10109 |
+
"epoch": 0.4896982132170642,
|
| 10110 |
+
"grad_norm": 0.3615787625312805,
|
| 10111 |
+
"learning_rate": 7.4095267090182885e-06,
|
| 10112 |
+
"loss": 0.5859,
|
| 10113 |
+
"step": 1432
|
| 10114 |
+
},
|
| 10115 |
+
{
|
| 10116 |
+
"epoch": 0.49004018124305376,
|
| 10117 |
+
"grad_norm": 0.44534218311309814,
|
| 10118 |
+
"learning_rate": 7.336534033588727e-06,
|
| 10119 |
+
"loss": 0.9093,
|
| 10120 |
+
"step": 1433
|
| 10121 |
+
},
|
| 10122 |
+
{
|
| 10123 |
+
"epoch": 0.49038214926904333,
|
| 10124 |
+
"grad_norm": 0.3275962471961975,
|
| 10125 |
+
"learning_rate": 7.263888979515954e-06,
|
| 10126 |
+
"loss": 0.4235,
|
| 10127 |
+
"step": 1434
|
| 10128 |
+
},
|
| 10129 |
+
{
|
| 10130 |
+
"epoch": 0.4907241172950329,
|
| 10131 |
+
"grad_norm": 0.4237425625324249,
|
| 10132 |
+
"learning_rate": 7.191591819323473e-06,
|
| 10133 |
+
"loss": 0.5503,
|
| 10134 |
+
"step": 1435
|
| 10135 |
+
},
|
| 10136 |
+
{
|
| 10137 |
+
"epoch": 0.49106608532102247,
|
| 10138 |
+
"grad_norm": 0.41267287731170654,
|
| 10139 |
+
"learning_rate": 7.119642824229655e-06,
|
| 10140 |
+
"loss": 0.6242,
|
| 10141 |
+
"step": 1436
|
| 10142 |
+
},
|
| 10143 |
+
{
|
| 10144 |
+
"epoch": 0.49140805334701204,
|
| 10145 |
+
"grad_norm": 0.43237781524658203,
|
| 10146 |
+
"learning_rate": 7.048042264146815e-06,
|
| 10147 |
+
"loss": 0.6045,
|
| 10148 |
+
"step": 1437
|
| 10149 |
+
},
|
| 10150 |
+
{
|
| 10151 |
+
"epoch": 0.4917500213730016,
|
| 10152 |
+
"grad_norm": 0.33868464827537537,
|
| 10153 |
+
"learning_rate": 6.976790407680067e-06,
|
| 10154 |
+
"loss": 0.535,
|
| 10155 |
+
"step": 1438
|
| 10156 |
+
},
|
| 10157 |
+
{
|
| 10158 |
+
"epoch": 0.4920919893989912,
|
| 10159 |
+
"grad_norm": 0.37420621514320374,
|
| 10160 |
+
"learning_rate": 6.90588752212643e-06,
|
| 10161 |
+
"loss": 0.5514,
|
| 10162 |
+
"step": 1439
|
| 10163 |
+
},
|
| 10164 |
+
{
|
| 10165 |
+
"epoch": 0.49243395742498075,
|
| 10166 |
+
"grad_norm": 0.3844612240791321,
|
| 10167 |
+
"learning_rate": 6.835333873473793e-06,
|
| 10168 |
+
"loss": 0.6164,
|
| 10169 |
+
"step": 1440
|
| 10170 |
+
},
|
| 10171 |
+
{
|
| 10172 |
+
"epoch": 0.4927759254509703,
|
| 10173 |
+
"grad_norm": 0.4005042016506195,
|
| 10174 |
+
"learning_rate": 6.765129726399844e-06,
|
| 10175 |
+
"loss": 0.6643,
|
| 10176 |
+
"step": 1441
|
| 10177 |
+
},
|
| 10178 |
+
{
|
| 10179 |
+
"epoch": 0.4931178934769599,
|
| 10180 |
+
"grad_norm": 0.3358938992023468,
|
| 10181 |
+
"learning_rate": 6.695275344271168e-06,
|
| 10182 |
+
"loss": 0.4994,
|
| 10183 |
+
"step": 1442
|
| 10184 |
+
},
|
| 10185 |
+
{
|
| 10186 |
+
"epoch": 0.49345986150294946,
|
| 10187 |
+
"grad_norm": 0.3401714861392975,
|
| 10188 |
+
"learning_rate": 6.625770989142266e-06,
|
| 10189 |
+
"loss": 0.6264,
|
| 10190 |
+
"step": 1443
|
| 10191 |
+
},
|
| 10192 |
+
{
|
| 10193 |
+
"epoch": 0.49380182952893903,
|
| 10194 |
+
"grad_norm": 0.49171924591064453,
|
| 10195 |
+
"learning_rate": 6.5566169217544886e-06,
|
| 10196 |
+
"loss": 0.2698,
|
| 10197 |
+
"step": 1444
|
| 10198 |
+
},
|
| 10199 |
+
{
|
| 10200 |
+
"epoch": 0.4941437975549286,
|
| 10201 |
+
"grad_norm": 0.49176937341690063,
|
| 10202 |
+
"learning_rate": 6.4878134015350815e-06,
|
| 10203 |
+
"loss": 0.5954,
|
| 10204 |
+
"step": 1445
|
| 10205 |
+
},
|
| 10206 |
+
{
|
| 10207 |
+
"epoch": 0.49448576558091817,
|
| 10208 |
+
"grad_norm": 0.4451077878475189,
|
| 10209 |
+
"learning_rate": 6.419360686596298e-06,
|
| 10210 |
+
"loss": 0.5165,
|
| 10211 |
+
"step": 1446
|
| 10212 |
+
},
|
| 10213 |
+
{
|
| 10214 |
+
"epoch": 0.49482773360690774,
|
| 10215 |
+
"grad_norm": 0.4469234049320221,
|
| 10216 |
+
"learning_rate": 6.351259033734314e-06,
|
| 10217 |
+
"loss": 0.6972,
|
| 10218 |
+
"step": 1447
|
| 10219 |
+
},
|
| 10220 |
+
{
|
| 10221 |
+
"epoch": 0.4951697016328973,
|
| 10222 |
+
"grad_norm": 0.39374667406082153,
|
| 10223 |
+
"learning_rate": 6.283508698428353e-06,
|
| 10224 |
+
"loss": 0.4433,
|
| 10225 |
+
"step": 1448
|
| 10226 |
+
},
|
| 10227 |
+
{
|
| 10228 |
+
"epoch": 0.4955116696588869,
|
| 10229 |
+
"grad_norm": 0.40193474292755127,
|
| 10230 |
+
"learning_rate": 6.216109934839687e-06,
|
| 10231 |
+
"loss": 0.5459,
|
| 10232 |
+
"step": 1449
|
| 10233 |
+
},
|
| 10234 |
+
{
|
| 10235 |
+
"epoch": 0.49585363768487645,
|
| 10236 |
+
"grad_norm": 0.299493670463562,
|
| 10237 |
+
"learning_rate": 6.149062995810639e-06,
|
| 10238 |
+
"loss": 0.4521,
|
| 10239 |
+
"step": 1450
|
| 10240 |
+
},
|
| 10241 |
+
{
|
| 10242 |
+
"epoch": 0.496195605710866,
|
| 10243 |
+
"grad_norm": 0.3158107399940491,
|
| 10244 |
+
"learning_rate": 6.082368132863758e-06,
|
| 10245 |
+
"loss": 0.4194,
|
| 10246 |
+
"step": 1451
|
| 10247 |
+
},
|
| 10248 |
+
{
|
| 10249 |
+
"epoch": 0.4965375737368556,
|
| 10250 |
+
"grad_norm": 0.39207759499549866,
|
| 10251 |
+
"learning_rate": 6.0160255962007694e-06,
|
| 10252 |
+
"loss": 0.6173,
|
| 10253 |
+
"step": 1452
|
| 10254 |
+
},
|
| 10255 |
+
{
|
| 10256 |
+
"epoch": 0.49687954176284516,
|
| 10257 |
+
"grad_norm": 0.36359909176826477,
|
| 10258 |
+
"learning_rate": 5.950035634701645e-06,
|
| 10259 |
+
"loss": 0.5063,
|
| 10260 |
+
"step": 1453
|
| 10261 |
+
},
|
| 10262 |
+
{
|
| 10263 |
+
"epoch": 0.4972215097888347,
|
| 10264 |
+
"grad_norm": 0.3674727976322174,
|
| 10265 |
+
"learning_rate": 5.884398495923727e-06,
|
| 10266 |
+
"loss": 0.5094,
|
| 10267 |
+
"step": 1454
|
| 10268 |
+
},
|
| 10269 |
+
{
|
| 10270 |
+
"epoch": 0.4975634778148243,
|
| 10271 |
+
"grad_norm": 0.48032888770103455,
|
| 10272 |
+
"learning_rate": 5.8191144261007465e-06,
|
| 10273 |
+
"loss": 0.4669,
|
| 10274 |
+
"step": 1455
|
| 10275 |
+
},
|
| 10276 |
+
{
|
| 10277 |
+
"epoch": 0.49790544584081387,
|
| 10278 |
+
"grad_norm": 0.3460092544555664,
|
| 10279 |
+
"learning_rate": 5.754183670141888e-06,
|
| 10280 |
+
"loss": 0.5367,
|
| 10281 |
+
"step": 1456
|
| 10282 |
+
},
|
| 10283 |
+
{
|
| 10284 |
+
"epoch": 0.49824741386680343,
|
| 10285 |
+
"grad_norm": 0.2881573438644409,
|
| 10286 |
+
"learning_rate": 5.68960647163097e-06,
|
| 10287 |
+
"loss": 0.4381,
|
| 10288 |
+
"step": 1457
|
| 10289 |
+
},
|
| 10290 |
+
{
|
| 10291 |
+
"epoch": 0.498589381892793,
|
| 10292 |
+
"grad_norm": 0.4344151020050049,
|
| 10293 |
+
"learning_rate": 5.625383072825429e-06,
|
| 10294 |
+
"loss": 0.5184,
|
| 10295 |
+
"step": 1458
|
| 10296 |
+
},
|
| 10297 |
+
{
|
| 10298 |
+
"epoch": 0.4989313499187826,
|
| 10299 |
+
"grad_norm": 0.40367579460144043,
|
| 10300 |
+
"learning_rate": 5.561513714655419e-06,
|
| 10301 |
+
"loss": 0.6386,
|
| 10302 |
+
"step": 1459
|
| 10303 |
+
},
|
| 10304 |
+
{
|
| 10305 |
+
"epoch": 0.49927331794477214,
|
| 10306 |
+
"grad_norm": 0.5705030560493469,
|
| 10307 |
+
"learning_rate": 5.4979986367229564e-06,
|
| 10308 |
+
"loss": 0.5825,
|
| 10309 |
+
"step": 1460
|
| 10310 |
+
},
|
| 10311 |
+
{
|
| 10312 |
+
"epoch": 0.4996152859707617,
|
| 10313 |
+
"grad_norm": 0.3498372435569763,
|
| 10314 |
+
"learning_rate": 5.4348380773010075e-06,
|
| 10315 |
+
"loss": 0.4944,
|
| 10316 |
+
"step": 1461
|
| 10317 |
+
},
|
| 10318 |
+
{
|
| 10319 |
+
"epoch": 0.4999572539967513,
|
| 10320 |
+
"grad_norm": 0.4295266270637512,
|
| 10321 |
+
"learning_rate": 5.37203227333255e-06,
|
| 10322 |
+
"loss": 0.4896,
|
| 10323 |
+
"step": 1462
|
| 10324 |
+
},
|
| 10325 |
+
{
|
| 10326 |
+
"epoch": 0.5002992220227409,
|
| 10327 |
+
"grad_norm": 0.4837034344673157,
|
| 10328 |
+
"learning_rate": 5.3095814604297574e-06,
|
| 10329 |
+
"loss": 0.5036,
|
| 10330 |
+
"step": 1463
|
| 10331 |
+
},
|
| 10332 |
+
{
|
| 10333 |
+
"epoch": 0.5006411900487304,
|
| 10334 |
+
"grad_norm": 0.31095609068870544,
|
| 10335 |
+
"learning_rate": 5.247485872873026e-06,
|
| 10336 |
+
"loss": 0.4891,
|
| 10337 |
+
"step": 1464
|
| 10338 |
+
},
|
| 10339 |
+
{
|
| 10340 |
+
"epoch": 0.50098315807472,
|
| 10341 |
+
"grad_norm": 0.3047396242618561,
|
| 10342 |
+
"learning_rate": 5.185745743610215e-06,
|
| 10343 |
+
"loss": 0.5915,
|
| 10344 |
+
"step": 1465
|
| 10345 |
+
},
|
| 10346 |
+
{
|
| 10347 |
+
"epoch": 0.5013251261007096,
|
| 10348 |
+
"grad_norm": 0.471611350774765,
|
| 10349 |
+
"learning_rate": 5.124361304255632e-06,
|
| 10350 |
+
"loss": 0.5939,
|
| 10351 |
+
"step": 1466
|
| 10352 |
+
},
|
| 10353 |
+
{
|
| 10354 |
+
"epoch": 0.5016670941266992,
|
| 10355 |
+
"grad_norm": 0.526499330997467,
|
| 10356 |
+
"learning_rate": 5.063332785089281e-06,
|
| 10357 |
+
"loss": 0.4065,
|
| 10358 |
+
"step": 1467
|
| 10359 |
+
},
|
| 10360 |
+
{
|
| 10361 |
+
"epoch": 0.5020090621526887,
|
| 10362 |
+
"grad_norm": 0.34760183095932007,
|
| 10363 |
+
"learning_rate": 5.002660415055949e-06,
|
| 10364 |
+
"loss": 0.3351,
|
| 10365 |
+
"step": 1468
|
| 10366 |
+
},
|
| 10367 |
+
{
|
| 10368 |
+
"epoch": 0.5023510301786783,
|
| 10369 |
+
"grad_norm": 0.3515227138996124,
|
| 10370 |
+
"learning_rate": 4.942344421764322e-06,
|
| 10371 |
+
"loss": 0.4433,
|
| 10372 |
+
"step": 1469
|
| 10373 |
+
},
|
| 10374 |
+
{
|
| 10375 |
+
"epoch": 0.5026929982046678,
|
| 10376 |
+
"grad_norm": 0.38687199354171753,
|
| 10377 |
+
"learning_rate": 4.8823850314861915e-06,
|
| 10378 |
+
"loss": 0.562,
|
| 10379 |
+
"step": 1470
|
| 10380 |
+
},
|
| 10381 |
+
{
|
| 10382 |
+
"epoch": 0.5030349662306575,
|
| 10383 |
+
"grad_norm": 0.517970860004425,
|
| 10384 |
+
"learning_rate": 4.822782469155573e-06,
|
| 10385 |
+
"loss": 0.4598,
|
| 10386 |
+
"step": 1471
|
| 10387 |
+
},
|
| 10388 |
+
{
|
| 10389 |
+
"epoch": 0.503376934256647,
|
| 10390 |
+
"grad_norm": 0.40455347299575806,
|
| 10391 |
+
"learning_rate": 4.7635369583678425e-06,
|
| 10392 |
+
"loss": 0.5014,
|
| 10393 |
+
"step": 1472
|
| 10394 |
+
},
|
| 10395 |
+
{
|
| 10396 |
+
"epoch": 0.5037189022826366,
|
| 10397 |
+
"grad_norm": 0.38876280188560486,
|
| 10398 |
+
"learning_rate": 4.7046487213789344e-06,
|
| 10399 |
+
"loss": 0.5155,
|
| 10400 |
+
"step": 1473
|
| 10401 |
+
},
|
| 10402 |
+
{
|
| 10403 |
+
"epoch": 0.5040608703086261,
|
| 10404 |
+
"grad_norm": 0.4026382565498352,
|
| 10405 |
+
"learning_rate": 4.646117979104481e-06,
|
| 10406 |
+
"loss": 0.63,
|
| 10407 |
+
"step": 1474
|
| 10408 |
+
},
|
| 10409 |
+
{
|
| 10410 |
+
"epoch": 0.5044028383346157,
|
| 10411 |
+
"grad_norm": 0.398532897233963,
|
| 10412 |
+
"learning_rate": 4.587944951118994e-06,
|
| 10413 |
+
"loss": 0.5957,
|
| 10414 |
+
"step": 1475
|
| 10415 |
+
},
|
| 10416 |
+
{
|
| 10417 |
+
"epoch": 0.5047448063606053,
|
| 10418 |
+
"grad_norm": 0.4192051291465759,
|
| 10419 |
+
"learning_rate": 4.5301298556550746e-06,
|
| 10420 |
+
"loss": 0.6054,
|
| 10421 |
+
"step": 1476
|
| 10422 |
+
},
|
| 10423 |
+
{
|
| 10424 |
+
"epoch": 0.5050867743865949,
|
| 10425 |
+
"grad_norm": 0.47235310077667236,
|
| 10426 |
+
"learning_rate": 4.472672909602527e-06,
|
| 10427 |
+
"loss": 0.5894,
|
| 10428 |
+
"step": 1477
|
| 10429 |
+
},
|
| 10430 |
+
{
|
| 10431 |
+
"epoch": 0.5054287424125844,
|
| 10432 |
+
"grad_norm": 0.33159035444259644,
|
| 10433 |
+
"learning_rate": 4.415574328507577e-06,
|
| 10434 |
+
"loss": 0.4497,
|
| 10435 |
+
"step": 1478
|
| 10436 |
+
},
|
| 10437 |
+
{
|
| 10438 |
+
"epoch": 0.505770710438574,
|
| 10439 |
+
"grad_norm": 0.3283701241016388,
|
| 10440 |
+
"learning_rate": 4.358834326572092e-06,
|
| 10441 |
+
"loss": 0.5675,
|
| 10442 |
+
"step": 1479
|
| 10443 |
+
},
|
| 10444 |
+
{
|
| 10445 |
+
"epoch": 0.5061126784645635,
|
| 10446 |
+
"grad_norm": 0.4659315347671509,
|
| 10447 |
+
"learning_rate": 4.3024531166527495e-06,
|
| 10448 |
+
"loss": 0.4861,
|
| 10449 |
+
"step": 1480
|
| 10450 |
+
},
|
| 10451 |
+
{
|
| 10452 |
+
"epoch": 0.5064546464905532,
|
| 10453 |
+
"grad_norm": 0.424280047416687,
|
| 10454 |
+
"learning_rate": 4.246430910260191e-06,
|
| 10455 |
+
"loss": 0.6762,
|
| 10456 |
+
"step": 1481
|
| 10457 |
+
},
|
| 10458 |
+
{
|
| 10459 |
+
"epoch": 0.5067966145165427,
|
| 10460 |
+
"grad_norm": 0.4000745713710785,
|
| 10461 |
+
"learning_rate": 4.190767917558369e-06,
|
| 10462 |
+
"loss": 0.6834,
|
| 10463 |
+
"step": 1482
|
| 10464 |
+
},
|
| 10465 |
+
{
|
| 10466 |
+
"epoch": 0.5071385825425323,
|
| 10467 |
+
"grad_norm": 0.49172914028167725,
|
| 10468 |
+
"learning_rate": 4.135464347363571e-06,
|
| 10469 |
+
"loss": 0.6214,
|
| 10470 |
+
"step": 1483
|
| 10471 |
+
},
|
| 10472 |
+
{
|
| 10473 |
+
"epoch": 0.5074805505685218,
|
| 10474 |
+
"grad_norm": 0.35509413480758667,
|
| 10475 |
+
"learning_rate": 4.080520407143795e-06,
|
| 10476 |
+
"loss": 0.5186,
|
| 10477 |
+
"step": 1484
|
| 10478 |
+
},
|
| 10479 |
+
{
|
| 10480 |
+
"epoch": 0.5078225185945114,
|
| 10481 |
+
"grad_norm": 0.3477623164653778,
|
| 10482 |
+
"learning_rate": 4.025936303017897e-06,
|
| 10483 |
+
"loss": 0.4725,
|
| 10484 |
+
"step": 1485
|
| 10485 |
+
},
|
| 10486 |
+
{
|
| 10487 |
+
"epoch": 0.508164486620501,
|
| 10488 |
+
"grad_norm": 0.3009217381477356,
|
| 10489 |
+
"learning_rate": 3.97171223975481e-06,
|
| 10490 |
+
"loss": 0.4228,
|
| 10491 |
+
"step": 1486
|
| 10492 |
+
},
|
| 10493 |
+
{
|
| 10494 |
+
"epoch": 0.5085064546464906,
|
| 10495 |
+
"grad_norm": 0.30407074093818665,
|
| 10496 |
+
"learning_rate": 3.917848420772818e-06,
|
| 10497 |
+
"loss": 0.4755,
|
| 10498 |
+
"step": 1487
|
| 10499 |
+
},
|
| 10500 |
+
{
|
| 10501 |
+
"epoch": 0.5088484226724801,
|
| 10502 |
+
"grad_norm": 0.5132246613502502,
|
| 10503 |
+
"learning_rate": 3.864345048138751e-06,
|
| 10504 |
+
"loss": 0.5118,
|
| 10505 |
+
"step": 1488
|
| 10506 |
+
},
|
| 10507 |
+
{
|
| 10508 |
+
"epoch": 0.5091903906984697,
|
| 10509 |
+
"grad_norm": 0.5252590775489807,
|
| 10510 |
+
"learning_rate": 3.8112023225672755e-06,
|
| 10511 |
+
"loss": 0.5199,
|
| 10512 |
+
"step": 1489
|
| 10513 |
+
},
|
| 10514 |
+
{
|
| 10515 |
+
"epoch": 0.5095323587244592,
|
| 10516 |
+
"grad_norm": 0.4393605589866638,
|
| 10517 |
+
"learning_rate": 3.758420443420085e-06,
|
| 10518 |
+
"loss": 0.5528,
|
| 10519 |
+
"step": 1490
|
| 10520 |
+
},
|
| 10521 |
+
{
|
| 10522 |
+
"epoch": 0.5098743267504489,
|
| 10523 |
+
"grad_norm": 0.591144323348999,
|
| 10524 |
+
"learning_rate": 3.7059996087051795e-06,
|
| 10525 |
+
"loss": 0.7475,
|
| 10526 |
+
"step": 1491
|
| 10527 |
+
},
|
| 10528 |
+
{
|
| 10529 |
+
"epoch": 0.5102162947764384,
|
| 10530 |
+
"grad_norm": 0.5281643867492676,
|
| 10531 |
+
"learning_rate": 3.6539400150761515e-06,
|
| 10532 |
+
"loss": 0.6121,
|
| 10533 |
+
"step": 1492
|
| 10534 |
+
},
|
| 10535 |
+
{
|
| 10536 |
+
"epoch": 0.510558262802428,
|
| 10537 |
+
"grad_norm": 0.241044819355011,
|
| 10538 |
+
"learning_rate": 3.60224185783139e-06,
|
| 10539 |
+
"loss": 0.3838,
|
| 10540 |
+
"step": 1493
|
| 10541 |
+
},
|
| 10542 |
+
{
|
| 10543 |
+
"epoch": 0.5109002308284175,
|
| 10544 |
+
"grad_norm": 0.5283997058868408,
|
| 10545 |
+
"learning_rate": 3.5509053309133897e-06,
|
| 10546 |
+
"loss": 0.4947,
|
| 10547 |
+
"step": 1494
|
| 10548 |
+
},
|
| 10549 |
+
{
|
| 10550 |
+
"epoch": 0.5112421988544071,
|
| 10551 |
+
"grad_norm": 0.5331889986991882,
|
| 10552 |
+
"learning_rate": 3.49993062690801e-06,
|
| 10553 |
+
"loss": 0.5312,
|
| 10554 |
+
"step": 1495
|
| 10555 |
+
},
|
| 10556 |
+
{
|
| 10557 |
+
"epoch": 0.5115841668803967,
|
| 10558 |
+
"grad_norm": 0.5044165253639221,
|
| 10559 |
+
"learning_rate": 3.449317937043728e-06,
|
| 10560 |
+
"loss": 0.4942,
|
| 10561 |
+
"step": 1496
|
| 10562 |
+
},
|
| 10563 |
+
{
|
| 10564 |
+
"epoch": 0.5119261349063863,
|
| 10565 |
+
"grad_norm": 0.38311871886253357,
|
| 10566 |
+
"learning_rate": 3.399067451191007e-06,
|
| 10567 |
+
"loss": 0.594,
|
| 10568 |
+
"step": 1497
|
| 10569 |
+
},
|
| 10570 |
+
{
|
| 10571 |
+
"epoch": 0.5122681029323758,
|
| 10572 |
+
"grad_norm": 0.4012651741504669,
|
| 10573 |
+
"learning_rate": 3.349179357861487e-06,
|
| 10574 |
+
"loss": 0.4338,
|
| 10575 |
+
"step": 1498
|
| 10576 |
+
},
|
| 10577 |
+
{
|
| 10578 |
+
"epoch": 0.5126100709583654,
|
| 10579 |
+
"grad_norm": 0.38673850893974304,
|
| 10580 |
+
"learning_rate": 3.2996538442072844e-06,
|
| 10581 |
+
"loss": 0.5927,
|
| 10582 |
+
"step": 1499
|
| 10583 |
+
},
|
| 10584 |
+
{
|
| 10585 |
+
"epoch": 0.5129520389843549,
|
| 10586 |
+
"grad_norm": 0.3748995363712311,
|
| 10587 |
+
"learning_rate": 3.2504910960203694e-06,
|
| 10588 |
+
"loss": 0.3867,
|
| 10589 |
+
"step": 1500
|
| 10590 |
+
},
|
| 10591 |
+
{
|
| 10592 |
+
"epoch": 0.5129520389843549,
|
| 10593 |
+
"eval_loss": 0.5886463522911072,
|
| 10594 |
+
"eval_runtime": 231.3531,
|
| 10595 |
+
"eval_samples_per_second": 5.325,
|
| 10596 |
+
"eval_steps_per_second": 2.663,
|
| 10597 |
+
"step": 1500
|
| 10598 |
}
|
| 10599 |
],
|
| 10600 |
"logging_steps": 1,
|
|
|
|
| 10623 |
"attributes": {}
|
| 10624 |
}
|
| 10625 |
},
|
| 10626 |
+
"total_flos": 1.3256905345676083e+18,
|
| 10627 |
"train_batch_size": 2,
|
| 10628 |
"trial_name": null,
|
| 10629 |
"trial_params": null
|