Training in progress, step 6200, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 21253336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:83a7c07e51238afaa7c40d988f34e3738452e0f7d0951dbdfd2d1a2ee920e6f4
|
| 3 |
size 21253336
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 10952762
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca245b5c5818c369c760d40c19814c70787271c71cb16b41a34ef13138831071
|
| 3 |
size 10952762
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:004276e1bb014253bb15da64b4c20e1898f1faddc15b7d64ac8624fcece1991d
|
| 3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:48d5aa75a71817f6d3ca1f492d6f89412df01d1749b3c6a5faf3ff446d681f07
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.12408096343278885,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-5600",
|
| 4 |
-
"epoch": 2.
|
| 5 |
"eval_steps": 200,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -42255,6 +42255,1414 @@
|
|
| 42255 |
"eval_samples_per_second": 157.556,
|
| 42256 |
"eval_steps_per_second": 6.584,
|
| 42257 |
"step": 6000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42258 |
}
|
| 42259 |
],
|
| 42260 |
"logging_steps": 1,
|
|
@@ -42269,7 +43677,7 @@
|
|
| 42269 |
"early_stopping_threshold": 0.0
|
| 42270 |
},
|
| 42271 |
"attributes": {
|
| 42272 |
-
"early_stopping_patience_counter":
|
| 42273 |
}
|
| 42274 |
},
|
| 42275 |
"TrainerControl": {
|
|
@@ -42278,12 +43686,12 @@
|
|
| 42278 |
"should_evaluate": false,
|
| 42279 |
"should_log": false,
|
| 42280 |
"should_save": true,
|
| 42281 |
-
"should_training_stop":
|
| 42282 |
},
|
| 42283 |
"attributes": {}
|
| 42284 |
}
|
| 42285 |
},
|
| 42286 |
-
"total_flos": 1.
|
| 42287 |
"train_batch_size": 24,
|
| 42288 |
"trial_name": null,
|
| 42289 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
"best_metric": 0.12408096343278885,
|
| 3 |
"best_model_checkpoint": "miner_id_24/checkpoint-5600",
|
| 4 |
+
"epoch": 2.9775483251290673,
|
| 5 |
"eval_steps": 200,
|
| 6 |
+
"global_step": 6200,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 42255 |
"eval_samples_per_second": 157.556,
|
| 42256 |
"eval_steps_per_second": 6.584,
|
| 42257 |
"step": 6000
|
| 42258 |
+
},
|
| 42259 |
+
{
|
| 42260 |
+
"epoch": 2.8819786288870213,
|
| 42261 |
+
"grad_norm": 1.059691309928894,
|
| 42262 |
+
"learning_rate": 7.656405705057435e-07,
|
| 42263 |
+
"loss": 0.5278,
|
| 42264 |
+
"step": 6001
|
| 42265 |
+
},
|
| 42266 |
+
{
|
| 42267 |
+
"epoch": 2.8824588786168808,
|
| 42268 |
+
"grad_norm": 0.8245054483413696,
|
| 42269 |
+
"learning_rate": 7.594111053758623e-07,
|
| 42270 |
+
"loss": 0.4993,
|
| 42271 |
+
"step": 6002
|
| 42272 |
+
},
|
| 42273 |
+
{
|
| 42274 |
+
"epoch": 2.8829391283467403,
|
| 42275 |
+
"grad_norm": 1.2777291536331177,
|
| 42276 |
+
"learning_rate": 7.532069896093675e-07,
|
| 42277 |
+
"loss": 0.5102,
|
| 42278 |
+
"step": 6003
|
| 42279 |
+
},
|
| 42280 |
+
{
|
| 42281 |
+
"epoch": 2.8834193780765998,
|
| 42282 |
+
"grad_norm": 1.0950323343276978,
|
| 42283 |
+
"learning_rate": 7.470282247910132e-07,
|
| 42284 |
+
"loss": 0.4979,
|
| 42285 |
+
"step": 6004
|
| 42286 |
+
},
|
| 42287 |
+
{
|
| 42288 |
+
"epoch": 2.8838996278064593,
|
| 42289 |
+
"grad_norm": 0.9586261510848999,
|
| 42290 |
+
"learning_rate": 7.408748124990594e-07,
|
| 42291 |
+
"loss": 0.4817,
|
| 42292 |
+
"step": 6005
|
| 42293 |
+
},
|
| 42294 |
+
{
|
| 42295 |
+
"epoch": 2.8843798775363187,
|
| 42296 |
+
"grad_norm": 1.327382206916809,
|
| 42297 |
+
"learning_rate": 7.347467543052932e-07,
|
| 42298 |
+
"loss": 0.544,
|
| 42299 |
+
"step": 6006
|
| 42300 |
+
},
|
| 42301 |
+
{
|
| 42302 |
+
"epoch": 2.8848601272661787,
|
| 42303 |
+
"grad_norm": 0.7627050876617432,
|
| 42304 |
+
"learning_rate": 7.286440517750181e-07,
|
| 42305 |
+
"loss": 0.4609,
|
| 42306 |
+
"step": 6007
|
| 42307 |
+
},
|
| 42308 |
+
{
|
| 42309 |
+
"epoch": 2.885340376996038,
|
| 42310 |
+
"grad_norm": 0.9781989455223083,
|
| 42311 |
+
"learning_rate": 7.22566706467076e-07,
|
| 42312 |
+
"loss": 0.5256,
|
| 42313 |
+
"step": 6008
|
| 42314 |
+
},
|
| 42315 |
+
{
|
| 42316 |
+
"epoch": 2.8858206267258977,
|
| 42317 |
+
"grad_norm": 1.4245251417160034,
|
| 42318 |
+
"learning_rate": 7.16514719933803e-07,
|
| 42319 |
+
"loss": 0.5307,
|
| 42320 |
+
"step": 6009
|
| 42321 |
+
},
|
| 42322 |
+
{
|
| 42323 |
+
"epoch": 2.886300876455757,
|
| 42324 |
+
"grad_norm": 1.1358163356781006,
|
| 42325 |
+
"learning_rate": 7.104880937211178e-07,
|
| 42326 |
+
"loss": 0.5862,
|
| 42327 |
+
"step": 6010
|
| 42328 |
+
},
|
| 42329 |
+
{
|
| 42330 |
+
"epoch": 2.8867811261856167,
|
| 42331 |
+
"grad_norm": 0.8729516863822937,
|
| 42332 |
+
"learning_rate": 7.044868293683893e-07,
|
| 42333 |
+
"loss": 0.4935,
|
| 42334 |
+
"step": 6011
|
| 42335 |
+
},
|
| 42336 |
+
{
|
| 42337 |
+
"epoch": 2.887261375915476,
|
| 42338 |
+
"grad_norm": 1.374247670173645,
|
| 42339 |
+
"learning_rate": 6.985109284085578e-07,
|
| 42340 |
+
"loss": 0.5493,
|
| 42341 |
+
"step": 6012
|
| 42342 |
+
},
|
| 42343 |
+
{
|
| 42344 |
+
"epoch": 2.8877416256453357,
|
| 42345 |
+
"grad_norm": 1.301226258277893,
|
| 42346 |
+
"learning_rate": 6.925603923680579e-07,
|
| 42347 |
+
"loss": 0.5663,
|
| 42348 |
+
"step": 6013
|
| 42349 |
+
},
|
| 42350 |
+
{
|
| 42351 |
+
"epoch": 2.888221875375195,
|
| 42352 |
+
"grad_norm": 1.1913409233093262,
|
| 42353 |
+
"learning_rate": 6.866352227668626e-07,
|
| 42354 |
+
"loss": 0.5533,
|
| 42355 |
+
"step": 6014
|
| 42356 |
+
},
|
| 42357 |
+
{
|
| 42358 |
+
"epoch": 2.8887021251050546,
|
| 42359 |
+
"grad_norm": 0.9043430685997009,
|
| 42360 |
+
"learning_rate": 6.807354211184613e-07,
|
| 42361 |
+
"loss": 0.4747,
|
| 42362 |
+
"step": 6015
|
| 42363 |
+
},
|
| 42364 |
+
{
|
| 42365 |
+
"epoch": 2.889182374834914,
|
| 42366 |
+
"grad_norm": 0.9711902737617493,
|
| 42367 |
+
"learning_rate": 6.748609889298596e-07,
|
| 42368 |
+
"loss": 0.5061,
|
| 42369 |
+
"step": 6016
|
| 42370 |
+
},
|
| 42371 |
+
{
|
| 42372 |
+
"epoch": 2.8896626245647736,
|
| 42373 |
+
"grad_norm": 1.0060513019561768,
|
| 42374 |
+
"learning_rate": 6.690119277015683e-07,
|
| 42375 |
+
"loss": 0.4921,
|
| 42376 |
+
"step": 6017
|
| 42377 |
+
},
|
| 42378 |
+
{
|
| 42379 |
+
"epoch": 2.890142874294633,
|
| 42380 |
+
"grad_norm": 1.0134211778640747,
|
| 42381 |
+
"learning_rate": 6.631882389276478e-07,
|
| 42382 |
+
"loss": 0.5235,
|
| 42383 |
+
"step": 6018
|
| 42384 |
+
},
|
| 42385 |
+
{
|
| 42386 |
+
"epoch": 2.8906231240244926,
|
| 42387 |
+
"grad_norm": 0.9697439670562744,
|
| 42388 |
+
"learning_rate": 6.573899240956527e-07,
|
| 42389 |
+
"loss": 0.4913,
|
| 42390 |
+
"step": 6019
|
| 42391 |
+
},
|
| 42392 |
+
{
|
| 42393 |
+
"epoch": 2.891103373754352,
|
| 42394 |
+
"grad_norm": 0.8868252635002136,
|
| 42395 |
+
"learning_rate": 6.51616984686676e-07,
|
| 42396 |
+
"loss": 0.4761,
|
| 42397 |
+
"step": 6020
|
| 42398 |
+
},
|
| 42399 |
+
{
|
| 42400 |
+
"epoch": 2.8915836234842116,
|
| 42401 |
+
"grad_norm": 0.9749808311462402,
|
| 42402 |
+
"learning_rate": 6.45869422175327e-07,
|
| 42403 |
+
"loss": 0.4921,
|
| 42404 |
+
"step": 6021
|
| 42405 |
+
},
|
| 42406 |
+
{
|
| 42407 |
+
"epoch": 2.892063873214071,
|
| 42408 |
+
"grad_norm": 0.9134318232536316,
|
| 42409 |
+
"learning_rate": 6.401472380297091e-07,
|
| 42410 |
+
"loss": 0.4945,
|
| 42411 |
+
"step": 6022
|
| 42412 |
+
},
|
| 42413 |
+
{
|
| 42414 |
+
"epoch": 2.8925441229439306,
|
| 42415 |
+
"grad_norm": 1.094684362411499,
|
| 42416 |
+
"learning_rate": 6.344504337114643e-07,
|
| 42417 |
+
"loss": 0.5318,
|
| 42418 |
+
"step": 6023
|
| 42419 |
+
},
|
| 42420 |
+
{
|
| 42421 |
+
"epoch": 2.8930243726737905,
|
| 42422 |
+
"grad_norm": 1.2772841453552246,
|
| 42423 |
+
"learning_rate": 6.287790106757396e-07,
|
| 42424 |
+
"loss": 0.4856,
|
| 42425 |
+
"step": 6024
|
| 42426 |
+
},
|
| 42427 |
+
{
|
| 42428 |
+
"epoch": 2.89350462240365,
|
| 42429 |
+
"grad_norm": 0.9391446113586426,
|
| 42430 |
+
"learning_rate": 6.231329703712207e-07,
|
| 42431 |
+
"loss": 0.525,
|
| 42432 |
+
"step": 6025
|
| 42433 |
+
},
|
| 42434 |
+
{
|
| 42435 |
+
"epoch": 2.8939848721335095,
|
| 42436 |
+
"grad_norm": 1.024240493774414,
|
| 42437 |
+
"learning_rate": 6.175123142400985e-07,
|
| 42438 |
+
"loss": 0.495,
|
| 42439 |
+
"step": 6026
|
| 42440 |
+
},
|
| 42441 |
+
{
|
| 42442 |
+
"epoch": 2.894465121863369,
|
| 42443 |
+
"grad_norm": 0.9741129279136658,
|
| 42444 |
+
"learning_rate": 6.119170437180466e-07,
|
| 42445 |
+
"loss": 0.5282,
|
| 42446 |
+
"step": 6027
|
| 42447 |
+
},
|
| 42448 |
+
{
|
| 42449 |
+
"epoch": 2.8949453715932285,
|
| 42450 |
+
"grad_norm": 0.9005728363990784,
|
| 42451 |
+
"learning_rate": 6.063471602343218e-07,
|
| 42452 |
+
"loss": 0.5017,
|
| 42453 |
+
"step": 6028
|
| 42454 |
+
},
|
| 42455 |
+
{
|
| 42456 |
+
"epoch": 2.895425621323088,
|
| 42457 |
+
"grad_norm": 1.0421838760375977,
|
| 42458 |
+
"learning_rate": 6.008026652116305e-07,
|
| 42459 |
+
"loss": 0.4854,
|
| 42460 |
+
"step": 6029
|
| 42461 |
+
},
|
| 42462 |
+
{
|
| 42463 |
+
"epoch": 2.8959058710529475,
|
| 42464 |
+
"grad_norm": 0.951676070690155,
|
| 42465 |
+
"learning_rate": 5.952835600662288e-07,
|
| 42466 |
+
"loss": 0.4992,
|
| 42467 |
+
"step": 6030
|
| 42468 |
+
},
|
| 42469 |
+
{
|
| 42470 |
+
"epoch": 2.896386120782807,
|
| 42471 |
+
"grad_norm": 0.9691091775894165,
|
| 42472 |
+
"learning_rate": 5.897898462078888e-07,
|
| 42473 |
+
"loss": 0.5214,
|
| 42474 |
+
"step": 6031
|
| 42475 |
+
},
|
| 42476 |
+
{
|
| 42477 |
+
"epoch": 2.8968663705126665,
|
| 42478 |
+
"grad_norm": 0.8523342609405518,
|
| 42479 |
+
"learning_rate": 5.843215250398882e-07,
|
| 42480 |
+
"loss": 0.4728,
|
| 42481 |
+
"step": 6032
|
| 42482 |
+
},
|
| 42483 |
+
{
|
| 42484 |
+
"epoch": 2.897346620242526,
|
| 42485 |
+
"grad_norm": 0.9989683032035828,
|
| 42486 |
+
"learning_rate": 5.788785979590095e-07,
|
| 42487 |
+
"loss": 0.4664,
|
| 42488 |
+
"step": 6033
|
| 42489 |
+
},
|
| 42490 |
+
{
|
| 42491 |
+
"epoch": 2.8978268699723855,
|
| 42492 |
+
"grad_norm": 1.4147025346755981,
|
| 42493 |
+
"learning_rate": 5.734610663555628e-07,
|
| 42494 |
+
"loss": 0.5576,
|
| 42495 |
+
"step": 6034
|
| 42496 |
+
},
|
| 42497 |
+
{
|
| 42498 |
+
"epoch": 2.8983071197022454,
|
| 42499 |
+
"grad_norm": 1.0927692651748657,
|
| 42500 |
+
"learning_rate": 5.680689316133636e-07,
|
| 42501 |
+
"loss": 0.5354,
|
| 42502 |
+
"step": 6035
|
| 42503 |
+
},
|
| 42504 |
+
{
|
| 42505 |
+
"epoch": 2.898787369432105,
|
| 42506 |
+
"grad_norm": 1.0628910064697266,
|
| 42507 |
+
"learning_rate": 5.627021951097545e-07,
|
| 42508 |
+
"loss": 0.4913,
|
| 42509 |
+
"step": 6036
|
| 42510 |
+
},
|
| 42511 |
+
{
|
| 42512 |
+
"epoch": 2.8992676191619644,
|
| 42513 |
+
"grad_norm": 0.9283753037452698,
|
| 42514 |
+
"learning_rate": 5.573608582155721e-07,
|
| 42515 |
+
"loss": 0.5329,
|
| 42516 |
+
"step": 6037
|
| 42517 |
+
},
|
| 42518 |
+
{
|
| 42519 |
+
"epoch": 2.899747868891824,
|
| 42520 |
+
"grad_norm": 0.8634472489356995,
|
| 42521 |
+
"learning_rate": 5.520449222951585e-07,
|
| 42522 |
+
"loss": 0.4574,
|
| 42523 |
+
"step": 6038
|
| 42524 |
+
},
|
| 42525 |
+
{
|
| 42526 |
+
"epoch": 2.9002281186216834,
|
| 42527 |
+
"grad_norm": 0.9945970177650452,
|
| 42528 |
+
"learning_rate": 5.467543887064053e-07,
|
| 42529 |
+
"loss": 0.4786,
|
| 42530 |
+
"step": 6039
|
| 42531 |
+
},
|
| 42532 |
+
{
|
| 42533 |
+
"epoch": 2.900708368351543,
|
| 42534 |
+
"grad_norm": 1.0942494869232178,
|
| 42535 |
+
"learning_rate": 5.41489258800687e-07,
|
| 42536 |
+
"loss": 0.5294,
|
| 42537 |
+
"step": 6040
|
| 42538 |
+
},
|
| 42539 |
+
{
|
| 42540 |
+
"epoch": 2.9011886180814024,
|
| 42541 |
+
"grad_norm": 1.1848549842834473,
|
| 42542 |
+
"learning_rate": 5.362495339228834e-07,
|
| 42543 |
+
"loss": 0.5498,
|
| 42544 |
+
"step": 6041
|
| 42545 |
+
},
|
| 42546 |
+
{
|
| 42547 |
+
"epoch": 2.901668867811262,
|
| 42548 |
+
"grad_norm": 1.313307762145996,
|
| 42549 |
+
"learning_rate": 5.310352154113907e-07,
|
| 42550 |
+
"loss": 0.524,
|
| 42551 |
+
"step": 6042
|
| 42552 |
+
},
|
| 42553 |
+
{
|
| 42554 |
+
"epoch": 2.9021491175411214,
|
| 42555 |
+
"grad_norm": 1.1878790855407715,
|
| 42556 |
+
"learning_rate": 5.258463045981432e-07,
|
| 42557 |
+
"loss": 0.5353,
|
| 42558 |
+
"step": 6043
|
| 42559 |
+
},
|
| 42560 |
+
{
|
| 42561 |
+
"epoch": 2.902629367270981,
|
| 42562 |
+
"grad_norm": 1.283842921257019,
|
| 42563 |
+
"learning_rate": 5.206828028085364e-07,
|
| 42564 |
+
"loss": 0.5463,
|
| 42565 |
+
"step": 6044
|
| 42566 |
+
},
|
| 42567 |
+
{
|
| 42568 |
+
"epoch": 2.9031096170008404,
|
| 42569 |
+
"grad_norm": 1.003977656364441,
|
| 42570 |
+
"learning_rate": 5.155447113615153e-07,
|
| 42571 |
+
"loss": 0.4998,
|
| 42572 |
+
"step": 6045
|
| 42573 |
+
},
|
| 42574 |
+
{
|
| 42575 |
+
"epoch": 2.9035898667307,
|
| 42576 |
+
"grad_norm": 0.9022426009178162,
|
| 42577 |
+
"learning_rate": 5.104320315695188e-07,
|
| 42578 |
+
"loss": 0.5119,
|
| 42579 |
+
"step": 6046
|
| 42580 |
+
},
|
| 42581 |
+
{
|
| 42582 |
+
"epoch": 2.9040701164605593,
|
| 42583 |
+
"grad_norm": 0.957444965839386,
|
| 42584 |
+
"learning_rate": 5.053447647385023e-07,
|
| 42585 |
+
"loss": 0.4884,
|
| 42586 |
+
"step": 6047
|
| 42587 |
+
},
|
| 42588 |
+
{
|
| 42589 |
+
"epoch": 2.904550366190419,
|
| 42590 |
+
"grad_norm": 0.9504349827766418,
|
| 42591 |
+
"learning_rate": 5.002829121679153e-07,
|
| 42592 |
+
"loss": 0.532,
|
| 42593 |
+
"step": 6048
|
| 42594 |
+
},
|
| 42595 |
+
{
|
| 42596 |
+
"epoch": 2.9050306159202783,
|
| 42597 |
+
"grad_norm": 1.0048445463180542,
|
| 42598 |
+
"learning_rate": 4.952464751507235e-07,
|
| 42599 |
+
"loss": 0.5297,
|
| 42600 |
+
"step": 6049
|
| 42601 |
+
},
|
| 42602 |
+
{
|
| 42603 |
+
"epoch": 2.905510865650138,
|
| 42604 |
+
"grad_norm": 0.8786672949790955,
|
| 42605 |
+
"learning_rate": 4.902354549733978e-07,
|
| 42606 |
+
"loss": 0.4675,
|
| 42607 |
+
"step": 6050
|
| 42608 |
+
},
|
| 42609 |
+
{
|
| 42610 |
+
"epoch": 2.9059911153799973,
|
| 42611 |
+
"grad_norm": 1.1563762426376343,
|
| 42612 |
+
"learning_rate": 4.852498529159366e-07,
|
| 42613 |
+
"loss": 0.5548,
|
| 42614 |
+
"step": 6051
|
| 42615 |
+
},
|
| 42616 |
+
{
|
| 42617 |
+
"epoch": 2.9064713651098573,
|
| 42618 |
+
"grad_norm": 1.21589195728302,
|
| 42619 |
+
"learning_rate": 4.802896702518101e-07,
|
| 42620 |
+
"loss": 0.5141,
|
| 42621 |
+
"step": 6052
|
| 42622 |
+
},
|
| 42623 |
+
{
|
| 42624 |
+
"epoch": 2.9069516148397168,
|
| 42625 |
+
"grad_norm": 1.1137498617172241,
|
| 42626 |
+
"learning_rate": 4.7535490824802686e-07,
|
| 42627 |
+
"loss": 0.4656,
|
| 42628 |
+
"step": 6053
|
| 42629 |
+
},
|
| 42630 |
+
{
|
| 42631 |
+
"epoch": 2.9074318645695763,
|
| 42632 |
+
"grad_norm": 0.9434409737586975,
|
| 42633 |
+
"learning_rate": 4.704455681650788e-07,
|
| 42634 |
+
"loss": 0.5169,
|
| 42635 |
+
"step": 6054
|
| 42636 |
+
},
|
| 42637 |
+
{
|
| 42638 |
+
"epoch": 2.9079121142994357,
|
| 42639 |
+
"grad_norm": 1.3749167919158936,
|
| 42640 |
+
"learning_rate": 4.6556165125699604e-07,
|
| 42641 |
+
"loss": 0.5646,
|
| 42642 |
+
"step": 6055
|
| 42643 |
+
},
|
| 42644 |
+
{
|
| 42645 |
+
"epoch": 2.9083923640292952,
|
| 42646 |
+
"grad_norm": 0.9305620789527893,
|
| 42647 |
+
"learning_rate": 4.6070315877126957e-07,
|
| 42648 |
+
"loss": 0.4873,
|
| 42649 |
+
"step": 6056
|
| 42650 |
+
},
|
| 42651 |
+
{
|
| 42652 |
+
"epoch": 2.9088726137591547,
|
| 42653 |
+
"grad_norm": 1.0196714401245117,
|
| 42654 |
+
"learning_rate": 4.5587009194894004e-07,
|
| 42655 |
+
"loss": 0.5063,
|
| 42656 |
+
"step": 6057
|
| 42657 |
+
},
|
| 42658 |
+
{
|
| 42659 |
+
"epoch": 2.9093528634890142,
|
| 42660 |
+
"grad_norm": 1.0446749925613403,
|
| 42661 |
+
"learning_rate": 4.51062452024531e-07,
|
| 42662 |
+
"loss": 0.4861,
|
| 42663 |
+
"step": 6058
|
| 42664 |
+
},
|
| 42665 |
+
{
|
| 42666 |
+
"epoch": 2.9098331132188737,
|
| 42667 |
+
"grad_norm": 1.1384238004684448,
|
| 42668 |
+
"learning_rate": 4.462802402260602e-07,
|
| 42669 |
+
"loss": 0.5303,
|
| 42670 |
+
"step": 6059
|
| 42671 |
+
},
|
| 42672 |
+
{
|
| 42673 |
+
"epoch": 2.910313362948733,
|
| 42674 |
+
"grad_norm": 0.9151965975761414,
|
| 42675 |
+
"learning_rate": 4.415234577750726e-07,
|
| 42676 |
+
"loss": 0.5,
|
| 42677 |
+
"step": 6060
|
| 42678 |
+
},
|
| 42679 |
+
{
|
| 42680 |
+
"epoch": 2.9107936126785927,
|
| 42681 |
+
"grad_norm": 2.2920780181884766,
|
| 42682 |
+
"learning_rate": 4.3679210588661866e-07,
|
| 42683 |
+
"loss": 0.6922,
|
| 42684 |
+
"step": 6061
|
| 42685 |
+
},
|
| 42686 |
+
{
|
| 42687 |
+
"epoch": 2.9112738624084527,
|
| 42688 |
+
"grad_norm": 0.9247322678565979,
|
| 42689 |
+
"learning_rate": 4.320861857692315e-07,
|
| 42690 |
+
"loss": 0.5214,
|
| 42691 |
+
"step": 6062
|
| 42692 |
+
},
|
| 42693 |
+
{
|
| 42694 |
+
"epoch": 2.911754112138312,
|
| 42695 |
+
"grad_norm": 0.8479911088943481,
|
| 42696 |
+
"learning_rate": 4.2740569862497193e-07,
|
| 42697 |
+
"loss": 0.4689,
|
| 42698 |
+
"step": 6063
|
| 42699 |
+
},
|
| 42700 |
+
{
|
| 42701 |
+
"epoch": 2.9122343618681716,
|
| 42702 |
+
"grad_norm": 1.0244017839431763,
|
| 42703 |
+
"learning_rate": 4.227506456493835e-07,
|
| 42704 |
+
"loss": 0.5034,
|
| 42705 |
+
"step": 6064
|
| 42706 |
+
},
|
| 42707 |
+
{
|
| 42708 |
+
"epoch": 2.912714611598031,
|
| 42709 |
+
"grad_norm": 0.9050112962722778,
|
| 42710 |
+
"learning_rate": 4.181210280315151e-07,
|
| 42711 |
+
"loss": 0.4883,
|
| 42712 |
+
"step": 6065
|
| 42713 |
+
},
|
| 42714 |
+
{
|
| 42715 |
+
"epoch": 2.9131948613278906,
|
| 42716 |
+
"grad_norm": 0.8834241628646851,
|
| 42717 |
+
"learning_rate": 4.13516846953943e-07,
|
| 42718 |
+
"loss": 0.4912,
|
| 42719 |
+
"step": 6066
|
| 42720 |
+
},
|
| 42721 |
+
{
|
| 42722 |
+
"epoch": 2.91367511105775,
|
| 42723 |
+
"grad_norm": 1.1294069290161133,
|
| 42724 |
+
"learning_rate": 4.0893810359272645e-07,
|
| 42725 |
+
"loss": 0.5391,
|
| 42726 |
+
"step": 6067
|
| 42727 |
+
},
|
| 42728 |
+
{
|
| 42729 |
+
"epoch": 2.9141553607876096,
|
| 42730 |
+
"grad_norm": 1.0348907709121704,
|
| 42731 |
+
"learning_rate": 4.043847991174188e-07,
|
| 42732 |
+
"loss": 0.5425,
|
| 42733 |
+
"step": 6068
|
| 42734 |
+
},
|
| 42735 |
+
{
|
| 42736 |
+
"epoch": 2.914635610517469,
|
| 42737 |
+
"grad_norm": 0.9479727149009705,
|
| 42738 |
+
"learning_rate": 3.9985693469108966e-07,
|
| 42739 |
+
"loss": 0.4748,
|
| 42740 |
+
"step": 6069
|
| 42741 |
+
},
|
| 42742 |
+
{
|
| 42743 |
+
"epoch": 2.9151158602473286,
|
| 42744 |
+
"grad_norm": 1.0968918800354004,
|
| 42745 |
+
"learning_rate": 3.953545114703139e-07,
|
| 42746 |
+
"loss": 0.5308,
|
| 42747 |
+
"step": 6070
|
| 42748 |
+
},
|
| 42749 |
+
{
|
| 42750 |
+
"epoch": 2.915596109977188,
|
| 42751 |
+
"grad_norm": 1.1369953155517578,
|
| 42752 |
+
"learning_rate": 3.908775306051604e-07,
|
| 42753 |
+
"loss": 0.5205,
|
| 42754 |
+
"step": 6071
|
| 42755 |
+
},
|
| 42756 |
+
{
|
| 42757 |
+
"epoch": 2.9160763597070476,
|
| 42758 |
+
"grad_norm": 0.9642462730407715,
|
| 42759 |
+
"learning_rate": 3.864259932391923e-07,
|
| 42760 |
+
"loss": 0.5189,
|
| 42761 |
+
"step": 6072
|
| 42762 |
+
},
|
| 42763 |
+
{
|
| 42764 |
+
"epoch": 2.916556609436907,
|
| 42765 |
+
"grad_norm": 1.1746912002563477,
|
| 42766 |
+
"learning_rate": 3.819999005094776e-07,
|
| 42767 |
+
"loss": 0.5465,
|
| 42768 |
+
"step": 6073
|
| 42769 |
+
},
|
| 42770 |
+
{
|
| 42771 |
+
"epoch": 2.9170368591667666,
|
| 42772 |
+
"grad_norm": 0.9538284540176392,
|
| 42773 |
+
"learning_rate": 3.775992535466011e-07,
|
| 42774 |
+
"loss": 0.5,
|
| 42775 |
+
"step": 6074
|
| 42776 |
+
},
|
| 42777 |
+
{
|
| 42778 |
+
"epoch": 2.917517108896626,
|
| 42779 |
+
"grad_norm": 1.2396122217178345,
|
| 42780 |
+
"learning_rate": 3.732240534746301e-07,
|
| 42781 |
+
"loss": 0.5408,
|
| 42782 |
+
"step": 6075
|
| 42783 |
+
},
|
| 42784 |
+
{
|
| 42785 |
+
"epoch": 2.9179973586264856,
|
| 42786 |
+
"grad_norm": 0.9474676847457886,
|
| 42787 |
+
"learning_rate": 3.688743014111262e-07,
|
| 42788 |
+
"loss": 0.4935,
|
| 42789 |
+
"step": 6076
|
| 42790 |
+
},
|
| 42791 |
+
{
|
| 42792 |
+
"epoch": 2.918477608356345,
|
| 42793 |
+
"grad_norm": 1.2469604015350342,
|
| 42794 |
+
"learning_rate": 3.6454999846717855e-07,
|
| 42795 |
+
"loss": 0.5144,
|
| 42796 |
+
"step": 6077
|
| 42797 |
+
},
|
| 42798 |
+
{
|
| 42799 |
+
"epoch": 2.9189578580862046,
|
| 42800 |
+
"grad_norm": 1.8287394046783447,
|
| 42801 |
+
"learning_rate": 3.6025114574734785e-07,
|
| 42802 |
+
"loss": 0.5973,
|
| 42803 |
+
"step": 6078
|
| 42804 |
+
},
|
| 42805 |
+
{
|
| 42806 |
+
"epoch": 2.9194381078160645,
|
| 42807 |
+
"grad_norm": 1.6717603206634521,
|
| 42808 |
+
"learning_rate": 3.5597774434971143e-07,
|
| 42809 |
+
"loss": 0.4906,
|
| 42810 |
+
"step": 6079
|
| 42811 |
+
},
|
| 42812 |
+
{
|
| 42813 |
+
"epoch": 2.919918357545924,
|
| 42814 |
+
"grad_norm": 0.9443919658660889,
|
| 42815 |
+
"learning_rate": 3.517297953658405e-07,
|
| 42816 |
+
"loss": 0.4733,
|
| 42817 |
+
"step": 6080
|
| 42818 |
+
},
|
| 42819 |
+
{
|
| 42820 |
+
"epoch": 2.9203986072757835,
|
| 42821 |
+
"grad_norm": 0.8544421792030334,
|
| 42822 |
+
"learning_rate": 3.4750729988078934e-07,
|
| 42823 |
+
"loss": 0.4918,
|
| 42824 |
+
"step": 6081
|
| 42825 |
+
},
|
| 42826 |
+
{
|
| 42827 |
+
"epoch": 2.920878857005643,
|
| 42828 |
+
"grad_norm": 0.949880838394165,
|
| 42829 |
+
"learning_rate": 3.4331025897313964e-07,
|
| 42830 |
+
"loss": 0.4985,
|
| 42831 |
+
"step": 6082
|
| 42832 |
+
},
|
| 42833 |
+
{
|
| 42834 |
+
"epoch": 2.9213591067355025,
|
| 42835 |
+
"grad_norm": 1.0241985321044922,
|
| 42836 |
+
"learning_rate": 3.391386737149449e-07,
|
| 42837 |
+
"loss": 0.5004,
|
| 42838 |
+
"step": 6083
|
| 42839 |
+
},
|
| 42840 |
+
{
|
| 42841 |
+
"epoch": 2.921839356465362,
|
| 42842 |
+
"grad_norm": 0.9273295402526855,
|
| 42843 |
+
"learning_rate": 3.3499254517177503e-07,
|
| 42844 |
+
"loss": 0.5129,
|
| 42845 |
+
"step": 6084
|
| 42846 |
+
},
|
| 42847 |
+
{
|
| 42848 |
+
"epoch": 2.9223196061952215,
|
| 42849 |
+
"grad_norm": 1.095513105392456,
|
| 42850 |
+
"learning_rate": 3.3087187440268287e-07,
|
| 42851 |
+
"loss": 0.5352,
|
| 42852 |
+
"step": 6085
|
| 42853 |
+
},
|
| 42854 |
+
{
|
| 42855 |
+
"epoch": 2.922799855925081,
|
| 42856 |
+
"grad_norm": 0.8524777293205261,
|
| 42857 |
+
"learning_rate": 3.267766624602375e-07,
|
| 42858 |
+
"loss": 0.4843,
|
| 42859 |
+
"step": 6086
|
| 42860 |
+
},
|
| 42861 |
+
{
|
| 42862 |
+
"epoch": 2.9232801056549405,
|
| 42863 |
+
"grad_norm": 0.8703526258468628,
|
| 42864 |
+
"learning_rate": 3.2270691039048004e-07,
|
| 42865 |
+
"loss": 0.4754,
|
| 42866 |
+
"step": 6087
|
| 42867 |
+
},
|
| 42868 |
+
{
|
| 42869 |
+
"epoch": 2.9237603553848,
|
| 42870 |
+
"grad_norm": 1.0583552122116089,
|
| 42871 |
+
"learning_rate": 3.1866261923296783e-07,
|
| 42872 |
+
"loss": 0.5172,
|
| 42873 |
+
"step": 6088
|
| 42874 |
+
},
|
| 42875 |
+
{
|
| 42876 |
+
"epoch": 2.92424060511466,
|
| 42877 |
+
"grad_norm": 0.9731481671333313,
|
| 42878 |
+
"learning_rate": 3.146437900207411e-07,
|
| 42879 |
+
"loss": 0.5157,
|
| 42880 |
+
"step": 6089
|
| 42881 |
+
},
|
| 42882 |
+
{
|
| 42883 |
+
"epoch": 2.9247208548445194,
|
| 42884 |
+
"grad_norm": 0.992031455039978,
|
| 42885 |
+
"learning_rate": 3.1065042378034535e-07,
|
| 42886 |
+
"loss": 0.4894,
|
| 42887 |
+
"step": 6090
|
| 42888 |
+
},
|
| 42889 |
+
{
|
| 42890 |
+
"epoch": 2.925201104574379,
|
| 42891 |
+
"grad_norm": 0.9598735570907593,
|
| 42892 |
+
"learning_rate": 3.066825215318314e-07,
|
| 42893 |
+
"loss": 0.4762,
|
| 42894 |
+
"step": 6091
|
| 42895 |
+
},
|
| 42896 |
+
{
|
| 42897 |
+
"epoch": 2.9256813543042384,
|
| 42898 |
+
"grad_norm": 1.3992528915405273,
|
| 42899 |
+
"learning_rate": 3.027400842887218e-07,
|
| 42900 |
+
"loss": 0.5185,
|
| 42901 |
+
"step": 6092
|
| 42902 |
+
},
|
| 42903 |
+
{
|
| 42904 |
+
"epoch": 2.926161604034098,
|
| 42905 |
+
"grad_norm": 1.0443490743637085,
|
| 42906 |
+
"learning_rate": 2.988231130580554e-07,
|
| 42907 |
+
"loss": 0.5149,
|
| 42908 |
+
"step": 6093
|
| 42909 |
+
},
|
| 42910 |
+
{
|
| 42911 |
+
"epoch": 2.9266418537639574,
|
| 42912 |
+
"grad_norm": 1.0154131650924683,
|
| 42913 |
+
"learning_rate": 2.949316088403542e-07,
|
| 42914 |
+
"loss": 0.5188,
|
| 42915 |
+
"step": 6094
|
| 42916 |
+
},
|
| 42917 |
+
{
|
| 42918 |
+
"epoch": 2.927122103493817,
|
| 42919 |
+
"grad_norm": 0.9042947888374329,
|
| 42920 |
+
"learning_rate": 2.91065572629623e-07,
|
| 42921 |
+
"loss": 0.5095,
|
| 42922 |
+
"step": 6095
|
| 42923 |
+
},
|
| 42924 |
+
{
|
| 42925 |
+
"epoch": 2.9276023532236763,
|
| 42926 |
+
"grad_norm": 0.8214787244796753,
|
| 42927 |
+
"learning_rate": 2.872250054134051e-07,
|
| 42928 |
+
"loss": 0.4628,
|
| 42929 |
+
"step": 6096
|
| 42930 |
+
},
|
| 42931 |
+
{
|
| 42932 |
+
"epoch": 2.928082602953536,
|
| 42933 |
+
"grad_norm": 1.3600598573684692,
|
| 42934 |
+
"learning_rate": 2.8340990817269377e-07,
|
| 42935 |
+
"loss": 0.4978,
|
| 42936 |
+
"step": 6097
|
| 42937 |
+
},
|
| 42938 |
+
{
|
| 42939 |
+
"epoch": 2.9285628526833953,
|
| 42940 |
+
"grad_norm": 1.0531233549118042,
|
| 42941 |
+
"learning_rate": 2.7962028188198706e-07,
|
| 42942 |
+
"loss": 0.5192,
|
| 42943 |
+
"step": 6098
|
| 42944 |
+
},
|
| 42945 |
+
{
|
| 42946 |
+
"epoch": 2.929043102413255,
|
| 42947 |
+
"grad_norm": 2.1405086517333984,
|
| 42948 |
+
"learning_rate": 2.758561275092886e-07,
|
| 42949 |
+
"loss": 0.583,
|
| 42950 |
+
"step": 6099
|
| 42951 |
+
},
|
| 42952 |
+
{
|
| 42953 |
+
"epoch": 2.9295233521431143,
|
| 42954 |
+
"grad_norm": 1.4039134979248047,
|
| 42955 |
+
"learning_rate": 2.721174460160958e-07,
|
| 42956 |
+
"loss": 0.5415,
|
| 42957 |
+
"step": 6100
|
| 42958 |
+
},
|
| 42959 |
+
{
|
| 42960 |
+
"epoch": 2.930003601872974,
|
| 42961 |
+
"grad_norm": 0.9771497845649719,
|
| 42962 |
+
"learning_rate": 2.6840423835738926e-07,
|
| 42963 |
+
"loss": 0.4586,
|
| 42964 |
+
"step": 6101
|
| 42965 |
+
},
|
| 42966 |
+
{
|
| 42967 |
+
"epoch": 2.9304838516028333,
|
| 42968 |
+
"grad_norm": 1.1628845930099487,
|
| 42969 |
+
"learning_rate": 2.647165054816325e-07,
|
| 42970 |
+
"loss": 0.4963,
|
| 42971 |
+
"step": 6102
|
| 42972 |
+
},
|
| 42973 |
+
{
|
| 42974 |
+
"epoch": 2.930964101332693,
|
| 42975 |
+
"grad_norm": 1.2514691352844238,
|
| 42976 |
+
"learning_rate": 2.610542483308165e-07,
|
| 42977 |
+
"loss": 0.6303,
|
| 42978 |
+
"step": 6103
|
| 42979 |
+
},
|
| 42980 |
+
{
|
| 42981 |
+
"epoch": 2.9314443510625523,
|
| 42982 |
+
"grad_norm": 1.0992681980133057,
|
| 42983 |
+
"learning_rate": 2.574174678403818e-07,
|
| 42984 |
+
"loss": 0.5069,
|
| 42985 |
+
"step": 6104
|
| 42986 |
+
},
|
| 42987 |
+
{
|
| 42988 |
+
"epoch": 2.931924600792412,
|
| 42989 |
+
"grad_norm": 0.8845913410186768,
|
| 42990 |
+
"learning_rate": 2.5380616493930753e-07,
|
| 42991 |
+
"loss": 0.4808,
|
| 42992 |
+
"step": 6105
|
| 42993 |
+
},
|
| 42994 |
+
{
|
| 42995 |
+
"epoch": 2.9324048505222717,
|
| 42996 |
+
"grad_norm": 1.2922005653381348,
|
| 42997 |
+
"learning_rate": 2.5022034055003364e-07,
|
| 42998 |
+
"loss": 0.554,
|
| 42999 |
+
"step": 6106
|
| 43000 |
+
},
|
| 43001 |
+
{
|
| 43002 |
+
"epoch": 2.9328851002521312,
|
| 43003 |
+
"grad_norm": 0.9516582489013672,
|
| 43004 |
+
"learning_rate": 2.4665999558848296e-07,
|
| 43005 |
+
"loss": 0.4622,
|
| 43006 |
+
"step": 6107
|
| 43007 |
+
},
|
| 43008 |
+
{
|
| 43009 |
+
"epoch": 2.9333653499819907,
|
| 43010 |
+
"grad_norm": 0.8160479664802551,
|
| 43011 |
+
"learning_rate": 2.431251309641058e-07,
|
| 43012 |
+
"loss": 0.4416,
|
| 43013 |
+
"step": 6108
|
| 43014 |
+
},
|
| 43015 |
+
{
|
| 43016 |
+
"epoch": 2.93384559971185,
|
| 43017 |
+
"grad_norm": 1.0128464698791504,
|
| 43018 |
+
"learning_rate": 2.396157475798244e-07,
|
| 43019 |
+
"loss": 0.5294,
|
| 43020 |
+
"step": 6109
|
| 43021 |
+
},
|
| 43022 |
+
{
|
| 43023 |
+
"epoch": 2.9343258494417097,
|
| 43024 |
+
"grad_norm": 0.9867235422134399,
|
| 43025 |
+
"learning_rate": 2.361318463320439e-07,
|
| 43026 |
+
"loss": 0.5171,
|
| 43027 |
+
"step": 6110
|
| 43028 |
+
},
|
| 43029 |
+
{
|
| 43030 |
+
"epoch": 2.934806099171569,
|
| 43031 |
+
"grad_norm": 1.2857671976089478,
|
| 43032 |
+
"learning_rate": 2.326734281106746e-07,
|
| 43033 |
+
"loss": 0.4839,
|
| 43034 |
+
"step": 6111
|
| 43035 |
+
},
|
| 43036 |
+
{
|
| 43037 |
+
"epoch": 2.9352863489014287,
|
| 43038 |
+
"grad_norm": 1.1951898336410522,
|
| 43039 |
+
"learning_rate": 2.2924049379909884e-07,
|
| 43040 |
+
"loss": 0.5143,
|
| 43041 |
+
"step": 6112
|
| 43042 |
+
},
|
| 43043 |
+
{
|
| 43044 |
+
"epoch": 2.935766598631288,
|
| 43045 |
+
"grad_norm": 1.0616852045059204,
|
| 43046 |
+
"learning_rate": 2.2583304427421515e-07,
|
| 43047 |
+
"loss": 0.5287,
|
| 43048 |
+
"step": 6113
|
| 43049 |
+
},
|
| 43050 |
+
{
|
| 43051 |
+
"epoch": 2.9362468483611477,
|
| 43052 |
+
"grad_norm": 0.8602057099342346,
|
| 43053 |
+
"learning_rate": 2.2245108040640504e-07,
|
| 43054 |
+
"loss": 0.467,
|
| 43055 |
+
"step": 6114
|
| 43056 |
+
},
|
| 43057 |
+
{
|
| 43058 |
+
"epoch": 2.936727098091007,
|
| 43059 |
+
"grad_norm": 1.0095205307006836,
|
| 43060 |
+
"learning_rate": 2.190946030595331e-07,
|
| 43061 |
+
"loss": 0.5164,
|
| 43062 |
+
"step": 6115
|
| 43063 |
+
},
|
| 43064 |
+
{
|
| 43065 |
+
"epoch": 2.937207347820867,
|
| 43066 |
+
"grad_norm": 0.9440839886665344,
|
| 43067 |
+
"learning_rate": 2.1576361309093573e-07,
|
| 43068 |
+
"loss": 0.5245,
|
| 43069 |
+
"step": 6116
|
| 43070 |
+
},
|
| 43071 |
+
{
|
| 43072 |
+
"epoch": 2.9376875975507266,
|
| 43073 |
+
"grad_norm": 1.0187512636184692,
|
| 43074 |
+
"learning_rate": 2.1245811135148785e-07,
|
| 43075 |
+
"loss": 0.5355,
|
| 43076 |
+
"step": 6117
|
| 43077 |
+
},
|
| 43078 |
+
{
|
| 43079 |
+
"epoch": 2.938167847280586,
|
| 43080 |
+
"grad_norm": 1.1666048765182495,
|
| 43081 |
+
"learning_rate": 2.0917809868550298e-07,
|
| 43082 |
+
"loss": 0.5446,
|
| 43083 |
+
"step": 6118
|
| 43084 |
+
},
|
| 43085 |
+
{
|
| 43086 |
+
"epoch": 2.9386480970104456,
|
| 43087 |
+
"grad_norm": 1.482763409614563,
|
| 43088 |
+
"learning_rate": 2.05923575930822e-07,
|
| 43089 |
+
"loss": 0.5024,
|
| 43090 |
+
"step": 6119
|
| 43091 |
+
},
|
| 43092 |
+
{
|
| 43093 |
+
"epoch": 2.939128346740305,
|
| 43094 |
+
"grad_norm": 0.8116981387138367,
|
| 43095 |
+
"learning_rate": 2.0269454391874666e-07,
|
| 43096 |
+
"loss": 0.5036,
|
| 43097 |
+
"step": 6120
|
| 43098 |
+
},
|
| 43099 |
+
{
|
| 43100 |
+
"epoch": 2.9396085964701646,
|
| 43101 |
+
"grad_norm": 1.0943511724472046,
|
| 43102 |
+
"learning_rate": 1.9949100347409488e-07,
|
| 43103 |
+
"loss": 0.5927,
|
| 43104 |
+
"step": 6121
|
| 43105 |
+
},
|
| 43106 |
+
{
|
| 43107 |
+
"epoch": 2.940088846200024,
|
| 43108 |
+
"grad_norm": 1.011591911315918,
|
| 43109 |
+
"learning_rate": 1.9631295541513438e-07,
|
| 43110 |
+
"loss": 0.4953,
|
| 43111 |
+
"step": 6122
|
| 43112 |
+
},
|
| 43113 |
+
{
|
| 43114 |
+
"epoch": 2.9405690959298836,
|
| 43115 |
+
"grad_norm": 1.0537197589874268,
|
| 43116 |
+
"learning_rate": 1.9316040055366024e-07,
|
| 43117 |
+
"loss": 0.5189,
|
| 43118 |
+
"step": 6123
|
| 43119 |
+
},
|
| 43120 |
+
{
|
| 43121 |
+
"epoch": 2.941049345659743,
|
| 43122 |
+
"grad_norm": 0.989774227142334,
|
| 43123 |
+
"learning_rate": 1.9003333969493942e-07,
|
| 43124 |
+
"loss": 0.5258,
|
| 43125 |
+
"step": 6124
|
| 43126 |
+
},
|
| 43127 |
+
{
|
| 43128 |
+
"epoch": 2.9415295953896026,
|
| 43129 |
+
"grad_norm": 1.2443652153015137,
|
| 43130 |
+
"learning_rate": 1.8693177363773295e-07,
|
| 43131 |
+
"loss": 0.5496,
|
| 43132 |
+
"step": 6125
|
| 43133 |
+
},
|
| 43134 |
+
{
|
| 43135 |
+
"epoch": 2.942009845119462,
|
| 43136 |
+
"grad_norm": 1.1581547260284424,
|
| 43137 |
+
"learning_rate": 1.838557031742738e-07,
|
| 43138 |
+
"loss": 0.5221,
|
| 43139 |
+
"step": 6126
|
| 43140 |
+
},
|
| 43141 |
+
{
|
| 43142 |
+
"epoch": 2.9424900948493216,
|
| 43143 |
+
"grad_norm": 0.8082072734832764,
|
| 43144 |
+
"learning_rate": 1.8080512909028903e-07,
|
| 43145 |
+
"loss": 0.4449,
|
| 43146 |
+
"step": 6127
|
| 43147 |
+
},
|
| 43148 |
+
{
|
| 43149 |
+
"epoch": 2.942970344579181,
|
| 43150 |
+
"grad_norm": 0.9887663125991821,
|
| 43151 |
+
"learning_rate": 1.7778005216502192e-07,
|
| 43152 |
+
"loss": 0.5425,
|
| 43153 |
+
"step": 6128
|
| 43154 |
+
},
|
| 43155 |
+
{
|
| 43156 |
+
"epoch": 2.9434505943090405,
|
| 43157 |
+
"grad_norm": 1.3568744659423828,
|
| 43158 |
+
"learning_rate": 1.7478047317115442e-07,
|
| 43159 |
+
"loss": 0.5477,
|
| 43160 |
+
"step": 6129
|
| 43161 |
+
},
|
| 43162 |
+
{
|
| 43163 |
+
"epoch": 2.9439308440389,
|
| 43164 |
+
"grad_norm": 0.9268665909767151,
|
| 43165 |
+
"learning_rate": 1.7180639287488476e-07,
|
| 43166 |
+
"loss": 0.4908,
|
| 43167 |
+
"step": 6130
|
| 43168 |
+
},
|
| 43169 |
+
{
|
| 43170 |
+
"epoch": 2.9444110937687595,
|
| 43171 |
+
"grad_norm": 1.0100455284118652,
|
| 43172 |
+
"learning_rate": 1.6885781203589413e-07,
|
| 43173 |
+
"loss": 0.4889,
|
| 43174 |
+
"step": 6131
|
| 43175 |
+
},
|
| 43176 |
+
{
|
| 43177 |
+
"epoch": 2.944891343498619,
|
| 43178 |
+
"grad_norm": 1.098319172859192,
|
| 43179 |
+
"learning_rate": 1.6593473140734673e-07,
|
| 43180 |
+
"loss": 0.4875,
|
| 43181 |
+
"step": 6132
|
| 43182 |
+
},
|
| 43183 |
+
{
|
| 43184 |
+
"epoch": 2.945371593228479,
|
| 43185 |
+
"grad_norm": 1.1685726642608643,
|
| 43186 |
+
"learning_rate": 1.6303715173590085e-07,
|
| 43187 |
+
"loss": 0.519,
|
| 43188 |
+
"step": 6133
|
| 43189 |
+
},
|
| 43190 |
+
{
|
| 43191 |
+
"epoch": 2.9458518429583385,
|
| 43192 |
+
"grad_norm": 1.1108653545379639,
|
| 43193 |
+
"learning_rate": 1.6016507376169777e-07,
|
| 43194 |
+
"loss": 0.51,
|
| 43195 |
+
"step": 6134
|
| 43196 |
+
},
|
| 43197 |
+
{
|
| 43198 |
+
"epoch": 2.946332092688198,
|
| 43199 |
+
"grad_norm": 1.1678224802017212,
|
| 43200 |
+
"learning_rate": 1.5731849821833954e-07,
|
| 43201 |
+
"loss": 0.5359,
|
| 43202 |
+
"step": 6135
|
| 43203 |
+
},
|
| 43204 |
+
{
|
| 43205 |
+
"epoch": 2.9468123424180575,
|
| 43206 |
+
"grad_norm": 0.9436126947402954,
|
| 43207 |
+
"learning_rate": 1.544974258329668e-07,
|
| 43208 |
+
"loss": 0.4739,
|
| 43209 |
+
"step": 6136
|
| 43210 |
+
},
|
| 43211 |
+
{
|
| 43212 |
+
"epoch": 2.947292592147917,
|
| 43213 |
+
"grad_norm": 1.0964453220367432,
|
| 43214 |
+
"learning_rate": 1.5170185732615861e-07,
|
| 43215 |
+
"loss": 0.5157,
|
| 43216 |
+
"step": 6137
|
| 43217 |
+
},
|
| 43218 |
+
{
|
| 43219 |
+
"epoch": 2.9477728418777764,
|
| 43220 |
+
"grad_norm": 1.0992753505706787,
|
| 43221 |
+
"learning_rate": 1.4893179341199936e-07,
|
| 43222 |
+
"loss": 0.5308,
|
| 43223 |
+
"step": 6138
|
| 43224 |
+
},
|
| 43225 |
+
{
|
| 43226 |
+
"epoch": 2.948253091607636,
|
| 43227 |
+
"grad_norm": 1.032257080078125,
|
| 43228 |
+
"learning_rate": 1.461872347980564e-07,
|
| 43229 |
+
"loss": 0.5315,
|
| 43230 |
+
"step": 6139
|
| 43231 |
+
},
|
| 43232 |
+
{
|
| 43233 |
+
"epoch": 2.9487333413374954,
|
| 43234 |
+
"grad_norm": 1.2790552377700806,
|
| 43235 |
+
"learning_rate": 1.4346818218539116e-07,
|
| 43236 |
+
"loss": 0.5357,
|
| 43237 |
+
"step": 6140
|
| 43238 |
+
},
|
| 43239 |
+
{
|
| 43240 |
+
"epoch": 2.949213591067355,
|
| 43241 |
+
"grad_norm": 0.9251823425292969,
|
| 43242 |
+
"learning_rate": 1.4077463626852582e-07,
|
| 43243 |
+
"loss": 0.5084,
|
| 43244 |
+
"step": 6141
|
| 43245 |
+
},
|
| 43246 |
+
{
|
| 43247 |
+
"epoch": 2.9496938407972144,
|
| 43248 |
+
"grad_norm": 0.8136346936225891,
|
| 43249 |
+
"learning_rate": 1.3810659773547675e-07,
|
| 43250 |
+
"loss": 0.491,
|
| 43251 |
+
"step": 6142
|
| 43252 |
+
},
|
| 43253 |
+
{
|
| 43254 |
+
"epoch": 2.9501740905270744,
|
| 43255 |
+
"grad_norm": 1.1861847639083862,
|
| 43256 |
+
"learning_rate": 1.354640672677765e-07,
|
| 43257 |
+
"loss": 0.5209,
|
| 43258 |
+
"step": 6143
|
| 43259 |
+
},
|
| 43260 |
+
{
|
| 43261 |
+
"epoch": 2.950654340256934,
|
| 43262 |
+
"grad_norm": 1.9485915899276733,
|
| 43263 |
+
"learning_rate": 1.3284704554039628e-07,
|
| 43264 |
+
"loss": 0.5816,
|
| 43265 |
+
"step": 6144
|
| 43266 |
+
},
|
| 43267 |
+
{
|
| 43268 |
+
"epoch": 2.9511345899867933,
|
| 43269 |
+
"grad_norm": 1.0041749477386475,
|
| 43270 |
+
"learning_rate": 1.302555332218125e-07,
|
| 43271 |
+
"loss": 0.4833,
|
| 43272 |
+
"step": 6145
|
| 43273 |
+
},
|
| 43274 |
+
{
|
| 43275 |
+
"epoch": 2.951614839716653,
|
| 43276 |
+
"grad_norm": 0.9592583775520325,
|
| 43277 |
+
"learning_rate": 1.2768953097398451e-07,
|
| 43278 |
+
"loss": 0.4865,
|
| 43279 |
+
"step": 6146
|
| 43280 |
+
},
|
| 43281 |
+
{
|
| 43282 |
+
"epoch": 2.9520950894465123,
|
| 43283 |
+
"grad_norm": 0.9189772009849548,
|
| 43284 |
+
"learning_rate": 1.2514903945235468e-07,
|
| 43285 |
+
"loss": 0.4895,
|
| 43286 |
+
"step": 6147
|
| 43287 |
+
},
|
| 43288 |
+
{
|
| 43289 |
+
"epoch": 2.952575339176372,
|
| 43290 |
+
"grad_norm": 0.9006944894790649,
|
| 43291 |
+
"learning_rate": 1.2263405930585948e-07,
|
| 43292 |
+
"loss": 0.4491,
|
| 43293 |
+
"step": 6148
|
| 43294 |
+
},
|
| 43295 |
+
{
|
| 43296 |
+
"epoch": 2.9530555889062313,
|
| 43297 |
+
"grad_norm": 0.9726596474647522,
|
| 43298 |
+
"learning_rate": 1.2014459117689613e-07,
|
| 43299 |
+
"loss": 0.5209,
|
| 43300 |
+
"step": 6149
|
| 43301 |
+
},
|
| 43302 |
+
{
|
| 43303 |
+
"epoch": 2.953535838636091,
|
| 43304 |
+
"grad_norm": 1.1057935953140259,
|
| 43305 |
+
"learning_rate": 1.1768063570136711e-07,
|
| 43306 |
+
"loss": 0.561,
|
| 43307 |
+
"step": 6150
|
| 43308 |
+
},
|
| 43309 |
+
{
|
| 43310 |
+
"epoch": 2.9540160883659503,
|
| 43311 |
+
"grad_norm": 0.9758303761482239,
|
| 43312 |
+
"learning_rate": 1.1524219350863563e-07,
|
| 43313 |
+
"loss": 0.4964,
|
| 43314 |
+
"step": 6151
|
| 43315 |
+
},
|
| 43316 |
+
{
|
| 43317 |
+
"epoch": 2.95449633809581,
|
| 43318 |
+
"grad_norm": 0.9577860832214355,
|
| 43319 |
+
"learning_rate": 1.1282926522158122e-07,
|
| 43320 |
+
"loss": 0.4965,
|
| 43321 |
+
"step": 6152
|
| 43322 |
+
},
|
| 43323 |
+
{
|
| 43324 |
+
"epoch": 2.9549765878256693,
|
| 43325 |
+
"grad_norm": 1.118918776512146,
|
| 43326 |
+
"learning_rate": 1.1044185145653307e-07,
|
| 43327 |
+
"loss": 0.5035,
|
| 43328 |
+
"step": 6153
|
| 43329 |
+
},
|
| 43330 |
+
{
|
| 43331 |
+
"epoch": 2.955456837555529,
|
| 43332 |
+
"grad_norm": 1.3922710418701172,
|
| 43333 |
+
"learning_rate": 1.0807995282332562e-07,
|
| 43334 |
+
"loss": 0.543,
|
| 43335 |
+
"step": 6154
|
| 43336 |
+
},
|
| 43337 |
+
{
|
| 43338 |
+
"epoch": 2.9559370872853883,
|
| 43339 |
+
"grad_norm": 1.3283149003982544,
|
| 43340 |
+
"learning_rate": 1.0574356992525403e-07,
|
| 43341 |
+
"loss": 0.5835,
|
| 43342 |
+
"step": 6155
|
| 43343 |
+
},
|
| 43344 |
+
{
|
| 43345 |
+
"epoch": 2.956417337015248,
|
| 43346 |
+
"grad_norm": 0.9737280607223511,
|
| 43347 |
+
"learning_rate": 1.0343270335910759e-07,
|
| 43348 |
+
"loss": 0.5028,
|
| 43349 |
+
"step": 6156
|
| 43350 |
+
},
|
| 43351 |
+
{
|
| 43352 |
+
"epoch": 2.9568975867451073,
|
| 43353 |
+
"grad_norm": 0.8545934557914734,
|
| 43354 |
+
"learning_rate": 1.011473537151697e-07,
|
| 43355 |
+
"loss": 0.4275,
|
| 43356 |
+
"step": 6157
|
| 43357 |
+
},
|
| 43358 |
+
{
|
| 43359 |
+
"epoch": 2.9573778364749668,
|
| 43360 |
+
"grad_norm": 1.344193458557129,
|
| 43361 |
+
"learning_rate": 9.888752157719561e-08,
|
| 43362 |
+
"loss": 0.4945,
|
| 43363 |
+
"step": 6158
|
| 43364 |
+
},
|
| 43365 |
+
{
|
| 43366 |
+
"epoch": 2.9578580862048263,
|
| 43367 |
+
"grad_norm": 0.9803805351257324,
|
| 43368 |
+
"learning_rate": 9.665320752242357e-08,
|
| 43369 |
+
"loss": 0.5197,
|
| 43370 |
+
"step": 6159
|
| 43371 |
+
},
|
| 43372 |
+
{
|
| 43373 |
+
"epoch": 2.958338335934686,
|
| 43374 |
+
"grad_norm": 1.179153323173523,
|
| 43375 |
+
"learning_rate": 9.444441212155264e-08,
|
| 43376 |
+
"loss": 0.5044,
|
| 43377 |
+
"step": 6160
|
| 43378 |
+
},
|
| 43379 |
+
{
|
| 43380 |
+
"epoch": 2.9588185856645457,
|
| 43381 |
+
"grad_norm": 0.9996656179428101,
|
| 43382 |
+
"learning_rate": 9.226113593880925e-08,
|
| 43383 |
+
"loss": 0.5034,
|
| 43384 |
+
"step": 6161
|
| 43385 |
+
},
|
| 43386 |
+
{
|
| 43387 |
+
"epoch": 2.959298835394405,
|
| 43388 |
+
"grad_norm": 0.9858896732330322,
|
| 43389 |
+
"learning_rate": 9.010337953185843e-08,
|
| 43390 |
+
"loss": 0.4707,
|
| 43391 |
+
"step": 6162
|
| 43392 |
+
},
|
| 43393 |
+
{
|
| 43394 |
+
"epoch": 2.9597790851242647,
|
| 43395 |
+
"grad_norm": 1.4443416595458984,
|
| 43396 |
+
"learning_rate": 8.797114345185931e-08,
|
| 43397 |
+
"loss": 0.5373,
|
| 43398 |
+
"step": 6163
|
| 43399 |
+
},
|
| 43400 |
+
{
|
| 43401 |
+
"epoch": 2.960259334854124,
|
| 43402 |
+
"grad_norm": 1.2540090084075928,
|
| 43403 |
+
"learning_rate": 8.586442824347618e-08,
|
| 43404 |
+
"loss": 0.5084,
|
| 43405 |
+
"step": 6164
|
| 43406 |
+
},
|
| 43407 |
+
{
|
| 43408 |
+
"epoch": 2.9607395845839837,
|
| 43409 |
+
"grad_norm": 1.0520621538162231,
|
| 43410 |
+
"learning_rate": 8.378323444481196e-08,
|
| 43411 |
+
"loss": 0.5556,
|
| 43412 |
+
"step": 6165
|
| 43413 |
+
},
|
| 43414 |
+
{
|
| 43415 |
+
"epoch": 2.961219834313843,
|
| 43416 |
+
"grad_norm": 1.0082722902297974,
|
| 43417 |
+
"learning_rate": 8.172756258748581e-08,
|
| 43418 |
+
"loss": 0.5069,
|
| 43419 |
+
"step": 6166
|
| 43420 |
+
},
|
| 43421 |
+
{
|
| 43422 |
+
"epoch": 2.9617000840437027,
|
| 43423 |
+
"grad_norm": 1.133471965789795,
|
| 43424 |
+
"learning_rate": 7.969741319658886e-08,
|
| 43425 |
+
"loss": 0.5275,
|
| 43426 |
+
"step": 6167
|
| 43427 |
+
},
|
| 43428 |
+
{
|
| 43429 |
+
"epoch": 2.962180333773562,
|
| 43430 |
+
"grad_norm": 1.6830706596374512,
|
| 43431 |
+
"learning_rate": 7.769278679068404e-08,
|
| 43432 |
+
"loss": 0.4995,
|
| 43433 |
+
"step": 6168
|
| 43434 |
+
},
|
| 43435 |
+
{
|
| 43436 |
+
"epoch": 2.9626605835034217,
|
| 43437 |
+
"grad_norm": 0.9531680345535278,
|
| 43438 |
+
"learning_rate": 7.571368388181732e-08,
|
| 43439 |
+
"loss": 0.4945,
|
| 43440 |
+
"step": 6169
|
| 43441 |
+
},
|
| 43442 |
+
{
|
| 43443 |
+
"epoch": 2.963140833233281,
|
| 43444 |
+
"grad_norm": 1.3332419395446777,
|
| 43445 |
+
"learning_rate": 7.376010497551767e-08,
|
| 43446 |
+
"loss": 0.5482,
|
| 43447 |
+
"step": 6170
|
| 43448 |
+
},
|
| 43449 |
+
{
|
| 43450 |
+
"epoch": 2.963621082963141,
|
| 43451 |
+
"grad_norm": 1.0001851320266724,
|
| 43452 |
+
"learning_rate": 7.1832050570797e-08,
|
| 43453 |
+
"loss": 0.4879,
|
| 43454 |
+
"step": 6171
|
| 43455 |
+
},
|
| 43456 |
+
{
|
| 43457 |
+
"epoch": 2.9641013326930006,
|
| 43458 |
+
"grad_norm": 1.1255953311920166,
|
| 43459 |
+
"learning_rate": 6.992952116013918e-08,
|
| 43460 |
+
"loss": 0.5539,
|
| 43461 |
+
"step": 6172
|
| 43462 |
+
},
|
| 43463 |
+
{
|
| 43464 |
+
"epoch": 2.96458158242286,
|
| 43465 |
+
"grad_norm": 1.1258360147476196,
|
| 43466 |
+
"learning_rate": 6.805251722953321e-08,
|
| 43467 |
+
"loss": 0.4902,
|
| 43468 |
+
"step": 6173
|
| 43469 |
+
},
|
| 43470 |
+
{
|
| 43471 |
+
"epoch": 2.9650618321527196,
|
| 43472 |
+
"grad_norm": 1.3705956935882568,
|
| 43473 |
+
"learning_rate": 6.62010392584067e-08,
|
| 43474 |
+
"loss": 0.5516,
|
| 43475 |
+
"step": 6174
|
| 43476 |
+
},
|
| 43477 |
+
{
|
| 43478 |
+
"epoch": 2.965542081882579,
|
| 43479 |
+
"grad_norm": 0.8385779857635498,
|
| 43480 |
+
"learning_rate": 6.437508771969248e-08,
|
| 43481 |
+
"loss": 0.4961,
|
| 43482 |
+
"step": 6175
|
| 43483 |
+
},
|
| 43484 |
+
{
|
| 43485 |
+
"epoch": 2.9660223316124386,
|
| 43486 |
+
"grad_norm": 1.0862523317337036,
|
| 43487 |
+
"learning_rate": 6.25746630798063e-08,
|
| 43488 |
+
"loss": 0.5219,
|
| 43489 |
+
"step": 6176
|
| 43490 |
+
},
|
| 43491 |
+
{
|
| 43492 |
+
"epoch": 2.966502581342298,
|
| 43493 |
+
"grad_norm": 0.9276284575462341,
|
| 43494 |
+
"learning_rate": 6.079976579863589e-08,
|
| 43495 |
+
"loss": 0.4518,
|
| 43496 |
+
"step": 6177
|
| 43497 |
+
},
|
| 43498 |
+
{
|
| 43499 |
+
"epoch": 2.9669828310721575,
|
| 43500 |
+
"grad_norm": 1.0246821641921997,
|
| 43501 |
+
"learning_rate": 5.905039632954079e-08,
|
| 43502 |
+
"loss": 0.5438,
|
| 43503 |
+
"step": 6178
|
| 43504 |
+
},
|
| 43505 |
+
{
|
| 43506 |
+
"epoch": 2.967463080802017,
|
| 43507 |
+
"grad_norm": 0.8367186784744263,
|
| 43508 |
+
"learning_rate": 5.732655511938578e-08,
|
| 43509 |
+
"loss": 0.4939,
|
| 43510 |
+
"step": 6179
|
| 43511 |
+
},
|
| 43512 |
+
{
|
| 43513 |
+
"epoch": 2.9679433305318765,
|
| 43514 |
+
"grad_norm": 0.9959518313407898,
|
| 43515 |
+
"learning_rate": 5.562824260848531e-08,
|
| 43516 |
+
"loss": 0.4789,
|
| 43517 |
+
"step": 6180
|
| 43518 |
+
},
|
| 43519 |
+
{
|
| 43520 |
+
"epoch": 2.968423580261736,
|
| 43521 |
+
"grad_norm": 0.906194269657135,
|
| 43522 |
+
"learning_rate": 5.395545923063683e-08,
|
| 43523 |
+
"loss": 0.4436,
|
| 43524 |
+
"step": 6181
|
| 43525 |
+
},
|
| 43526 |
+
{
|
| 43527 |
+
"epoch": 2.9689038299915955,
|
| 43528 |
+
"grad_norm": 0.9833221435546875,
|
| 43529 |
+
"learning_rate": 5.2308205413142964e-08,
|
| 43530 |
+
"loss": 0.4892,
|
| 43531 |
+
"step": 6182
|
| 43532 |
+
},
|
| 43533 |
+
{
|
| 43534 |
+
"epoch": 2.969384079721455,
|
| 43535 |
+
"grad_norm": 0.8796568512916565,
|
| 43536 |
+
"learning_rate": 5.068648157675604e-08,
|
| 43537 |
+
"loss": 0.501,
|
| 43538 |
+
"step": 6183
|
| 43539 |
+
},
|
| 43540 |
+
{
|
| 43541 |
+
"epoch": 2.9698643294513145,
|
| 43542 |
+
"grad_norm": 1.5020475387573242,
|
| 43543 |
+
"learning_rate": 4.909028813573357e-08,
|
| 43544 |
+
"loss": 0.5203,
|
| 43545 |
+
"step": 6184
|
| 43546 |
+
},
|
| 43547 |
+
{
|
| 43548 |
+
"epoch": 2.970344579181174,
|
| 43549 |
+
"grad_norm": 0.9882216453552246,
|
| 43550 |
+
"learning_rate": 4.751962549777167e-08,
|
| 43551 |
+
"loss": 0.5011,
|
| 43552 |
+
"step": 6185
|
| 43553 |
+
},
|
| 43554 |
+
{
|
| 43555 |
+
"epoch": 2.9708248289110335,
|
| 43556 |
+
"grad_norm": 1.1814683675765991,
|
| 43557 |
+
"learning_rate": 4.597449406409382e-08,
|
| 43558 |
+
"loss": 0.5586,
|
| 43559 |
+
"step": 6186
|
| 43560 |
+
},
|
| 43561 |
+
{
|
| 43562 |
+
"epoch": 2.971305078640893,
|
| 43563 |
+
"grad_norm": 0.9532738924026489,
|
| 43564 |
+
"learning_rate": 4.445489422936211e-08,
|
| 43565 |
+
"loss": 0.5274,
|
| 43566 |
+
"step": 6187
|
| 43567 |
+
},
|
| 43568 |
+
{
|
| 43569 |
+
"epoch": 2.971785328370753,
|
| 43570 |
+
"grad_norm": 1.0383621454238892,
|
| 43571 |
+
"learning_rate": 4.296082638173271e-08,
|
| 43572 |
+
"loss": 0.4749,
|
| 43573 |
+
"step": 6188
|
| 43574 |
+
},
|
| 43575 |
+
{
|
| 43576 |
+
"epoch": 2.9722655781006124,
|
| 43577 |
+
"grad_norm": 0.7210098505020142,
|
| 43578 |
+
"learning_rate": 4.149229090285589e-08,
|
| 43579 |
+
"loss": 0.4322,
|
| 43580 |
+
"step": 6189
|
| 43581 |
+
},
|
| 43582 |
+
{
|
| 43583 |
+
"epoch": 2.972745827830472,
|
| 43584 |
+
"grad_norm": 1.1749037504196167,
|
| 43585 |
+
"learning_rate": 4.0049288167842705e-08,
|
| 43586 |
+
"loss": 0.5319,
|
| 43587 |
+
"step": 6190
|
| 43588 |
+
},
|
| 43589 |
+
{
|
| 43590 |
+
"epoch": 2.9732260775603314,
|
| 43591 |
+
"grad_norm": 1.0950450897216797,
|
| 43592 |
+
"learning_rate": 3.8631818545264986e-08,
|
| 43593 |
+
"loss": 0.4961,
|
| 43594 |
+
"step": 6191
|
| 43595 |
+
},
|
| 43596 |
+
{
|
| 43597 |
+
"epoch": 2.973706327290191,
|
| 43598 |
+
"grad_norm": 1.0302187204360962,
|
| 43599 |
+
"learning_rate": 3.7239882397210876e-08,
|
| 43600 |
+
"loss": 0.5125,
|
| 43601 |
+
"step": 6192
|
| 43602 |
+
},
|
| 43603 |
+
{
|
| 43604 |
+
"epoch": 2.9741865770200504,
|
| 43605 |
+
"grad_norm": 1.0376499891281128,
|
| 43606 |
+
"learning_rate": 3.58734800792071e-08,
|
| 43607 |
+
"loss": 0.5335,
|
| 43608 |
+
"step": 6193
|
| 43609 |
+
},
|
| 43610 |
+
{
|
| 43611 |
+
"epoch": 2.97466682674991,
|
| 43612 |
+
"grad_norm": 1.0481114387512207,
|
| 43613 |
+
"learning_rate": 3.4532611940307766e-08,
|
| 43614 |
+
"loss": 0.4871,
|
| 43615 |
+
"step": 6194
|
| 43616 |
+
},
|
| 43617 |
+
{
|
| 43618 |
+
"epoch": 2.9751470764797694,
|
| 43619 |
+
"grad_norm": 0.8409088253974915,
|
| 43620 |
+
"learning_rate": 3.321727832299448e-08,
|
| 43621 |
+
"loss": 0.4613,
|
| 43622 |
+
"step": 6195
|
| 43623 |
+
},
|
| 43624 |
+
{
|
| 43625 |
+
"epoch": 2.975627326209629,
|
| 43626 |
+
"grad_norm": 1.3083711862564087,
|
| 43627 |
+
"learning_rate": 3.1927479563254036e-08,
|
| 43628 |
+
"loss": 0.5265,
|
| 43629 |
+
"step": 6196
|
| 43630 |
+
},
|
| 43631 |
+
{
|
| 43632 |
+
"epoch": 2.9761075759394884,
|
| 43633 |
+
"grad_norm": 0.9847288727760315,
|
| 43634 |
+
"learning_rate": 3.0663215990534014e-08,
|
| 43635 |
+
"loss": 0.4811,
|
| 43636 |
+
"step": 6197
|
| 43637 |
+
},
|
| 43638 |
+
{
|
| 43639 |
+
"epoch": 2.9765878256693483,
|
| 43640 |
+
"grad_norm": 0.9412215352058411,
|
| 43641 |
+
"learning_rate": 2.942448792778718e-08,
|
| 43642 |
+
"loss": 0.5228,
|
| 43643 |
+
"step": 6198
|
| 43644 |
+
},
|
| 43645 |
+
{
|
| 43646 |
+
"epoch": 2.977068075399208,
|
| 43647 |
+
"grad_norm": 1.1179654598236084,
|
| 43648 |
+
"learning_rate": 2.8211295691416006e-08,
|
| 43649 |
+
"loss": 0.5169,
|
| 43650 |
+
"step": 6199
|
| 43651 |
+
},
|
| 43652 |
+
{
|
| 43653 |
+
"epoch": 2.9775483251290673,
|
| 43654 |
+
"grad_norm": 0.9434910416603088,
|
| 43655 |
+
"learning_rate": 2.702363959131704e-08,
|
| 43656 |
+
"loss": 0.47,
|
| 43657 |
+
"step": 6200
|
| 43658 |
+
},
|
| 43659 |
+
{
|
| 43660 |
+
"epoch": 2.9775483251290673,
|
| 43661 |
+
"eval_loss": 0.12413108348846436,
|
| 43662 |
+
"eval_runtime": 6.3651,
|
| 43663 |
+
"eval_samples_per_second": 157.892,
|
| 43664 |
+
"eval_steps_per_second": 6.598,
|
| 43665 |
+
"step": 6200
|
| 43666 |
}
|
| 43667 |
],
|
| 43668 |
"logging_steps": 1,
|
|
|
|
| 43677 |
"early_stopping_threshold": 0.0
|
| 43678 |
},
|
| 43679 |
"attributes": {
|
| 43680 |
+
"early_stopping_patience_counter": 3
|
| 43681 |
}
|
| 43682 |
},
|
| 43683 |
"TrainerControl": {
|
|
|
|
| 43686 |
"should_evaluate": false,
|
| 43687 |
"should_log": false,
|
| 43688 |
"should_save": true,
|
| 43689 |
+
"should_training_stop": true
|
| 43690 |
},
|
| 43691 |
"attributes": {}
|
| 43692 |
}
|
| 43693 |
},
|
| 43694 |
+
"total_flos": 1.4918862613826765e+17,
|
| 43695 |
"train_batch_size": 24,
|
| 43696 |
"trial_name": null,
|
| 43697 |
"trial_params": null
|