Training in progress, step 1100, checkpoint
Browse files
last-checkpoint/model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 2423056460
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:17da929491964a5f7f99889aaa103f6cf49cd29bb8fc13a170212805274c07d5
|
| 3 |
size 2423056460
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4846590727
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b9ad4522ff26c74ce5cf69802ba9845044b45448be6840c74e8202de52eff047
|
| 3 |
size 4846590727
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14645
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efe32a646280b36133122c6a4087e90d85fcd1cec8818af4943c2006708455b0
|
| 3 |
size 14645
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7a30c8e8fd04d409bda330535b6ff99d9919b3adf898382237213c6c96c77dd2
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
-
"best_metric": 3.
|
| 3 |
-
"best_model_checkpoint": "./w2v-bert-2.0-yoruba_naijavoices_1m/checkpoint-
|
| 4 |
-
"epoch":
|
| 5 |
"eval_steps": 100,
|
| 6 |
-
"global_step":
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
@@ -6397,6 +6397,1426 @@
|
|
| 6397 |
"eval_steps_per_second": 0.469,
|
| 6398 |
"eval_wer": 1.0,
|
| 6399 |
"step": 900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6400 |
}
|
| 6401 |
],
|
| 6402 |
"logging_steps": 1.0,
|
|
@@ -6425,7 +7845,7 @@
|
|
| 6425 |
"attributes": {}
|
| 6426 |
}
|
| 6427 |
},
|
| 6428 |
-
"total_flos":
|
| 6429 |
"train_batch_size": 160,
|
| 6430 |
"trial_name": null,
|
| 6431 |
"trial_params": null
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_metric": 3.433760166168213,
|
| 3 |
+
"best_model_checkpoint": "./w2v-bert-2.0-yoruba_naijavoices_1m/checkpoint-1100",
|
| 4 |
+
"epoch": 1100.0,
|
| 5 |
"eval_steps": 100,
|
| 6 |
+
"global_step": 1100,
|
| 7 |
"is_hyper_param_search": false,
|
| 8 |
"is_local_process_zero": true,
|
| 9 |
"is_world_process_zero": true,
|
|
|
|
| 6397 |
"eval_steps_per_second": 0.469,
|
| 6398 |
"eval_wer": 1.0,
|
| 6399 |
"step": 900
|
| 6400 |
+
},
|
| 6401 |
+
{
|
| 6402 |
+
"epoch": 901.0,
|
| 6403 |
+
"grad_norm": 7.289846897125244,
|
| 6404 |
+
"learning_rate": 1.7960000000000002e-07,
|
| 6405 |
+
"loss": 1.6777,
|
| 6406 |
+
"step": 901
|
| 6407 |
+
},
|
| 6408 |
+
{
|
| 6409 |
+
"epoch": 902.0,
|
| 6410 |
+
"grad_norm": 1.5632303953170776,
|
| 6411 |
+
"learning_rate": 1.798e-07,
|
| 6412 |
+
"loss": 1.6843,
|
| 6413 |
+
"step": 902
|
| 6414 |
+
},
|
| 6415 |
+
{
|
| 6416 |
+
"epoch": 903.0,
|
| 6417 |
+
"grad_norm": 1.2164356708526611,
|
| 6418 |
+
"learning_rate": 1.8e-07,
|
| 6419 |
+
"loss": 1.6713,
|
| 6420 |
+
"step": 903
|
| 6421 |
+
},
|
| 6422 |
+
{
|
| 6423 |
+
"epoch": 904.0,
|
| 6424 |
+
"grad_norm": 1.7437841892242432,
|
| 6425 |
+
"learning_rate": 1.802e-07,
|
| 6426 |
+
"loss": 1.6749,
|
| 6427 |
+
"step": 904
|
| 6428 |
+
},
|
| 6429 |
+
{
|
| 6430 |
+
"epoch": 905.0,
|
| 6431 |
+
"grad_norm": 1.2455863952636719,
|
| 6432 |
+
"learning_rate": 1.804e-07,
|
| 6433 |
+
"loss": 1.6721,
|
| 6434 |
+
"step": 905
|
| 6435 |
+
},
|
| 6436 |
+
{
|
| 6437 |
+
"epoch": 906.0,
|
| 6438 |
+
"grad_norm": 1.3489329814910889,
|
| 6439 |
+
"learning_rate": 1.8060000000000002e-07,
|
| 6440 |
+
"loss": 1.6771,
|
| 6441 |
+
"step": 906
|
| 6442 |
+
},
|
| 6443 |
+
{
|
| 6444 |
+
"epoch": 907.0,
|
| 6445 |
+
"grad_norm": 1.2651866674423218,
|
| 6446 |
+
"learning_rate": 1.808e-07,
|
| 6447 |
+
"loss": 1.6682,
|
| 6448 |
+
"step": 907
|
| 6449 |
+
},
|
| 6450 |
+
{
|
| 6451 |
+
"epoch": 908.0,
|
| 6452 |
+
"grad_norm": 1.280983328819275,
|
| 6453 |
+
"learning_rate": 1.81e-07,
|
| 6454 |
+
"loss": 1.6765,
|
| 6455 |
+
"step": 908
|
| 6456 |
+
},
|
| 6457 |
+
{
|
| 6458 |
+
"epoch": 909.0,
|
| 6459 |
+
"grad_norm": 1.3800833225250244,
|
| 6460 |
+
"learning_rate": 1.812e-07,
|
| 6461 |
+
"loss": 1.6697,
|
| 6462 |
+
"step": 909
|
| 6463 |
+
},
|
| 6464 |
+
{
|
| 6465 |
+
"epoch": 910.0,
|
| 6466 |
+
"grad_norm": 1.18763267993927,
|
| 6467 |
+
"learning_rate": 1.814e-07,
|
| 6468 |
+
"loss": 1.6612,
|
| 6469 |
+
"step": 910
|
| 6470 |
+
},
|
| 6471 |
+
{
|
| 6472 |
+
"epoch": 911.0,
|
| 6473 |
+
"grad_norm": 1.9815888404846191,
|
| 6474 |
+
"learning_rate": 1.816e-07,
|
| 6475 |
+
"loss": 1.6715,
|
| 6476 |
+
"step": 911
|
| 6477 |
+
},
|
| 6478 |
+
{
|
| 6479 |
+
"epoch": 912.0,
|
| 6480 |
+
"grad_norm": 2.0720643997192383,
|
| 6481 |
+
"learning_rate": 1.818e-07,
|
| 6482 |
+
"loss": 1.6681,
|
| 6483 |
+
"step": 912
|
| 6484 |
+
},
|
| 6485 |
+
{
|
| 6486 |
+
"epoch": 913.0,
|
| 6487 |
+
"grad_norm": 1.1363974809646606,
|
| 6488 |
+
"learning_rate": 1.82e-07,
|
| 6489 |
+
"loss": 1.6675,
|
| 6490 |
+
"step": 913
|
| 6491 |
+
},
|
| 6492 |
+
{
|
| 6493 |
+
"epoch": 914.0,
|
| 6494 |
+
"grad_norm": 1.1961008310317993,
|
| 6495 |
+
"learning_rate": 1.822e-07,
|
| 6496 |
+
"loss": 1.6712,
|
| 6497 |
+
"step": 914
|
| 6498 |
+
},
|
| 6499 |
+
{
|
| 6500 |
+
"epoch": 915.0,
|
| 6501 |
+
"grad_norm": 3.5271594524383545,
|
| 6502 |
+
"learning_rate": 1.8240000000000002e-07,
|
| 6503 |
+
"loss": 1.6629,
|
| 6504 |
+
"step": 915
|
| 6505 |
+
},
|
| 6506 |
+
{
|
| 6507 |
+
"epoch": 916.0,
|
| 6508 |
+
"grad_norm": 1.2886525392532349,
|
| 6509 |
+
"learning_rate": 1.826e-07,
|
| 6510 |
+
"loss": 1.6638,
|
| 6511 |
+
"step": 916
|
| 6512 |
+
},
|
| 6513 |
+
{
|
| 6514 |
+
"epoch": 917.0,
|
| 6515 |
+
"grad_norm": 3.6112558841705322,
|
| 6516 |
+
"learning_rate": 1.828e-07,
|
| 6517 |
+
"loss": 1.667,
|
| 6518 |
+
"step": 917
|
| 6519 |
+
},
|
| 6520 |
+
{
|
| 6521 |
+
"epoch": 918.0,
|
| 6522 |
+
"grad_norm": 1.1059443950653076,
|
| 6523 |
+
"learning_rate": 1.83e-07,
|
| 6524 |
+
"loss": 1.6659,
|
| 6525 |
+
"step": 918
|
| 6526 |
+
},
|
| 6527 |
+
{
|
| 6528 |
+
"epoch": 919.0,
|
| 6529 |
+
"grad_norm": 1.2059946060180664,
|
| 6530 |
+
"learning_rate": 1.832e-07,
|
| 6531 |
+
"loss": 1.6583,
|
| 6532 |
+
"step": 919
|
| 6533 |
+
},
|
| 6534 |
+
{
|
| 6535 |
+
"epoch": 920.0,
|
| 6536 |
+
"grad_norm": 1.0824497938156128,
|
| 6537 |
+
"learning_rate": 1.8340000000000001e-07,
|
| 6538 |
+
"loss": 1.6599,
|
| 6539 |
+
"step": 920
|
| 6540 |
+
},
|
| 6541 |
+
{
|
| 6542 |
+
"epoch": 921.0,
|
| 6543 |
+
"grad_norm": 1.2805767059326172,
|
| 6544 |
+
"learning_rate": 1.836e-07,
|
| 6545 |
+
"loss": 1.6527,
|
| 6546 |
+
"step": 921
|
| 6547 |
+
},
|
| 6548 |
+
{
|
| 6549 |
+
"epoch": 922.0,
|
| 6550 |
+
"grad_norm": 16.977970123291016,
|
| 6551 |
+
"learning_rate": 1.838e-07,
|
| 6552 |
+
"loss": 1.6621,
|
| 6553 |
+
"step": 922
|
| 6554 |
+
},
|
| 6555 |
+
{
|
| 6556 |
+
"epoch": 923.0,
|
| 6557 |
+
"grad_norm": 1.8109819889068604,
|
| 6558 |
+
"learning_rate": 1.84e-07,
|
| 6559 |
+
"loss": 1.6573,
|
| 6560 |
+
"step": 923
|
| 6561 |
+
},
|
| 6562 |
+
{
|
| 6563 |
+
"epoch": 924.0,
|
| 6564 |
+
"grad_norm": 4.374696254730225,
|
| 6565 |
+
"learning_rate": 1.842e-07,
|
| 6566 |
+
"loss": 1.6646,
|
| 6567 |
+
"step": 924
|
| 6568 |
+
},
|
| 6569 |
+
{
|
| 6570 |
+
"epoch": 925.0,
|
| 6571 |
+
"grad_norm": 1.1373530626296997,
|
| 6572 |
+
"learning_rate": 1.844e-07,
|
| 6573 |
+
"loss": 1.6555,
|
| 6574 |
+
"step": 925
|
| 6575 |
+
},
|
| 6576 |
+
{
|
| 6577 |
+
"epoch": 926.0,
|
| 6578 |
+
"grad_norm": 1.135043978691101,
|
| 6579 |
+
"learning_rate": 1.846e-07,
|
| 6580 |
+
"loss": 1.6594,
|
| 6581 |
+
"step": 926
|
| 6582 |
+
},
|
| 6583 |
+
{
|
| 6584 |
+
"epoch": 927.0,
|
| 6585 |
+
"grad_norm": 1.1703171730041504,
|
| 6586 |
+
"learning_rate": 1.848e-07,
|
| 6587 |
+
"loss": 1.6546,
|
| 6588 |
+
"step": 927
|
| 6589 |
+
},
|
| 6590 |
+
{
|
| 6591 |
+
"epoch": 928.0,
|
| 6592 |
+
"grad_norm": 1.405674695968628,
|
| 6593 |
+
"learning_rate": 1.85e-07,
|
| 6594 |
+
"loss": 1.6552,
|
| 6595 |
+
"step": 928
|
| 6596 |
+
},
|
| 6597 |
+
{
|
| 6598 |
+
"epoch": 929.0,
|
| 6599 |
+
"grad_norm": 1.2653117179870605,
|
| 6600 |
+
"learning_rate": 1.8520000000000002e-07,
|
| 6601 |
+
"loss": 1.6531,
|
| 6602 |
+
"step": 929
|
| 6603 |
+
},
|
| 6604 |
+
{
|
| 6605 |
+
"epoch": 930.0,
|
| 6606 |
+
"grad_norm": 1.0446969270706177,
|
| 6607 |
+
"learning_rate": 1.854e-07,
|
| 6608 |
+
"loss": 1.6535,
|
| 6609 |
+
"step": 930
|
| 6610 |
+
},
|
| 6611 |
+
{
|
| 6612 |
+
"epoch": 931.0,
|
| 6613 |
+
"grad_norm": 1.2374356985092163,
|
| 6614 |
+
"learning_rate": 1.856e-07,
|
| 6615 |
+
"loss": 1.6471,
|
| 6616 |
+
"step": 931
|
| 6617 |
+
},
|
| 6618 |
+
{
|
| 6619 |
+
"epoch": 932.0,
|
| 6620 |
+
"grad_norm": 1.827778935432434,
|
| 6621 |
+
"learning_rate": 1.858e-07,
|
| 6622 |
+
"loss": 1.6569,
|
| 6623 |
+
"step": 932
|
| 6624 |
+
},
|
| 6625 |
+
{
|
| 6626 |
+
"epoch": 933.0,
|
| 6627 |
+
"grad_norm": 1.4429855346679688,
|
| 6628 |
+
"learning_rate": 1.86e-07,
|
| 6629 |
+
"loss": 1.6523,
|
| 6630 |
+
"step": 933
|
| 6631 |
+
},
|
| 6632 |
+
{
|
| 6633 |
+
"epoch": 934.0,
|
| 6634 |
+
"grad_norm": 1.0403034687042236,
|
| 6635 |
+
"learning_rate": 1.862e-07,
|
| 6636 |
+
"loss": 1.6476,
|
| 6637 |
+
"step": 934
|
| 6638 |
+
},
|
| 6639 |
+
{
|
| 6640 |
+
"epoch": 935.0,
|
| 6641 |
+
"grad_norm": 1.3327019214630127,
|
| 6642 |
+
"learning_rate": 1.8640000000000003e-07,
|
| 6643 |
+
"loss": 1.6511,
|
| 6644 |
+
"step": 935
|
| 6645 |
+
},
|
| 6646 |
+
{
|
| 6647 |
+
"epoch": 936.0,
|
| 6648 |
+
"grad_norm": 3.2895419597625732,
|
| 6649 |
+
"learning_rate": 1.866e-07,
|
| 6650 |
+
"loss": 1.6423,
|
| 6651 |
+
"step": 936
|
| 6652 |
+
},
|
| 6653 |
+
{
|
| 6654 |
+
"epoch": 937.0,
|
| 6655 |
+
"grad_norm": 1.3313167095184326,
|
| 6656 |
+
"learning_rate": 1.868e-07,
|
| 6657 |
+
"loss": 1.6475,
|
| 6658 |
+
"step": 937
|
| 6659 |
+
},
|
| 6660 |
+
{
|
| 6661 |
+
"epoch": 938.0,
|
| 6662 |
+
"grad_norm": 1.0129790306091309,
|
| 6663 |
+
"learning_rate": 1.87e-07,
|
| 6664 |
+
"loss": 1.6489,
|
| 6665 |
+
"step": 938
|
| 6666 |
+
},
|
| 6667 |
+
{
|
| 6668 |
+
"epoch": 939.0,
|
| 6669 |
+
"grad_norm": 4.673969268798828,
|
| 6670 |
+
"learning_rate": 1.872e-07,
|
| 6671 |
+
"loss": 1.6496,
|
| 6672 |
+
"step": 939
|
| 6673 |
+
},
|
| 6674 |
+
{
|
| 6675 |
+
"epoch": 940.0,
|
| 6676 |
+
"grad_norm": 4.099658012390137,
|
| 6677 |
+
"learning_rate": 1.8740000000000002e-07,
|
| 6678 |
+
"loss": 1.6458,
|
| 6679 |
+
"step": 940
|
| 6680 |
+
},
|
| 6681 |
+
{
|
| 6682 |
+
"epoch": 941.0,
|
| 6683 |
+
"grad_norm": 1.4593976736068726,
|
| 6684 |
+
"learning_rate": 1.8759999999999999e-07,
|
| 6685 |
+
"loss": 1.6477,
|
| 6686 |
+
"step": 941
|
| 6687 |
+
},
|
| 6688 |
+
{
|
| 6689 |
+
"epoch": 942.0,
|
| 6690 |
+
"grad_norm": 1.2744107246398926,
|
| 6691 |
+
"learning_rate": 1.878e-07,
|
| 6692 |
+
"loss": 1.6503,
|
| 6693 |
+
"step": 942
|
| 6694 |
+
},
|
| 6695 |
+
{
|
| 6696 |
+
"epoch": 943.0,
|
| 6697 |
+
"grad_norm": 1.1063960790634155,
|
| 6698 |
+
"learning_rate": 1.8800000000000002e-07,
|
| 6699 |
+
"loss": 1.6401,
|
| 6700 |
+
"step": 943
|
| 6701 |
+
},
|
| 6702 |
+
{
|
| 6703 |
+
"epoch": 944.0,
|
| 6704 |
+
"grad_norm": 2.0892364978790283,
|
| 6705 |
+
"learning_rate": 1.882e-07,
|
| 6706 |
+
"loss": 1.6417,
|
| 6707 |
+
"step": 944
|
| 6708 |
+
},
|
| 6709 |
+
{
|
| 6710 |
+
"epoch": 945.0,
|
| 6711 |
+
"grad_norm": 1.0549347400665283,
|
| 6712 |
+
"learning_rate": 1.884e-07,
|
| 6713 |
+
"loss": 1.6428,
|
| 6714 |
+
"step": 945
|
| 6715 |
+
},
|
| 6716 |
+
{
|
| 6717 |
+
"epoch": 946.0,
|
| 6718 |
+
"grad_norm": 1.0397493839263916,
|
| 6719 |
+
"learning_rate": 1.886e-07,
|
| 6720 |
+
"loss": 1.6375,
|
| 6721 |
+
"step": 946
|
| 6722 |
+
},
|
| 6723 |
+
{
|
| 6724 |
+
"epoch": 947.0,
|
| 6725 |
+
"grad_norm": 1.138031244277954,
|
| 6726 |
+
"learning_rate": 1.888e-07,
|
| 6727 |
+
"loss": 1.641,
|
| 6728 |
+
"step": 947
|
| 6729 |
+
},
|
| 6730 |
+
{
|
| 6731 |
+
"epoch": 948.0,
|
| 6732 |
+
"grad_norm": 1.2404905557632446,
|
| 6733 |
+
"learning_rate": 1.89e-07,
|
| 6734 |
+
"loss": 1.6408,
|
| 6735 |
+
"step": 948
|
| 6736 |
+
},
|
| 6737 |
+
{
|
| 6738 |
+
"epoch": 949.0,
|
| 6739 |
+
"grad_norm": 1.1873743534088135,
|
| 6740 |
+
"learning_rate": 1.8920000000000003e-07,
|
| 6741 |
+
"loss": 1.6333,
|
| 6742 |
+
"step": 949
|
| 6743 |
+
},
|
| 6744 |
+
{
|
| 6745 |
+
"epoch": 950.0,
|
| 6746 |
+
"grad_norm": 1.2409088611602783,
|
| 6747 |
+
"learning_rate": 1.894e-07,
|
| 6748 |
+
"loss": 1.6389,
|
| 6749 |
+
"step": 950
|
| 6750 |
+
},
|
| 6751 |
+
{
|
| 6752 |
+
"epoch": 951.0,
|
| 6753 |
+
"grad_norm": 4.043206691741943,
|
| 6754 |
+
"learning_rate": 1.896e-07,
|
| 6755 |
+
"loss": 1.6314,
|
| 6756 |
+
"step": 951
|
| 6757 |
+
},
|
| 6758 |
+
{
|
| 6759 |
+
"epoch": 952.0,
|
| 6760 |
+
"grad_norm": 0.9756543040275574,
|
| 6761 |
+
"learning_rate": 1.8980000000000002e-07,
|
| 6762 |
+
"loss": 1.6365,
|
| 6763 |
+
"step": 952
|
| 6764 |
+
},
|
| 6765 |
+
{
|
| 6766 |
+
"epoch": 953.0,
|
| 6767 |
+
"grad_norm": 1.0446370840072632,
|
| 6768 |
+
"learning_rate": 1.9e-07,
|
| 6769 |
+
"loss": 1.6328,
|
| 6770 |
+
"step": 953
|
| 6771 |
+
},
|
| 6772 |
+
{
|
| 6773 |
+
"epoch": 954.0,
|
| 6774 |
+
"grad_norm": 1.219887137413025,
|
| 6775 |
+
"learning_rate": 1.9020000000000002e-07,
|
| 6776 |
+
"loss": 1.6364,
|
| 6777 |
+
"step": 954
|
| 6778 |
+
},
|
| 6779 |
+
{
|
| 6780 |
+
"epoch": 955.0,
|
| 6781 |
+
"grad_norm": 0.9210452437400818,
|
| 6782 |
+
"learning_rate": 1.9039999999999998e-07,
|
| 6783 |
+
"loss": 1.6361,
|
| 6784 |
+
"step": 955
|
| 6785 |
+
},
|
| 6786 |
+
{
|
| 6787 |
+
"epoch": 956.0,
|
| 6788 |
+
"grad_norm": 1.0765300989151,
|
| 6789 |
+
"learning_rate": 1.906e-07,
|
| 6790 |
+
"loss": 1.63,
|
| 6791 |
+
"step": 956
|
| 6792 |
+
},
|
| 6793 |
+
{
|
| 6794 |
+
"epoch": 957.0,
|
| 6795 |
+
"grad_norm": 2.0179483890533447,
|
| 6796 |
+
"learning_rate": 1.9080000000000001e-07,
|
| 6797 |
+
"loss": 1.6323,
|
| 6798 |
+
"step": 957
|
| 6799 |
+
},
|
| 6800 |
+
{
|
| 6801 |
+
"epoch": 958.0,
|
| 6802 |
+
"grad_norm": 1.5964845418930054,
|
| 6803 |
+
"learning_rate": 1.91e-07,
|
| 6804 |
+
"loss": 1.6281,
|
| 6805 |
+
"step": 958
|
| 6806 |
+
},
|
| 6807 |
+
{
|
| 6808 |
+
"epoch": 959.0,
|
| 6809 |
+
"grad_norm": 2.0451290607452393,
|
| 6810 |
+
"learning_rate": 1.912e-07,
|
| 6811 |
+
"loss": 1.6306,
|
| 6812 |
+
"step": 959
|
| 6813 |
+
},
|
| 6814 |
+
{
|
| 6815 |
+
"epoch": 960.0,
|
| 6816 |
+
"grad_norm": 0.9319092631340027,
|
| 6817 |
+
"learning_rate": 1.914e-07,
|
| 6818 |
+
"loss": 1.6248,
|
| 6819 |
+
"step": 960
|
| 6820 |
+
},
|
| 6821 |
+
{
|
| 6822 |
+
"epoch": 961.0,
|
| 6823 |
+
"grad_norm": 40.506675720214844,
|
| 6824 |
+
"learning_rate": 1.916e-07,
|
| 6825 |
+
"loss": 1.6355,
|
| 6826 |
+
"step": 961
|
| 6827 |
+
},
|
| 6828 |
+
{
|
| 6829 |
+
"epoch": 962.0,
|
| 6830 |
+
"grad_norm": 0.9858155846595764,
|
| 6831 |
+
"learning_rate": 1.918e-07,
|
| 6832 |
+
"loss": 1.6226,
|
| 6833 |
+
"step": 962
|
| 6834 |
+
},
|
| 6835 |
+
{
|
| 6836 |
+
"epoch": 963.0,
|
| 6837 |
+
"grad_norm": 1.0020569562911987,
|
| 6838 |
+
"learning_rate": 1.9200000000000003e-07,
|
| 6839 |
+
"loss": 1.6272,
|
| 6840 |
+
"step": 963
|
| 6841 |
+
},
|
| 6842 |
+
{
|
| 6843 |
+
"epoch": 964.0,
|
| 6844 |
+
"grad_norm": 1.4338146448135376,
|
| 6845 |
+
"learning_rate": 1.922e-07,
|
| 6846 |
+
"loss": 1.6289,
|
| 6847 |
+
"step": 964
|
| 6848 |
+
},
|
| 6849 |
+
{
|
| 6850 |
+
"epoch": 965.0,
|
| 6851 |
+
"grad_norm": 1.052070140838623,
|
| 6852 |
+
"learning_rate": 1.924e-07,
|
| 6853 |
+
"loss": 1.6252,
|
| 6854 |
+
"step": 965
|
| 6855 |
+
},
|
| 6856 |
+
{
|
| 6857 |
+
"epoch": 966.0,
|
| 6858 |
+
"grad_norm": 1.4018009901046753,
|
| 6859 |
+
"learning_rate": 1.9260000000000002e-07,
|
| 6860 |
+
"loss": 1.6173,
|
| 6861 |
+
"step": 966
|
| 6862 |
+
},
|
| 6863 |
+
{
|
| 6864 |
+
"epoch": 967.0,
|
| 6865 |
+
"grad_norm": 1.1901110410690308,
|
| 6866 |
+
"learning_rate": 1.928e-07,
|
| 6867 |
+
"loss": 1.6237,
|
| 6868 |
+
"step": 967
|
| 6869 |
+
},
|
| 6870 |
+
{
|
| 6871 |
+
"epoch": 968.0,
|
| 6872 |
+
"grad_norm": 0.9189120531082153,
|
| 6873 |
+
"learning_rate": 1.9300000000000002e-07,
|
| 6874 |
+
"loss": 1.6199,
|
| 6875 |
+
"step": 968
|
| 6876 |
+
},
|
| 6877 |
+
{
|
| 6878 |
+
"epoch": 969.0,
|
| 6879 |
+
"grad_norm": 0.9501894116401672,
|
| 6880 |
+
"learning_rate": 1.932e-07,
|
| 6881 |
+
"loss": 1.6202,
|
| 6882 |
+
"step": 969
|
| 6883 |
+
},
|
| 6884 |
+
{
|
| 6885 |
+
"epoch": 970.0,
|
| 6886 |
+
"grad_norm": 1.4390616416931152,
|
| 6887 |
+
"learning_rate": 1.934e-07,
|
| 6888 |
+
"loss": 1.6176,
|
| 6889 |
+
"step": 970
|
| 6890 |
+
},
|
| 6891 |
+
{
|
| 6892 |
+
"epoch": 971.0,
|
| 6893 |
+
"grad_norm": 2.0271224975585938,
|
| 6894 |
+
"learning_rate": 1.936e-07,
|
| 6895 |
+
"loss": 1.6155,
|
| 6896 |
+
"step": 971
|
| 6897 |
+
},
|
| 6898 |
+
{
|
| 6899 |
+
"epoch": 972.0,
|
| 6900 |
+
"grad_norm": 0.9586737751960754,
|
| 6901 |
+
"learning_rate": 1.938e-07,
|
| 6902 |
+
"loss": 1.616,
|
| 6903 |
+
"step": 972
|
| 6904 |
+
},
|
| 6905 |
+
{
|
| 6906 |
+
"epoch": 973.0,
|
| 6907 |
+
"grad_norm": 1.2440752983093262,
|
| 6908 |
+
"learning_rate": 1.94e-07,
|
| 6909 |
+
"loss": 1.6205,
|
| 6910 |
+
"step": 973
|
| 6911 |
+
},
|
| 6912 |
+
{
|
| 6913 |
+
"epoch": 974.0,
|
| 6914 |
+
"grad_norm": 1.0020045042037964,
|
| 6915 |
+
"learning_rate": 1.942e-07,
|
| 6916 |
+
"loss": 1.6218,
|
| 6917 |
+
"step": 974
|
| 6918 |
+
},
|
| 6919 |
+
{
|
| 6920 |
+
"epoch": 975.0,
|
| 6921 |
+
"grad_norm": 2.2769696712493896,
|
| 6922 |
+
"learning_rate": 1.944e-07,
|
| 6923 |
+
"loss": 1.6198,
|
| 6924 |
+
"step": 975
|
| 6925 |
+
},
|
| 6926 |
+
{
|
| 6927 |
+
"epoch": 976.0,
|
| 6928 |
+
"grad_norm": 0.9601196646690369,
|
| 6929 |
+
"learning_rate": 1.946e-07,
|
| 6930 |
+
"loss": 1.6167,
|
| 6931 |
+
"step": 976
|
| 6932 |
+
},
|
| 6933 |
+
{
|
| 6934 |
+
"epoch": 977.0,
|
| 6935 |
+
"grad_norm": 0.9332568049430847,
|
| 6936 |
+
"learning_rate": 1.9480000000000002e-07,
|
| 6937 |
+
"loss": 1.6089,
|
| 6938 |
+
"step": 977
|
| 6939 |
+
},
|
| 6940 |
+
{
|
| 6941 |
+
"epoch": 978.0,
|
| 6942 |
+
"grad_norm": 0.9011194109916687,
|
| 6943 |
+
"learning_rate": 1.9499999999999999e-07,
|
| 6944 |
+
"loss": 1.618,
|
| 6945 |
+
"step": 978
|
| 6946 |
+
},
|
| 6947 |
+
{
|
| 6948 |
+
"epoch": 979.0,
|
| 6949 |
+
"grad_norm": 1.0645091533660889,
|
| 6950 |
+
"learning_rate": 1.952e-07,
|
| 6951 |
+
"loss": 1.6082,
|
| 6952 |
+
"step": 979
|
| 6953 |
+
},
|
| 6954 |
+
{
|
| 6955 |
+
"epoch": 980.0,
|
| 6956 |
+
"grad_norm": 1.2063056230545044,
|
| 6957 |
+
"learning_rate": 1.9540000000000002e-07,
|
| 6958 |
+
"loss": 1.6059,
|
| 6959 |
+
"step": 980
|
| 6960 |
+
},
|
| 6961 |
+
{
|
| 6962 |
+
"epoch": 981.0,
|
| 6963 |
+
"grad_norm": 2.6769473552703857,
|
| 6964 |
+
"learning_rate": 1.956e-07,
|
| 6965 |
+
"loss": 1.6117,
|
| 6966 |
+
"step": 981
|
| 6967 |
+
},
|
| 6968 |
+
{
|
| 6969 |
+
"epoch": 982.0,
|
| 6970 |
+
"grad_norm": 1.0495208501815796,
|
| 6971 |
+
"learning_rate": 1.9580000000000002e-07,
|
| 6972 |
+
"loss": 1.6072,
|
| 6973 |
+
"step": 982
|
| 6974 |
+
},
|
| 6975 |
+
{
|
| 6976 |
+
"epoch": 983.0,
|
| 6977 |
+
"grad_norm": 0.9004780054092407,
|
| 6978 |
+
"learning_rate": 1.96e-07,
|
| 6979 |
+
"loss": 1.6097,
|
| 6980 |
+
"step": 983
|
| 6981 |
+
},
|
| 6982 |
+
{
|
| 6983 |
+
"epoch": 984.0,
|
| 6984 |
+
"grad_norm": 2.2003862857818604,
|
| 6985 |
+
"learning_rate": 1.962e-07,
|
| 6986 |
+
"loss": 1.6124,
|
| 6987 |
+
"step": 984
|
| 6988 |
+
},
|
| 6989 |
+
{
|
| 6990 |
+
"epoch": 985.0,
|
| 6991 |
+
"grad_norm": 1.6124346256256104,
|
| 6992 |
+
"learning_rate": 1.964e-07,
|
| 6993 |
+
"loss": 1.607,
|
| 6994 |
+
"step": 985
|
| 6995 |
+
},
|
| 6996 |
+
{
|
| 6997 |
+
"epoch": 986.0,
|
| 6998 |
+
"grad_norm": 3.331295967102051,
|
| 6999 |
+
"learning_rate": 1.9660000000000003e-07,
|
| 7000 |
+
"loss": 1.6101,
|
| 7001 |
+
"step": 986
|
| 7002 |
+
},
|
| 7003 |
+
{
|
| 7004 |
+
"epoch": 987.0,
|
| 7005 |
+
"grad_norm": 4.2284770011901855,
|
| 7006 |
+
"learning_rate": 1.968e-07,
|
| 7007 |
+
"loss": 1.6016,
|
| 7008 |
+
"step": 987
|
| 7009 |
+
},
|
| 7010 |
+
{
|
| 7011 |
+
"epoch": 988.0,
|
| 7012 |
+
"grad_norm": 14.074902534484863,
|
| 7013 |
+
"learning_rate": 1.97e-07,
|
| 7014 |
+
"loss": 1.6081,
|
| 7015 |
+
"step": 988
|
| 7016 |
+
},
|
| 7017 |
+
{
|
| 7018 |
+
"epoch": 989.0,
|
| 7019 |
+
"grad_norm": 2.105473518371582,
|
| 7020 |
+
"learning_rate": 1.972e-07,
|
| 7021 |
+
"loss": 1.6059,
|
| 7022 |
+
"step": 989
|
| 7023 |
+
},
|
| 7024 |
+
{
|
| 7025 |
+
"epoch": 990.0,
|
| 7026 |
+
"grad_norm": 0.8988717198371887,
|
| 7027 |
+
"learning_rate": 1.974e-07,
|
| 7028 |
+
"loss": 1.6033,
|
| 7029 |
+
"step": 990
|
| 7030 |
+
},
|
| 7031 |
+
{
|
| 7032 |
+
"epoch": 991.0,
|
| 7033 |
+
"grad_norm": 1.6289899349212646,
|
| 7034 |
+
"learning_rate": 1.9760000000000002e-07,
|
| 7035 |
+
"loss": 1.612,
|
| 7036 |
+
"step": 991
|
| 7037 |
+
},
|
| 7038 |
+
{
|
| 7039 |
+
"epoch": 992.0,
|
| 7040 |
+
"grad_norm": 0.9097650051116943,
|
| 7041 |
+
"learning_rate": 1.9779999999999998e-07,
|
| 7042 |
+
"loss": 1.6095,
|
| 7043 |
+
"step": 992
|
| 7044 |
+
},
|
| 7045 |
+
{
|
| 7046 |
+
"epoch": 993.0,
|
| 7047 |
+
"grad_norm": 1.135284423828125,
|
| 7048 |
+
"learning_rate": 1.98e-07,
|
| 7049 |
+
"loss": 1.6074,
|
| 7050 |
+
"step": 993
|
| 7051 |
+
},
|
| 7052 |
+
{
|
| 7053 |
+
"epoch": 994.0,
|
| 7054 |
+
"grad_norm": 0.9815622568130493,
|
| 7055 |
+
"learning_rate": 1.9820000000000001e-07,
|
| 7056 |
+
"loss": 1.5999,
|
| 7057 |
+
"step": 994
|
| 7058 |
+
},
|
| 7059 |
+
{
|
| 7060 |
+
"epoch": 995.0,
|
| 7061 |
+
"grad_norm": 1.0774178504943848,
|
| 7062 |
+
"learning_rate": 1.984e-07,
|
| 7063 |
+
"loss": 1.622,
|
| 7064 |
+
"step": 995
|
| 7065 |
+
},
|
| 7066 |
+
{
|
| 7067 |
+
"epoch": 996.0,
|
| 7068 |
+
"grad_norm": 1.3044596910476685,
|
| 7069 |
+
"learning_rate": 1.9860000000000002e-07,
|
| 7070 |
+
"loss": 1.6003,
|
| 7071 |
+
"step": 996
|
| 7072 |
+
},
|
| 7073 |
+
{
|
| 7074 |
+
"epoch": 997.0,
|
| 7075 |
+
"grad_norm": 5.724979400634766,
|
| 7076 |
+
"learning_rate": 1.988e-07,
|
| 7077 |
+
"loss": 1.6026,
|
| 7078 |
+
"step": 997
|
| 7079 |
+
},
|
| 7080 |
+
{
|
| 7081 |
+
"epoch": 998.0,
|
| 7082 |
+
"grad_norm": 7.915839195251465,
|
| 7083 |
+
"learning_rate": 1.99e-07,
|
| 7084 |
+
"loss": 1.6014,
|
| 7085 |
+
"step": 998
|
| 7086 |
+
},
|
| 7087 |
+
{
|
| 7088 |
+
"epoch": 999.0,
|
| 7089 |
+
"grad_norm": 0.9617994427680969,
|
| 7090 |
+
"learning_rate": 1.992e-07,
|
| 7091 |
+
"loss": 1.6004,
|
| 7092 |
+
"step": 999
|
| 7093 |
+
},
|
| 7094 |
+
{
|
| 7095 |
+
"epoch": 1000.0,
|
| 7096 |
+
"grad_norm": 1.0033584833145142,
|
| 7097 |
+
"learning_rate": 1.9940000000000003e-07,
|
| 7098 |
+
"loss": 1.595,
|
| 7099 |
+
"step": 1000
|
| 7100 |
+
},
|
| 7101 |
+
{
|
| 7102 |
+
"epoch": 1000.0,
|
| 7103 |
+
"eval_cer": 0.9977481887605247,
|
| 7104 |
+
"eval_loss": 3.4487671852111816,
|
| 7105 |
+
"eval_runtime": 14.7295,
|
| 7106 |
+
"eval_samples_per_second": 66.737,
|
| 7107 |
+
"eval_steps_per_second": 0.475,
|
| 7108 |
+
"eval_wer": 1.0,
|
| 7109 |
+
"step": 1000
|
| 7110 |
+
},
|
| 7111 |
+
{
|
| 7112 |
+
"epoch": 1001.0,
|
| 7113 |
+
"grad_norm": 1.1320922374725342,
|
| 7114 |
+
"learning_rate": 1.996e-07,
|
| 7115 |
+
"loss": 1.5978,
|
| 7116 |
+
"step": 1001
|
| 7117 |
+
},
|
| 7118 |
+
{
|
| 7119 |
+
"epoch": 1002.0,
|
| 7120 |
+
"grad_norm": 10.318326950073242,
|
| 7121 |
+
"learning_rate": 1.998e-07,
|
| 7122 |
+
"loss": 1.6008,
|
| 7123 |
+
"step": 1002
|
| 7124 |
+
},
|
| 7125 |
+
{
|
| 7126 |
+
"epoch": 1003.0,
|
| 7127 |
+
"grad_norm": 5.446964263916016,
|
| 7128 |
+
"learning_rate": 2.0000000000000002e-07,
|
| 7129 |
+
"loss": 1.5938,
|
| 7130 |
+
"step": 1003
|
| 7131 |
+
},
|
| 7132 |
+
{
|
| 7133 |
+
"epoch": 1004.0,
|
| 7134 |
+
"grad_norm": 1.3103594779968262,
|
| 7135 |
+
"learning_rate": 2.002e-07,
|
| 7136 |
+
"loss": 1.6014,
|
| 7137 |
+
"step": 1004
|
| 7138 |
+
},
|
| 7139 |
+
{
|
| 7140 |
+
"epoch": 1005.0,
|
| 7141 |
+
"grad_norm": 1.0951064825057983,
|
| 7142 |
+
"learning_rate": 2.0040000000000002e-07,
|
| 7143 |
+
"loss": 1.5957,
|
| 7144 |
+
"step": 1005
|
| 7145 |
+
},
|
| 7146 |
+
{
|
| 7147 |
+
"epoch": 1006.0,
|
| 7148 |
+
"grad_norm": 2.066399097442627,
|
| 7149 |
+
"learning_rate": 2.0059999999999998e-07,
|
| 7150 |
+
"loss": 1.5892,
|
| 7151 |
+
"step": 1006
|
| 7152 |
+
},
|
| 7153 |
+
{
|
| 7154 |
+
"epoch": 1007.0,
|
| 7155 |
+
"grad_norm": 1.012980341911316,
|
| 7156 |
+
"learning_rate": 2.008e-07,
|
| 7157 |
+
"loss": 1.5972,
|
| 7158 |
+
"step": 1007
|
| 7159 |
+
},
|
| 7160 |
+
{
|
| 7161 |
+
"epoch": 1008.0,
|
| 7162 |
+
"grad_norm": 1.5361199378967285,
|
| 7163 |
+
"learning_rate": 2.01e-07,
|
| 7164 |
+
"loss": 1.5916,
|
| 7165 |
+
"step": 1008
|
| 7166 |
+
},
|
| 7167 |
+
{
|
| 7168 |
+
"epoch": 1009.0,
|
| 7169 |
+
"grad_norm": 1.0442848205566406,
|
| 7170 |
+
"learning_rate": 2.012e-07,
|
| 7171 |
+
"loss": 1.5872,
|
| 7172 |
+
"step": 1009
|
| 7173 |
+
},
|
| 7174 |
+
{
|
| 7175 |
+
"epoch": 1010.0,
|
| 7176 |
+
"grad_norm": 6.148806571960449,
|
| 7177 |
+
"learning_rate": 2.0140000000000002e-07,
|
| 7178 |
+
"loss": 1.5919,
|
| 7179 |
+
"step": 1010
|
| 7180 |
+
},
|
| 7181 |
+
{
|
| 7182 |
+
"epoch": 1011.0,
|
| 7183 |
+
"grad_norm": 1.2967356443405151,
|
| 7184 |
+
"learning_rate": 2.016e-07,
|
| 7185 |
+
"loss": 1.5922,
|
| 7186 |
+
"step": 1011
|
| 7187 |
+
},
|
| 7188 |
+
{
|
| 7189 |
+
"epoch": 1012.0,
|
| 7190 |
+
"grad_norm": 6.414045810699463,
|
| 7191 |
+
"learning_rate": 2.018e-07,
|
| 7192 |
+
"loss": 1.5913,
|
| 7193 |
+
"step": 1012
|
| 7194 |
+
},
|
| 7195 |
+
{
|
| 7196 |
+
"epoch": 1013.0,
|
| 7197 |
+
"grad_norm": 1.1772363185882568,
|
| 7198 |
+
"learning_rate": 2.02e-07,
|
| 7199 |
+
"loss": 1.5828,
|
| 7200 |
+
"step": 1013
|
| 7201 |
+
},
|
| 7202 |
+
{
|
| 7203 |
+
"epoch": 1014.0,
|
| 7204 |
+
"grad_norm": 3.105180501937866,
|
| 7205 |
+
"learning_rate": 2.0220000000000002e-07,
|
| 7206 |
+
"loss": 1.594,
|
| 7207 |
+
"step": 1014
|
| 7208 |
+
},
|
| 7209 |
+
{
|
| 7210 |
+
"epoch": 1015.0,
|
| 7211 |
+
"grad_norm": 1.3187018632888794,
|
| 7212 |
+
"learning_rate": 2.0239999999999999e-07,
|
| 7213 |
+
"loss": 1.5925,
|
| 7214 |
+
"step": 1015
|
| 7215 |
+
},
|
| 7216 |
+
{
|
| 7217 |
+
"epoch": 1016.0,
|
| 7218 |
+
"grad_norm": 4.919627666473389,
|
| 7219 |
+
"learning_rate": 2.026e-07,
|
| 7220 |
+
"loss": 1.5964,
|
| 7221 |
+
"step": 1016
|
| 7222 |
+
},
|
| 7223 |
+
{
|
| 7224 |
+
"epoch": 1017.0,
|
| 7225 |
+
"grad_norm": 6.910722732543945,
|
| 7226 |
+
"learning_rate": 2.0280000000000002e-07,
|
| 7227 |
+
"loss": 1.5861,
|
| 7228 |
+
"step": 1017
|
| 7229 |
+
},
|
| 7230 |
+
{
|
| 7231 |
+
"epoch": 1018.0,
|
| 7232 |
+
"grad_norm": 1.2795530557632446,
|
| 7233 |
+
"learning_rate": 2.03e-07,
|
| 7234 |
+
"loss": 1.5862,
|
| 7235 |
+
"step": 1018
|
| 7236 |
+
},
|
| 7237 |
+
{
|
| 7238 |
+
"epoch": 1019.0,
|
| 7239 |
+
"grad_norm": 0.9177622199058533,
|
| 7240 |
+
"learning_rate": 2.0320000000000002e-07,
|
| 7241 |
+
"loss": 1.5864,
|
| 7242 |
+
"step": 1019
|
| 7243 |
+
},
|
| 7244 |
+
{
|
| 7245 |
+
"epoch": 1020.0,
|
| 7246 |
+
"grad_norm": 1.5381174087524414,
|
| 7247 |
+
"learning_rate": 2.0339999999999998e-07,
|
| 7248 |
+
"loss": 1.5891,
|
| 7249 |
+
"step": 1020
|
| 7250 |
+
},
|
| 7251 |
+
{
|
| 7252 |
+
"epoch": 1021.0,
|
| 7253 |
+
"grad_norm": 1.4343268871307373,
|
| 7254 |
+
"learning_rate": 2.036e-07,
|
| 7255 |
+
"loss": 1.5872,
|
| 7256 |
+
"step": 1021
|
| 7257 |
+
},
|
| 7258 |
+
{
|
| 7259 |
+
"epoch": 1022.0,
|
| 7260 |
+
"grad_norm": 1.11500883102417,
|
| 7261 |
+
"learning_rate": 2.038e-07,
|
| 7262 |
+
"loss": 1.5895,
|
| 7263 |
+
"step": 1022
|
| 7264 |
+
},
|
| 7265 |
+
{
|
| 7266 |
+
"epoch": 1023.0,
|
| 7267 |
+
"grad_norm": 0.9539472460746765,
|
| 7268 |
+
"learning_rate": 2.04e-07,
|
| 7269 |
+
"loss": 1.5838,
|
| 7270 |
+
"step": 1023
|
| 7271 |
+
},
|
| 7272 |
+
{
|
| 7273 |
+
"epoch": 1024.0,
|
| 7274 |
+
"grad_norm": 8.19465446472168,
|
| 7275 |
+
"learning_rate": 2.0420000000000002e-07,
|
| 7276 |
+
"loss": 1.5815,
|
| 7277 |
+
"step": 1024
|
| 7278 |
+
},
|
| 7279 |
+
{
|
| 7280 |
+
"epoch": 1025.0,
|
| 7281 |
+
"grad_norm": 2.1362464427948,
|
| 7282 |
+
"learning_rate": 2.044e-07,
|
| 7283 |
+
"loss": 1.5767,
|
| 7284 |
+
"step": 1025
|
| 7285 |
+
},
|
| 7286 |
+
{
|
| 7287 |
+
"epoch": 1026.0,
|
| 7288 |
+
"grad_norm": 1.9000413417816162,
|
| 7289 |
+
"learning_rate": 2.046e-07,
|
| 7290 |
+
"loss": 1.5812,
|
| 7291 |
+
"step": 1026
|
| 7292 |
+
},
|
| 7293 |
+
{
|
| 7294 |
+
"epoch": 1027.0,
|
| 7295 |
+
"grad_norm": 1.1212537288665771,
|
| 7296 |
+
"learning_rate": 2.048e-07,
|
| 7297 |
+
"loss": 1.584,
|
| 7298 |
+
"step": 1027
|
| 7299 |
+
},
|
| 7300 |
+
{
|
| 7301 |
+
"epoch": 1028.0,
|
| 7302 |
+
"grad_norm": 1.5567606687545776,
|
| 7303 |
+
"learning_rate": 2.0500000000000002e-07,
|
| 7304 |
+
"loss": 1.5798,
|
| 7305 |
+
"step": 1028
|
| 7306 |
+
},
|
| 7307 |
+
{
|
| 7308 |
+
"epoch": 1029.0,
|
| 7309 |
+
"grad_norm": 1.4290876388549805,
|
| 7310 |
+
"learning_rate": 2.0519999999999998e-07,
|
| 7311 |
+
"loss": 1.5807,
|
| 7312 |
+
"step": 1029
|
| 7313 |
+
},
|
| 7314 |
+
{
|
| 7315 |
+
"epoch": 1030.0,
|
| 7316 |
+
"grad_norm": 1.4025850296020508,
|
| 7317 |
+
"learning_rate": 2.054e-07,
|
| 7318 |
+
"loss": 1.5844,
|
| 7319 |
+
"step": 1030
|
| 7320 |
+
},
|
| 7321 |
+
{
|
| 7322 |
+
"epoch": 1031.0,
|
| 7323 |
+
"grad_norm": 1.0360734462738037,
|
| 7324 |
+
"learning_rate": 2.0560000000000001e-07,
|
| 7325 |
+
"loss": 1.5745,
|
| 7326 |
+
"step": 1031
|
| 7327 |
+
},
|
| 7328 |
+
{
|
| 7329 |
+
"epoch": 1032.0,
|
| 7330 |
+
"grad_norm": 0.90553218126297,
|
| 7331 |
+
"learning_rate": 2.058e-07,
|
| 7332 |
+
"loss": 1.5893,
|
| 7333 |
+
"step": 1032
|
| 7334 |
+
},
|
| 7335 |
+
{
|
| 7336 |
+
"epoch": 1033.0,
|
| 7337 |
+
"grad_norm": 1.8398399353027344,
|
| 7338 |
+
"learning_rate": 2.0600000000000002e-07,
|
| 7339 |
+
"loss": 1.5742,
|
| 7340 |
+
"step": 1033
|
| 7341 |
+
},
|
| 7342 |
+
{
|
| 7343 |
+
"epoch": 1034.0,
|
| 7344 |
+
"grad_norm": 3.009005069732666,
|
| 7345 |
+
"learning_rate": 2.062e-07,
|
| 7346 |
+
"loss": 1.5723,
|
| 7347 |
+
"step": 1034
|
| 7348 |
+
},
|
| 7349 |
+
{
|
| 7350 |
+
"epoch": 1035.0,
|
| 7351 |
+
"grad_norm": 1.2227106094360352,
|
| 7352 |
+
"learning_rate": 2.064e-07,
|
| 7353 |
+
"loss": 1.5815,
|
| 7354 |
+
"step": 1035
|
| 7355 |
+
},
|
| 7356 |
+
{
|
| 7357 |
+
"epoch": 1036.0,
|
| 7358 |
+
"grad_norm": 0.9489397406578064,
|
| 7359 |
+
"learning_rate": 2.066e-07,
|
| 7360 |
+
"loss": 1.5773,
|
| 7361 |
+
"step": 1036
|
| 7362 |
+
},
|
| 7363 |
+
{
|
| 7364 |
+
"epoch": 1037.0,
|
| 7365 |
+
"grad_norm": 2.97885799407959,
|
| 7366 |
+
"learning_rate": 2.068e-07,
|
| 7367 |
+
"loss": 1.5804,
|
| 7368 |
+
"step": 1037
|
| 7369 |
+
},
|
| 7370 |
+
{
|
| 7371 |
+
"epoch": 1038.0,
|
| 7372 |
+
"grad_norm": 2.4349982738494873,
|
| 7373 |
+
"learning_rate": 2.0700000000000001e-07,
|
| 7374 |
+
"loss": 1.5712,
|
| 7375 |
+
"step": 1038
|
| 7376 |
+
},
|
| 7377 |
+
{
|
| 7378 |
+
"epoch": 1039.0,
|
| 7379 |
+
"grad_norm": 1.095255970954895,
|
| 7380 |
+
"learning_rate": 2.072e-07,
|
| 7381 |
+
"loss": 1.5758,
|
| 7382 |
+
"step": 1039
|
| 7383 |
+
},
|
| 7384 |
+
{
|
| 7385 |
+
"epoch": 1040.0,
|
| 7386 |
+
"grad_norm": 1.583194375038147,
|
| 7387 |
+
"learning_rate": 2.074e-07,
|
| 7388 |
+
"loss": 1.5709,
|
| 7389 |
+
"step": 1040
|
| 7390 |
+
},
|
| 7391 |
+
{
|
| 7392 |
+
"epoch": 1041.0,
|
| 7393 |
+
"grad_norm": 1.6994518041610718,
|
| 7394 |
+
"learning_rate": 2.076e-07,
|
| 7395 |
+
"loss": 1.5672,
|
| 7396 |
+
"step": 1041
|
| 7397 |
+
},
|
| 7398 |
+
{
|
| 7399 |
+
"epoch": 1042.0,
|
| 7400 |
+
"grad_norm": 1.1236746311187744,
|
| 7401 |
+
"learning_rate": 2.0780000000000002e-07,
|
| 7402 |
+
"loss": 1.5731,
|
| 7403 |
+
"step": 1042
|
| 7404 |
+
},
|
| 7405 |
+
{
|
| 7406 |
+
"epoch": 1043.0,
|
| 7407 |
+
"grad_norm": 6.768857002258301,
|
| 7408 |
+
"learning_rate": 2.0799999999999998e-07,
|
| 7409 |
+
"loss": 1.5644,
|
| 7410 |
+
"step": 1043
|
| 7411 |
+
},
|
| 7412 |
+
{
|
| 7413 |
+
"epoch": 1044.0,
|
| 7414 |
+
"grad_norm": 1.107519507408142,
|
| 7415 |
+
"learning_rate": 2.082e-07,
|
| 7416 |
+
"loss": 1.5749,
|
| 7417 |
+
"step": 1044
|
| 7418 |
+
},
|
| 7419 |
+
{
|
| 7420 |
+
"epoch": 1045.0,
|
| 7421 |
+
"grad_norm": 2.1311705112457275,
|
| 7422 |
+
"learning_rate": 2.084e-07,
|
| 7423 |
+
"loss": 1.5628,
|
| 7424 |
+
"step": 1045
|
| 7425 |
+
},
|
| 7426 |
+
{
|
| 7427 |
+
"epoch": 1046.0,
|
| 7428 |
+
"grad_norm": 1.3456166982650757,
|
| 7429 |
+
"learning_rate": 2.086e-07,
|
| 7430 |
+
"loss": 1.5715,
|
| 7431 |
+
"step": 1046
|
| 7432 |
+
},
|
| 7433 |
+
{
|
| 7434 |
+
"epoch": 1047.0,
|
| 7435 |
+
"grad_norm": 1.005059838294983,
|
| 7436 |
+
"learning_rate": 2.0880000000000002e-07,
|
| 7437 |
+
"loss": 1.5679,
|
| 7438 |
+
"step": 1047
|
| 7439 |
+
},
|
| 7440 |
+
{
|
| 7441 |
+
"epoch": 1048.0,
|
| 7442 |
+
"grad_norm": 1.5581361055374146,
|
| 7443 |
+
"learning_rate": 2.09e-07,
|
| 7444 |
+
"loss": 1.5668,
|
| 7445 |
+
"step": 1048
|
| 7446 |
+
},
|
| 7447 |
+
{
|
| 7448 |
+
"epoch": 1049.0,
|
| 7449 |
+
"grad_norm": 0.9271217584609985,
|
| 7450 |
+
"learning_rate": 2.092e-07,
|
| 7451 |
+
"loss": 1.5665,
|
| 7452 |
+
"step": 1049
|
| 7453 |
+
},
|
| 7454 |
+
{
|
| 7455 |
+
"epoch": 1050.0,
|
| 7456 |
+
"grad_norm": 8.531341552734375,
|
| 7457 |
+
"learning_rate": 2.094e-07,
|
| 7458 |
+
"loss": 1.5749,
|
| 7459 |
+
"step": 1050
|
| 7460 |
+
},
|
| 7461 |
+
{
|
| 7462 |
+
"epoch": 1051.0,
|
| 7463 |
+
"grad_norm": 1.0843509435653687,
|
| 7464 |
+
"learning_rate": 2.0960000000000002e-07,
|
| 7465 |
+
"loss": 1.5618,
|
| 7466 |
+
"step": 1051
|
| 7467 |
+
},
|
| 7468 |
+
{
|
| 7469 |
+
"epoch": 1052.0,
|
| 7470 |
+
"grad_norm": 3.8642165660858154,
|
| 7471 |
+
"learning_rate": 2.098e-07,
|
| 7472 |
+
"loss": 1.5685,
|
| 7473 |
+
"step": 1052
|
| 7474 |
+
},
|
| 7475 |
+
{
|
| 7476 |
+
"epoch": 1053.0,
|
| 7477 |
+
"grad_norm": 1.2413549423217773,
|
| 7478 |
+
"learning_rate": 2.1e-07,
|
| 7479 |
+
"loss": 1.5627,
|
| 7480 |
+
"step": 1053
|
| 7481 |
+
},
|
| 7482 |
+
{
|
| 7483 |
+
"epoch": 1054.0,
|
| 7484 |
+
"grad_norm": 1.2524449825286865,
|
| 7485 |
+
"learning_rate": 2.102e-07,
|
| 7486 |
+
"loss": 1.5636,
|
| 7487 |
+
"step": 1054
|
| 7488 |
+
},
|
| 7489 |
+
{
|
| 7490 |
+
"epoch": 1055.0,
|
| 7491 |
+
"grad_norm": 1.2952991724014282,
|
| 7492 |
+
"learning_rate": 2.104e-07,
|
| 7493 |
+
"loss": 1.5645,
|
| 7494 |
+
"step": 1055
|
| 7495 |
+
},
|
| 7496 |
+
{
|
| 7497 |
+
"epoch": 1056.0,
|
| 7498 |
+
"grad_norm": 1.4905788898468018,
|
| 7499 |
+
"learning_rate": 2.1060000000000002e-07,
|
| 7500 |
+
"loss": 1.5655,
|
| 7501 |
+
"step": 1056
|
| 7502 |
+
},
|
| 7503 |
+
{
|
| 7504 |
+
"epoch": 1057.0,
|
| 7505 |
+
"grad_norm": 1.1086138486862183,
|
| 7506 |
+
"learning_rate": 2.1079999999999998e-07,
|
| 7507 |
+
"loss": 1.5673,
|
| 7508 |
+
"step": 1057
|
| 7509 |
+
},
|
| 7510 |
+
{
|
| 7511 |
+
"epoch": 1058.0,
|
| 7512 |
+
"grad_norm": 6.657322883605957,
|
| 7513 |
+
"learning_rate": 2.11e-07,
|
| 7514 |
+
"loss": 1.5617,
|
| 7515 |
+
"step": 1058
|
| 7516 |
+
},
|
| 7517 |
+
{
|
| 7518 |
+
"epoch": 1059.0,
|
| 7519 |
+
"grad_norm": 34.03983688354492,
|
| 7520 |
+
"learning_rate": 2.112e-07,
|
| 7521 |
+
"loss": 1.564,
|
| 7522 |
+
"step": 1059
|
| 7523 |
+
},
|
| 7524 |
+
{
|
| 7525 |
+
"epoch": 1060.0,
|
| 7526 |
+
"grad_norm": 1.2597378492355347,
|
| 7527 |
+
"learning_rate": 2.114e-07,
|
| 7528 |
+
"loss": 1.5608,
|
| 7529 |
+
"step": 1060
|
| 7530 |
+
},
|
| 7531 |
+
{
|
| 7532 |
+
"epoch": 1061.0,
|
| 7533 |
+
"grad_norm": 1.1377840042114258,
|
| 7534 |
+
"learning_rate": 2.1160000000000002e-07,
|
| 7535 |
+
"loss": 1.5626,
|
| 7536 |
+
"step": 1061
|
| 7537 |
+
},
|
| 7538 |
+
{
|
| 7539 |
+
"epoch": 1062.0,
|
| 7540 |
+
"grad_norm": 1.9253413677215576,
|
| 7541 |
+
"learning_rate": 2.118e-07,
|
| 7542 |
+
"loss": 1.5591,
|
| 7543 |
+
"step": 1062
|
| 7544 |
+
},
|
| 7545 |
+
{
|
| 7546 |
+
"epoch": 1063.0,
|
| 7547 |
+
"grad_norm": 2.157513380050659,
|
| 7548 |
+
"learning_rate": 2.12e-07,
|
| 7549 |
+
"loss": 1.5539,
|
| 7550 |
+
"step": 1063
|
| 7551 |
+
},
|
| 7552 |
+
{
|
| 7553 |
+
"epoch": 1064.0,
|
| 7554 |
+
"grad_norm": 1.3998243808746338,
|
| 7555 |
+
"learning_rate": 2.122e-07,
|
| 7556 |
+
"loss": 1.564,
|
| 7557 |
+
"step": 1064
|
| 7558 |
+
},
|
| 7559 |
+
{
|
| 7560 |
+
"epoch": 1065.0,
|
| 7561 |
+
"grad_norm": 1.0151389837265015,
|
| 7562 |
+
"learning_rate": 2.1240000000000002e-07,
|
| 7563 |
+
"loss": 1.561,
|
| 7564 |
+
"step": 1065
|
| 7565 |
+
},
|
| 7566 |
+
{
|
| 7567 |
+
"epoch": 1066.0,
|
| 7568 |
+
"grad_norm": 1.3415422439575195,
|
| 7569 |
+
"learning_rate": 2.126e-07,
|
| 7570 |
+
"loss": 1.5611,
|
| 7571 |
+
"step": 1066
|
| 7572 |
+
},
|
| 7573 |
+
{
|
| 7574 |
+
"epoch": 1067.0,
|
| 7575 |
+
"grad_norm": 2.3141579627990723,
|
| 7576 |
+
"learning_rate": 2.128e-07,
|
| 7577 |
+
"loss": 1.5573,
|
| 7578 |
+
"step": 1067
|
| 7579 |
+
},
|
| 7580 |
+
{
|
| 7581 |
+
"epoch": 1068.0,
|
| 7582 |
+
"grad_norm": 0.9580351114273071,
|
| 7583 |
+
"learning_rate": 2.1300000000000001e-07,
|
| 7584 |
+
"loss": 1.5578,
|
| 7585 |
+
"step": 1068
|
| 7586 |
+
},
|
| 7587 |
+
{
|
| 7588 |
+
"epoch": 1069.0,
|
| 7589 |
+
"grad_norm": 1.0505666732788086,
|
| 7590 |
+
"learning_rate": 2.132e-07,
|
| 7591 |
+
"loss": 1.5562,
|
| 7592 |
+
"step": 1069
|
| 7593 |
+
},
|
| 7594 |
+
{
|
| 7595 |
+
"epoch": 1070.0,
|
| 7596 |
+
"grad_norm": 1.6809712648391724,
|
| 7597 |
+
"learning_rate": 2.1340000000000002e-07,
|
| 7598 |
+
"loss": 1.557,
|
| 7599 |
+
"step": 1070
|
| 7600 |
+
},
|
| 7601 |
+
{
|
| 7602 |
+
"epoch": 1071.0,
|
| 7603 |
+
"grad_norm": 3.6099202632904053,
|
| 7604 |
+
"learning_rate": 2.1359999999999998e-07,
|
| 7605 |
+
"loss": 1.5535,
|
| 7606 |
+
"step": 1071
|
| 7607 |
+
},
|
| 7608 |
+
{
|
| 7609 |
+
"epoch": 1072.0,
|
| 7610 |
+
"grad_norm": 5.737199783325195,
|
| 7611 |
+
"learning_rate": 2.138e-07,
|
| 7612 |
+
"loss": 1.5559,
|
| 7613 |
+
"step": 1072
|
| 7614 |
+
},
|
| 7615 |
+
{
|
| 7616 |
+
"epoch": 1073.0,
|
| 7617 |
+
"grad_norm": 1.131452202796936,
|
| 7618 |
+
"learning_rate": 2.14e-07,
|
| 7619 |
+
"loss": 1.5557,
|
| 7620 |
+
"step": 1073
|
| 7621 |
+
},
|
| 7622 |
+
{
|
| 7623 |
+
"epoch": 1074.0,
|
| 7624 |
+
"grad_norm": 1.0757285356521606,
|
| 7625 |
+
"learning_rate": 2.142e-07,
|
| 7626 |
+
"loss": 1.551,
|
| 7627 |
+
"step": 1074
|
| 7628 |
+
},
|
| 7629 |
+
{
|
| 7630 |
+
"epoch": 1075.0,
|
| 7631 |
+
"grad_norm": 1.316859245300293,
|
| 7632 |
+
"learning_rate": 2.1440000000000001e-07,
|
| 7633 |
+
"loss": 1.5541,
|
| 7634 |
+
"step": 1075
|
| 7635 |
+
},
|
| 7636 |
+
{
|
| 7637 |
+
"epoch": 1076.0,
|
| 7638 |
+
"grad_norm": 6.291731357574463,
|
| 7639 |
+
"learning_rate": 2.146e-07,
|
| 7640 |
+
"loss": 1.552,
|
| 7641 |
+
"step": 1076
|
| 7642 |
+
},
|
| 7643 |
+
{
|
| 7644 |
+
"epoch": 1077.0,
|
| 7645 |
+
"grad_norm": 0.980509340763092,
|
| 7646 |
+
"learning_rate": 2.148e-07,
|
| 7647 |
+
"loss": 1.5531,
|
| 7648 |
+
"step": 1077
|
| 7649 |
+
},
|
| 7650 |
+
{
|
| 7651 |
+
"epoch": 1078.0,
|
| 7652 |
+
"grad_norm": 1.1346079111099243,
|
| 7653 |
+
"learning_rate": 2.15e-07,
|
| 7654 |
+
"loss": 1.5503,
|
| 7655 |
+
"step": 1078
|
| 7656 |
+
},
|
| 7657 |
+
{
|
| 7658 |
+
"epoch": 1079.0,
|
| 7659 |
+
"grad_norm": 1.938717246055603,
|
| 7660 |
+
"learning_rate": 2.1520000000000002e-07,
|
| 7661 |
+
"loss": 1.5571,
|
| 7662 |
+
"step": 1079
|
| 7663 |
+
},
|
| 7664 |
+
{
|
| 7665 |
+
"epoch": 1080.0,
|
| 7666 |
+
"grad_norm": 1.836732268333435,
|
| 7667 |
+
"learning_rate": 2.154e-07,
|
| 7668 |
+
"loss": 1.5536,
|
| 7669 |
+
"step": 1080
|
| 7670 |
+
},
|
| 7671 |
+
{
|
| 7672 |
+
"epoch": 1081.0,
|
| 7673 |
+
"grad_norm": 1.5794588327407837,
|
| 7674 |
+
"learning_rate": 2.156e-07,
|
| 7675 |
+
"loss": 1.5484,
|
| 7676 |
+
"step": 1081
|
| 7677 |
+
},
|
| 7678 |
+
{
|
| 7679 |
+
"epoch": 1082.0,
|
| 7680 |
+
"grad_norm": 1.154757022857666,
|
| 7681 |
+
"learning_rate": 2.1580000000000001e-07,
|
| 7682 |
+
"loss": 1.5477,
|
| 7683 |
+
"step": 1082
|
| 7684 |
+
},
|
| 7685 |
+
{
|
| 7686 |
+
"epoch": 1083.0,
|
| 7687 |
+
"grad_norm": 1.6351189613342285,
|
| 7688 |
+
"learning_rate": 2.16e-07,
|
| 7689 |
+
"loss": 1.5512,
|
| 7690 |
+
"step": 1083
|
| 7691 |
+
},
|
| 7692 |
+
{
|
| 7693 |
+
"epoch": 1084.0,
|
| 7694 |
+
"grad_norm": 2.203552007675171,
|
| 7695 |
+
"learning_rate": 2.1620000000000002e-07,
|
| 7696 |
+
"loss": 1.5461,
|
| 7697 |
+
"step": 1084
|
| 7698 |
+
},
|
| 7699 |
+
{
|
| 7700 |
+
"epoch": 1085.0,
|
| 7701 |
+
"grad_norm": 1.813345193862915,
|
| 7702 |
+
"learning_rate": 2.164e-07,
|
| 7703 |
+
"loss": 1.5516,
|
| 7704 |
+
"step": 1085
|
| 7705 |
+
},
|
| 7706 |
+
{
|
| 7707 |
+
"epoch": 1086.0,
|
| 7708 |
+
"grad_norm": 0.8774526715278625,
|
| 7709 |
+
"learning_rate": 2.166e-07,
|
| 7710 |
+
"loss": 1.5511,
|
| 7711 |
+
"step": 1086
|
| 7712 |
+
},
|
| 7713 |
+
{
|
| 7714 |
+
"epoch": 1087.0,
|
| 7715 |
+
"grad_norm": 3.1274983882904053,
|
| 7716 |
+
"learning_rate": 2.168e-07,
|
| 7717 |
+
"loss": 1.5443,
|
| 7718 |
+
"step": 1087
|
| 7719 |
+
},
|
| 7720 |
+
{
|
| 7721 |
+
"epoch": 1088.0,
|
| 7722 |
+
"grad_norm": 3.257859945297241,
|
| 7723 |
+
"learning_rate": 2.17e-07,
|
| 7724 |
+
"loss": 1.5454,
|
| 7725 |
+
"step": 1088
|
| 7726 |
+
},
|
| 7727 |
+
{
|
| 7728 |
+
"epoch": 1089.0,
|
| 7729 |
+
"grad_norm": 5.167294979095459,
|
| 7730 |
+
"learning_rate": 2.172e-07,
|
| 7731 |
+
"loss": 1.5372,
|
| 7732 |
+
"step": 1089
|
| 7733 |
+
},
|
| 7734 |
+
{
|
| 7735 |
+
"epoch": 1090.0,
|
| 7736 |
+
"grad_norm": 1.0005639791488647,
|
| 7737 |
+
"learning_rate": 2.174e-07,
|
| 7738 |
+
"loss": 1.5416,
|
| 7739 |
+
"step": 1090
|
| 7740 |
+
},
|
| 7741 |
+
{
|
| 7742 |
+
"epoch": 1091.0,
|
| 7743 |
+
"grad_norm": 1.842504620552063,
|
| 7744 |
+
"learning_rate": 2.176e-07,
|
| 7745 |
+
"loss": 1.5441,
|
| 7746 |
+
"step": 1091
|
| 7747 |
+
},
|
| 7748 |
+
{
|
| 7749 |
+
"epoch": 1092.0,
|
| 7750 |
+
"grad_norm": 0.9027210474014282,
|
| 7751 |
+
"learning_rate": 2.178e-07,
|
| 7752 |
+
"loss": 1.5359,
|
| 7753 |
+
"step": 1092
|
| 7754 |
+
},
|
| 7755 |
+
{
|
| 7756 |
+
"epoch": 1093.0,
|
| 7757 |
+
"grad_norm": 1.4683235883712769,
|
| 7758 |
+
"learning_rate": 2.1800000000000002e-07,
|
| 7759 |
+
"loss": 1.5467,
|
| 7760 |
+
"step": 1093
|
| 7761 |
+
},
|
| 7762 |
+
{
|
| 7763 |
+
"epoch": 1094.0,
|
| 7764 |
+
"grad_norm": 0.9885283708572388,
|
| 7765 |
+
"learning_rate": 2.182e-07,
|
| 7766 |
+
"loss": 1.5436,
|
| 7767 |
+
"step": 1094
|
| 7768 |
+
},
|
| 7769 |
+
{
|
| 7770 |
+
"epoch": 1095.0,
|
| 7771 |
+
"grad_norm": 1.3684940338134766,
|
| 7772 |
+
"learning_rate": 2.184e-07,
|
| 7773 |
+
"loss": 1.5419,
|
| 7774 |
+
"step": 1095
|
| 7775 |
+
},
|
| 7776 |
+
{
|
| 7777 |
+
"epoch": 1096.0,
|
| 7778 |
+
"grad_norm": 0.9934016466140747,
|
| 7779 |
+
"learning_rate": 2.186e-07,
|
| 7780 |
+
"loss": 1.54,
|
| 7781 |
+
"step": 1096
|
| 7782 |
+
},
|
| 7783 |
+
{
|
| 7784 |
+
"epoch": 1097.0,
|
| 7785 |
+
"grad_norm": 1.8801573514938354,
|
| 7786 |
+
"learning_rate": 2.188e-07,
|
| 7787 |
+
"loss": 1.5404,
|
| 7788 |
+
"step": 1097
|
| 7789 |
+
},
|
| 7790 |
+
{
|
| 7791 |
+
"epoch": 1098.0,
|
| 7792 |
+
"grad_norm": 1.0297327041625977,
|
| 7793 |
+
"learning_rate": 2.1900000000000002e-07,
|
| 7794 |
+
"loss": 1.5412,
|
| 7795 |
+
"step": 1098
|
| 7796 |
+
},
|
| 7797 |
+
{
|
| 7798 |
+
"epoch": 1099.0,
|
| 7799 |
+
"grad_norm": 1.1018619537353516,
|
| 7800 |
+
"learning_rate": 2.192e-07,
|
| 7801 |
+
"loss": 1.5419,
|
| 7802 |
+
"step": 1099
|
| 7803 |
+
},
|
| 7804 |
+
{
|
| 7805 |
+
"epoch": 1100.0,
|
| 7806 |
+
"grad_norm": 4.425602912902832,
|
| 7807 |
+
"learning_rate": 2.194e-07,
|
| 7808 |
+
"loss": 1.5454,
|
| 7809 |
+
"step": 1100
|
| 7810 |
+
},
|
| 7811 |
+
{
|
| 7812 |
+
"epoch": 1100.0,
|
| 7813 |
+
"eval_cer": 0.9915312316428432,
|
| 7814 |
+
"eval_loss": 3.433760166168213,
|
| 7815 |
+
"eval_runtime": 14.4715,
|
| 7816 |
+
"eval_samples_per_second": 67.927,
|
| 7817 |
+
"eval_steps_per_second": 0.484,
|
| 7818 |
+
"eval_wer": 0.9996560421921578,
|
| 7819 |
+
"step": 1100
|
| 7820 |
}
|
| 7821 |
],
|
| 7822 |
"logging_steps": 1.0,
|
|
|
|
| 7845 |
"attributes": {}
|
| 7846 |
}
|
| 7847 |
},
|
| 7848 |
+
"total_flos": 4.440971061660672e+18,
|
| 7849 |
"train_batch_size": 160,
|
| 7850 |
"trial_name": null,
|
| 7851 |
"trial_params": null
|