ayymen committed
Commit 8de49fc · verified · 1 Parent(s): 0e1187a

Training in progress, step 1100, checkpoint

last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:bd152a0533685ffc6fb75ba6c9939a96cf0a2958b3f9cbcf334ef70655226e50
+ oid sha256:17da929491964a5f7f99889aaa103f6cf49cd29bb8fc13a170212805274c07d5
  size 2423056460
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:00aa8fde532272e6408ab17c4cdc0116f5fb70b67559f70dd48fc8c969cbb9eb
+ oid sha256:b9ad4522ff26c74ce5cf69802ba9845044b45448be6840c74e8202de52eff047
  size 4846590727
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:26a10261ac28360a9d18be3421670e3c8b1cde81b3240e9470fb4a4b6653676a
+ oid sha256:efe32a646280b36133122c6a4087e90d85fcd1cec8818af4943c2006708455b0
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f96da6c00c37f96a6a782ce432fb44dc4c58f094e640be4e0926a89b28aca277
+ oid sha256:7a30c8e8fd04d409bda330535b6ff99d9919b3adf898382237213c6c96c77dd2
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 3.5061514377593994,
- "best_model_checkpoint": "./w2v-bert-2.0-yoruba_naijavoices_1m/checkpoint-900",
- "epoch": 900.0,
+ "best_metric": 3.433760166168213,
+ "best_model_checkpoint": "./w2v-bert-2.0-yoruba_naijavoices_1m/checkpoint-1100",
+ "epoch": 1100.0,
  "eval_steps": 100,
- "global_step": 900,
+ "global_step": 1100,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -6397,6 +6397,1426 @@
  "eval_steps_per_second": 0.469,
  "eval_wer": 1.0,
  "step": 900
6400
+ },
6401
+ {
6402
+ "epoch": 901.0,
6403
+ "grad_norm": 7.289846897125244,
6404
+ "learning_rate": 1.7960000000000002e-07,
6405
+ "loss": 1.6777,
6406
+ "step": 901
6407
+ },
6408
+ {
6409
+ "epoch": 902.0,
6410
+ "grad_norm": 1.5632303953170776,
6411
+ "learning_rate": 1.798e-07,
6412
+ "loss": 1.6843,
6413
+ "step": 902
6414
+ },
6415
+ {
6416
+ "epoch": 903.0,
6417
+ "grad_norm": 1.2164356708526611,
6418
+ "learning_rate": 1.8e-07,
6419
+ "loss": 1.6713,
6420
+ "step": 903
6421
+ },
6422
+ {
6423
+ "epoch": 904.0,
6424
+ "grad_norm": 1.7437841892242432,
6425
+ "learning_rate": 1.802e-07,
6426
+ "loss": 1.6749,
6427
+ "step": 904
6428
+ },
6429
+ {
6430
+ "epoch": 905.0,
6431
+ "grad_norm": 1.2455863952636719,
6432
+ "learning_rate": 1.804e-07,
6433
+ "loss": 1.6721,
6434
+ "step": 905
6435
+ },
6436
+ {
6437
+ "epoch": 906.0,
6438
+ "grad_norm": 1.3489329814910889,
6439
+ "learning_rate": 1.8060000000000002e-07,
6440
+ "loss": 1.6771,
6441
+ "step": 906
6442
+ },
6443
+ {
6444
+ "epoch": 907.0,
6445
+ "grad_norm": 1.2651866674423218,
6446
+ "learning_rate": 1.808e-07,
6447
+ "loss": 1.6682,
6448
+ "step": 907
6449
+ },
6450
+ {
6451
+ "epoch": 908.0,
6452
+ "grad_norm": 1.280983328819275,
6453
+ "learning_rate": 1.81e-07,
6454
+ "loss": 1.6765,
6455
+ "step": 908
6456
+ },
6457
+ {
6458
+ "epoch": 909.0,
6459
+ "grad_norm": 1.3800833225250244,
6460
+ "learning_rate": 1.812e-07,
6461
+ "loss": 1.6697,
6462
+ "step": 909
6463
+ },
6464
+ {
6465
+ "epoch": 910.0,
6466
+ "grad_norm": 1.18763267993927,
6467
+ "learning_rate": 1.814e-07,
6468
+ "loss": 1.6612,
6469
+ "step": 910
6470
+ },
6471
+ {
6472
+ "epoch": 911.0,
6473
+ "grad_norm": 1.9815888404846191,
6474
+ "learning_rate": 1.816e-07,
6475
+ "loss": 1.6715,
6476
+ "step": 911
6477
+ },
6478
+ {
6479
+ "epoch": 912.0,
6480
+ "grad_norm": 2.0720643997192383,
6481
+ "learning_rate": 1.818e-07,
6482
+ "loss": 1.6681,
6483
+ "step": 912
6484
+ },
6485
+ {
6486
+ "epoch": 913.0,
6487
+ "grad_norm": 1.1363974809646606,
6488
+ "learning_rate": 1.82e-07,
6489
+ "loss": 1.6675,
6490
+ "step": 913
6491
+ },
6492
+ {
6493
+ "epoch": 914.0,
6494
+ "grad_norm": 1.1961008310317993,
6495
+ "learning_rate": 1.822e-07,
6496
+ "loss": 1.6712,
6497
+ "step": 914
6498
+ },
6499
+ {
6500
+ "epoch": 915.0,
6501
+ "grad_norm": 3.5271594524383545,
6502
+ "learning_rate": 1.8240000000000002e-07,
6503
+ "loss": 1.6629,
6504
+ "step": 915
6505
+ },
6506
+ {
6507
+ "epoch": 916.0,
6508
+ "grad_norm": 1.2886525392532349,
6509
+ "learning_rate": 1.826e-07,
6510
+ "loss": 1.6638,
6511
+ "step": 916
6512
+ },
6513
+ {
6514
+ "epoch": 917.0,
6515
+ "grad_norm": 3.6112558841705322,
6516
+ "learning_rate": 1.828e-07,
6517
+ "loss": 1.667,
6518
+ "step": 917
6519
+ },
6520
+ {
6521
+ "epoch": 918.0,
6522
+ "grad_norm": 1.1059443950653076,
6523
+ "learning_rate": 1.83e-07,
6524
+ "loss": 1.6659,
6525
+ "step": 918
6526
+ },
6527
+ {
6528
+ "epoch": 919.0,
6529
+ "grad_norm": 1.2059946060180664,
6530
+ "learning_rate": 1.832e-07,
6531
+ "loss": 1.6583,
6532
+ "step": 919
6533
+ },
6534
+ {
6535
+ "epoch": 920.0,
6536
+ "grad_norm": 1.0824497938156128,
6537
+ "learning_rate": 1.8340000000000001e-07,
6538
+ "loss": 1.6599,
6539
+ "step": 920
6540
+ },
6541
+ {
6542
+ "epoch": 921.0,
6543
+ "grad_norm": 1.2805767059326172,
6544
+ "learning_rate": 1.836e-07,
6545
+ "loss": 1.6527,
6546
+ "step": 921
6547
+ },
6548
+ {
6549
+ "epoch": 922.0,
6550
+ "grad_norm": 16.977970123291016,
6551
+ "learning_rate": 1.838e-07,
6552
+ "loss": 1.6621,
6553
+ "step": 922
6554
+ },
6555
+ {
6556
+ "epoch": 923.0,
6557
+ "grad_norm": 1.8109819889068604,
6558
+ "learning_rate": 1.84e-07,
6559
+ "loss": 1.6573,
6560
+ "step": 923
6561
+ },
6562
+ {
6563
+ "epoch": 924.0,
6564
+ "grad_norm": 4.374696254730225,
6565
+ "learning_rate": 1.842e-07,
6566
+ "loss": 1.6646,
6567
+ "step": 924
6568
+ },
6569
+ {
6570
+ "epoch": 925.0,
6571
+ "grad_norm": 1.1373530626296997,
6572
+ "learning_rate": 1.844e-07,
6573
+ "loss": 1.6555,
6574
+ "step": 925
6575
+ },
6576
+ {
6577
+ "epoch": 926.0,
6578
+ "grad_norm": 1.135043978691101,
6579
+ "learning_rate": 1.846e-07,
6580
+ "loss": 1.6594,
6581
+ "step": 926
6582
+ },
6583
+ {
6584
+ "epoch": 927.0,
6585
+ "grad_norm": 1.1703171730041504,
6586
+ "learning_rate": 1.848e-07,
6587
+ "loss": 1.6546,
6588
+ "step": 927
6589
+ },
6590
+ {
6591
+ "epoch": 928.0,
6592
+ "grad_norm": 1.405674695968628,
6593
+ "learning_rate": 1.85e-07,
6594
+ "loss": 1.6552,
6595
+ "step": 928
6596
+ },
6597
+ {
6598
+ "epoch": 929.0,
6599
+ "grad_norm": 1.2653117179870605,
6600
+ "learning_rate": 1.8520000000000002e-07,
6601
+ "loss": 1.6531,
6602
+ "step": 929
6603
+ },
6604
+ {
6605
+ "epoch": 930.0,
6606
+ "grad_norm": 1.0446969270706177,
6607
+ "learning_rate": 1.854e-07,
6608
+ "loss": 1.6535,
6609
+ "step": 930
6610
+ },
6611
+ {
6612
+ "epoch": 931.0,
6613
+ "grad_norm": 1.2374356985092163,
6614
+ "learning_rate": 1.856e-07,
6615
+ "loss": 1.6471,
6616
+ "step": 931
6617
+ },
6618
+ {
6619
+ "epoch": 932.0,
6620
+ "grad_norm": 1.827778935432434,
6621
+ "learning_rate": 1.858e-07,
6622
+ "loss": 1.6569,
6623
+ "step": 932
6624
+ },
6625
+ {
6626
+ "epoch": 933.0,
6627
+ "grad_norm": 1.4429855346679688,
6628
+ "learning_rate": 1.86e-07,
6629
+ "loss": 1.6523,
6630
+ "step": 933
6631
+ },
6632
+ {
6633
+ "epoch": 934.0,
6634
+ "grad_norm": 1.0403034687042236,
6635
+ "learning_rate": 1.862e-07,
6636
+ "loss": 1.6476,
6637
+ "step": 934
6638
+ },
6639
+ {
6640
+ "epoch": 935.0,
6641
+ "grad_norm": 1.3327019214630127,
6642
+ "learning_rate": 1.8640000000000003e-07,
6643
+ "loss": 1.6511,
6644
+ "step": 935
6645
+ },
6646
+ {
6647
+ "epoch": 936.0,
6648
+ "grad_norm": 3.2895419597625732,
6649
+ "learning_rate": 1.866e-07,
6650
+ "loss": 1.6423,
6651
+ "step": 936
6652
+ },
6653
+ {
6654
+ "epoch": 937.0,
6655
+ "grad_norm": 1.3313167095184326,
6656
+ "learning_rate": 1.868e-07,
6657
+ "loss": 1.6475,
6658
+ "step": 937
6659
+ },
6660
+ {
6661
+ "epoch": 938.0,
6662
+ "grad_norm": 1.0129790306091309,
6663
+ "learning_rate": 1.87e-07,
6664
+ "loss": 1.6489,
6665
+ "step": 938
6666
+ },
6667
+ {
6668
+ "epoch": 939.0,
6669
+ "grad_norm": 4.673969268798828,
6670
+ "learning_rate": 1.872e-07,
6671
+ "loss": 1.6496,
6672
+ "step": 939
6673
+ },
6674
+ {
6675
+ "epoch": 940.0,
6676
+ "grad_norm": 4.099658012390137,
6677
+ "learning_rate": 1.8740000000000002e-07,
6678
+ "loss": 1.6458,
6679
+ "step": 940
6680
+ },
6681
+ {
6682
+ "epoch": 941.0,
6683
+ "grad_norm": 1.4593976736068726,
6684
+ "learning_rate": 1.8759999999999999e-07,
6685
+ "loss": 1.6477,
6686
+ "step": 941
6687
+ },
6688
+ {
6689
+ "epoch": 942.0,
6690
+ "grad_norm": 1.2744107246398926,
6691
+ "learning_rate": 1.878e-07,
6692
+ "loss": 1.6503,
6693
+ "step": 942
6694
+ },
6695
+ {
6696
+ "epoch": 943.0,
6697
+ "grad_norm": 1.1063960790634155,
6698
+ "learning_rate": 1.8800000000000002e-07,
6699
+ "loss": 1.6401,
6700
+ "step": 943
6701
+ },
6702
+ {
6703
+ "epoch": 944.0,
6704
+ "grad_norm": 2.0892364978790283,
6705
+ "learning_rate": 1.882e-07,
6706
+ "loss": 1.6417,
6707
+ "step": 944
6708
+ },
6709
+ {
6710
+ "epoch": 945.0,
6711
+ "grad_norm": 1.0549347400665283,
6712
+ "learning_rate": 1.884e-07,
6713
+ "loss": 1.6428,
6714
+ "step": 945
6715
+ },
6716
+ {
6717
+ "epoch": 946.0,
6718
+ "grad_norm": 1.0397493839263916,
6719
+ "learning_rate": 1.886e-07,
6720
+ "loss": 1.6375,
6721
+ "step": 946
6722
+ },
6723
+ {
6724
+ "epoch": 947.0,
6725
+ "grad_norm": 1.138031244277954,
6726
+ "learning_rate": 1.888e-07,
6727
+ "loss": 1.641,
6728
+ "step": 947
6729
+ },
6730
+ {
6731
+ "epoch": 948.0,
6732
+ "grad_norm": 1.2404905557632446,
6733
+ "learning_rate": 1.89e-07,
6734
+ "loss": 1.6408,
6735
+ "step": 948
6736
+ },
6737
+ {
6738
+ "epoch": 949.0,
6739
+ "grad_norm": 1.1873743534088135,
6740
+ "learning_rate": 1.8920000000000003e-07,
6741
+ "loss": 1.6333,
6742
+ "step": 949
6743
+ },
6744
+ {
6745
+ "epoch": 950.0,
6746
+ "grad_norm": 1.2409088611602783,
6747
+ "learning_rate": 1.894e-07,
6748
+ "loss": 1.6389,
6749
+ "step": 950
6750
+ },
6751
+ {
6752
+ "epoch": 951.0,
6753
+ "grad_norm": 4.043206691741943,
6754
+ "learning_rate": 1.896e-07,
6755
+ "loss": 1.6314,
6756
+ "step": 951
6757
+ },
6758
+ {
6759
+ "epoch": 952.0,
6760
+ "grad_norm": 0.9756543040275574,
6761
+ "learning_rate": 1.8980000000000002e-07,
6762
+ "loss": 1.6365,
6763
+ "step": 952
6764
+ },
6765
+ {
6766
+ "epoch": 953.0,
6767
+ "grad_norm": 1.0446370840072632,
6768
+ "learning_rate": 1.9e-07,
6769
+ "loss": 1.6328,
6770
+ "step": 953
6771
+ },
6772
+ {
6773
+ "epoch": 954.0,
6774
+ "grad_norm": 1.219887137413025,
6775
+ "learning_rate": 1.9020000000000002e-07,
6776
+ "loss": 1.6364,
6777
+ "step": 954
6778
+ },
6779
+ {
6780
+ "epoch": 955.0,
6781
+ "grad_norm": 0.9210452437400818,
6782
+ "learning_rate": 1.9039999999999998e-07,
6783
+ "loss": 1.6361,
6784
+ "step": 955
6785
+ },
6786
+ {
6787
+ "epoch": 956.0,
6788
+ "grad_norm": 1.0765300989151,
6789
+ "learning_rate": 1.906e-07,
6790
+ "loss": 1.63,
6791
+ "step": 956
6792
+ },
6793
+ {
6794
+ "epoch": 957.0,
6795
+ "grad_norm": 2.0179483890533447,
6796
+ "learning_rate": 1.9080000000000001e-07,
6797
+ "loss": 1.6323,
6798
+ "step": 957
6799
+ },
6800
+ {
6801
+ "epoch": 958.0,
6802
+ "grad_norm": 1.5964845418930054,
6803
+ "learning_rate": 1.91e-07,
6804
+ "loss": 1.6281,
6805
+ "step": 958
6806
+ },
6807
+ {
6808
+ "epoch": 959.0,
6809
+ "grad_norm": 2.0451290607452393,
6810
+ "learning_rate": 1.912e-07,
6811
+ "loss": 1.6306,
6812
+ "step": 959
6813
+ },
6814
+ {
6815
+ "epoch": 960.0,
6816
+ "grad_norm": 0.9319092631340027,
6817
+ "learning_rate": 1.914e-07,
6818
+ "loss": 1.6248,
6819
+ "step": 960
6820
+ },
6821
+ {
6822
+ "epoch": 961.0,
6823
+ "grad_norm": 40.506675720214844,
6824
+ "learning_rate": 1.916e-07,
6825
+ "loss": 1.6355,
6826
+ "step": 961
6827
+ },
6828
+ {
6829
+ "epoch": 962.0,
6830
+ "grad_norm": 0.9858155846595764,
6831
+ "learning_rate": 1.918e-07,
6832
+ "loss": 1.6226,
6833
+ "step": 962
6834
+ },
6835
+ {
6836
+ "epoch": 963.0,
6837
+ "grad_norm": 1.0020569562911987,
6838
+ "learning_rate": 1.9200000000000003e-07,
6839
+ "loss": 1.6272,
6840
+ "step": 963
6841
+ },
6842
+ {
6843
+ "epoch": 964.0,
6844
+ "grad_norm": 1.4338146448135376,
6845
+ "learning_rate": 1.922e-07,
6846
+ "loss": 1.6289,
6847
+ "step": 964
6848
+ },
6849
+ {
6850
+ "epoch": 965.0,
6851
+ "grad_norm": 1.052070140838623,
6852
+ "learning_rate": 1.924e-07,
6853
+ "loss": 1.6252,
6854
+ "step": 965
6855
+ },
6856
+ {
6857
+ "epoch": 966.0,
6858
+ "grad_norm": 1.4018009901046753,
6859
+ "learning_rate": 1.9260000000000002e-07,
6860
+ "loss": 1.6173,
6861
+ "step": 966
6862
+ },
6863
+ {
6864
+ "epoch": 967.0,
6865
+ "grad_norm": 1.1901110410690308,
6866
+ "learning_rate": 1.928e-07,
6867
+ "loss": 1.6237,
6868
+ "step": 967
6869
+ },
6870
+ {
6871
+ "epoch": 968.0,
6872
+ "grad_norm": 0.9189120531082153,
6873
+ "learning_rate": 1.9300000000000002e-07,
6874
+ "loss": 1.6199,
6875
+ "step": 968
6876
+ },
6877
+ {
6878
+ "epoch": 969.0,
6879
+ "grad_norm": 0.9501894116401672,
6880
+ "learning_rate": 1.932e-07,
6881
+ "loss": 1.6202,
6882
+ "step": 969
6883
+ },
6884
+ {
6885
+ "epoch": 970.0,
6886
+ "grad_norm": 1.4390616416931152,
6887
+ "learning_rate": 1.934e-07,
6888
+ "loss": 1.6176,
6889
+ "step": 970
6890
+ },
6891
+ {
6892
+ "epoch": 971.0,
6893
+ "grad_norm": 2.0271224975585938,
6894
+ "learning_rate": 1.936e-07,
6895
+ "loss": 1.6155,
6896
+ "step": 971
6897
+ },
6898
+ {
6899
+ "epoch": 972.0,
6900
+ "grad_norm": 0.9586737751960754,
6901
+ "learning_rate": 1.938e-07,
6902
+ "loss": 1.616,
6903
+ "step": 972
6904
+ },
6905
+ {
6906
+ "epoch": 973.0,
6907
+ "grad_norm": 1.2440752983093262,
6908
+ "learning_rate": 1.94e-07,
6909
+ "loss": 1.6205,
6910
+ "step": 973
6911
+ },
6912
+ {
6913
+ "epoch": 974.0,
6914
+ "grad_norm": 1.0020045042037964,
6915
+ "learning_rate": 1.942e-07,
6916
+ "loss": 1.6218,
6917
+ "step": 974
6918
+ },
6919
+ {
6920
+ "epoch": 975.0,
6921
+ "grad_norm": 2.2769696712493896,
6922
+ "learning_rate": 1.944e-07,
6923
+ "loss": 1.6198,
6924
+ "step": 975
6925
+ },
6926
+ {
6927
+ "epoch": 976.0,
6928
+ "grad_norm": 0.9601196646690369,
6929
+ "learning_rate": 1.946e-07,
6930
+ "loss": 1.6167,
6931
+ "step": 976
6932
+ },
6933
+ {
6934
+ "epoch": 977.0,
6935
+ "grad_norm": 0.9332568049430847,
6936
+ "learning_rate": 1.9480000000000002e-07,
6937
+ "loss": 1.6089,
6938
+ "step": 977
6939
+ },
6940
+ {
6941
+ "epoch": 978.0,
6942
+ "grad_norm": 0.9011194109916687,
6943
+ "learning_rate": 1.9499999999999999e-07,
6944
+ "loss": 1.618,
6945
+ "step": 978
6946
+ },
6947
+ {
6948
+ "epoch": 979.0,
6949
+ "grad_norm": 1.0645091533660889,
6950
+ "learning_rate": 1.952e-07,
6951
+ "loss": 1.6082,
6952
+ "step": 979
6953
+ },
6954
+ {
6955
+ "epoch": 980.0,
6956
+ "grad_norm": 1.2063056230545044,
6957
+ "learning_rate": 1.9540000000000002e-07,
6958
+ "loss": 1.6059,
6959
+ "step": 980
6960
+ },
6961
+ {
6962
+ "epoch": 981.0,
6963
+ "grad_norm": 2.6769473552703857,
6964
+ "learning_rate": 1.956e-07,
6965
+ "loss": 1.6117,
6966
+ "step": 981
6967
+ },
6968
+ {
6969
+ "epoch": 982.0,
6970
+ "grad_norm": 1.0495208501815796,
6971
+ "learning_rate": 1.9580000000000002e-07,
6972
+ "loss": 1.6072,
6973
+ "step": 982
6974
+ },
6975
+ {
6976
+ "epoch": 983.0,
6977
+ "grad_norm": 0.9004780054092407,
6978
+ "learning_rate": 1.96e-07,
6979
+ "loss": 1.6097,
6980
+ "step": 983
6981
+ },
6982
+ {
6983
+ "epoch": 984.0,
6984
+ "grad_norm": 2.2003862857818604,
6985
+ "learning_rate": 1.962e-07,
6986
+ "loss": 1.6124,
6987
+ "step": 984
6988
+ },
6989
+ {
6990
+ "epoch": 985.0,
6991
+ "grad_norm": 1.6124346256256104,
6992
+ "learning_rate": 1.964e-07,
6993
+ "loss": 1.607,
6994
+ "step": 985
6995
+ },
6996
+ {
6997
+ "epoch": 986.0,
6998
+ "grad_norm": 3.331295967102051,
6999
+ "learning_rate": 1.9660000000000003e-07,
7000
+ "loss": 1.6101,
7001
+ "step": 986
7002
+ },
7003
+ {
7004
+ "epoch": 987.0,
7005
+ "grad_norm": 4.2284770011901855,
7006
+ "learning_rate": 1.968e-07,
7007
+ "loss": 1.6016,
7008
+ "step": 987
7009
+ },
7010
+ {
7011
+ "epoch": 988.0,
7012
+ "grad_norm": 14.074902534484863,
7013
+ "learning_rate": 1.97e-07,
7014
+ "loss": 1.6081,
7015
+ "step": 988
7016
+ },
7017
+ {
7018
+ "epoch": 989.0,
7019
+ "grad_norm": 2.105473518371582,
7020
+ "learning_rate": 1.972e-07,
7021
+ "loss": 1.6059,
7022
+ "step": 989
7023
+ },
7024
+ {
7025
+ "epoch": 990.0,
7026
+ "grad_norm": 0.8988717198371887,
7027
+ "learning_rate": 1.974e-07,
7028
+ "loss": 1.6033,
7029
+ "step": 990
7030
+ },
7031
+ {
7032
+ "epoch": 991.0,
7033
+ "grad_norm": 1.6289899349212646,
7034
+ "learning_rate": 1.9760000000000002e-07,
7035
+ "loss": 1.612,
7036
+ "step": 991
7037
+ },
7038
+ {
7039
+ "epoch": 992.0,
7040
+ "grad_norm": 0.9097650051116943,
7041
+ "learning_rate": 1.9779999999999998e-07,
7042
+ "loss": 1.6095,
7043
+ "step": 992
7044
+ },
7045
+ {
7046
+ "epoch": 993.0,
7047
+ "grad_norm": 1.135284423828125,
7048
+ "learning_rate": 1.98e-07,
7049
+ "loss": 1.6074,
7050
+ "step": 993
7051
+ },
7052
+ {
7053
+ "epoch": 994.0,
7054
+ "grad_norm": 0.9815622568130493,
7055
+ "learning_rate": 1.9820000000000001e-07,
7056
+ "loss": 1.5999,
7057
+ "step": 994
7058
+ },
7059
+ {
7060
+ "epoch": 995.0,
7061
+ "grad_norm": 1.0774178504943848,
7062
+ "learning_rate": 1.984e-07,
7063
+ "loss": 1.622,
7064
+ "step": 995
7065
+ },
7066
+ {
7067
+ "epoch": 996.0,
7068
+ "grad_norm": 1.3044596910476685,
7069
+ "learning_rate": 1.9860000000000002e-07,
7070
+ "loss": 1.6003,
7071
+ "step": 996
7072
+ },
7073
+ {
7074
+ "epoch": 997.0,
7075
+ "grad_norm": 5.724979400634766,
7076
+ "learning_rate": 1.988e-07,
7077
+ "loss": 1.6026,
7078
+ "step": 997
7079
+ },
7080
+ {
7081
+ "epoch": 998.0,
7082
+ "grad_norm": 7.915839195251465,
7083
+ "learning_rate": 1.99e-07,
7084
+ "loss": 1.6014,
7085
+ "step": 998
7086
+ },
7087
+ {
7088
+ "epoch": 999.0,
7089
+ "grad_norm": 0.9617994427680969,
7090
+ "learning_rate": 1.992e-07,
7091
+ "loss": 1.6004,
7092
+ "step": 999
7093
+ },
7094
+ {
7095
+ "epoch": 1000.0,
7096
+ "grad_norm": 1.0033584833145142,
7097
+ "learning_rate": 1.9940000000000003e-07,
7098
+ "loss": 1.595,
7099
+ "step": 1000
7100
+ },
7101
+ {
7102
+ "epoch": 1000.0,
7103
+ "eval_cer": 0.9977481887605247,
7104
+ "eval_loss": 3.4487671852111816,
7105
+ "eval_runtime": 14.7295,
7106
+ "eval_samples_per_second": 66.737,
7107
+ "eval_steps_per_second": 0.475,
7108
+ "eval_wer": 1.0,
7109
+ "step": 1000
7110
+ },
7111
+ {
7112
+ "epoch": 1001.0,
7113
+ "grad_norm": 1.1320922374725342,
7114
+ "learning_rate": 1.996e-07,
7115
+ "loss": 1.5978,
7116
+ "step": 1001
7117
+ },
7118
+ {
7119
+ "epoch": 1002.0,
7120
+ "grad_norm": 10.318326950073242,
7121
+ "learning_rate": 1.998e-07,
7122
+ "loss": 1.6008,
7123
+ "step": 1002
7124
+ },
7125
+ {
7126
+ "epoch": 1003.0,
7127
+ "grad_norm": 5.446964263916016,
7128
+ "learning_rate": 2.0000000000000002e-07,
7129
+ "loss": 1.5938,
7130
+ "step": 1003
7131
+ },
7132
+ {
7133
+ "epoch": 1004.0,
7134
+ "grad_norm": 1.3103594779968262,
7135
+ "learning_rate": 2.002e-07,
7136
+ "loss": 1.6014,
7137
+ "step": 1004
7138
+ },
7139
+ {
7140
+ "epoch": 1005.0,
7141
+ "grad_norm": 1.0951064825057983,
7142
+ "learning_rate": 2.0040000000000002e-07,
7143
+ "loss": 1.5957,
7144
+ "step": 1005
7145
+ },
7146
+ {
7147
+ "epoch": 1006.0,
7148
+ "grad_norm": 2.066399097442627,
7149
+ "learning_rate": 2.0059999999999998e-07,
7150
+ "loss": 1.5892,
7151
+ "step": 1006
7152
+ },
7153
+ {
7154
+ "epoch": 1007.0,
7155
+ "grad_norm": 1.012980341911316,
7156
+ "learning_rate": 2.008e-07,
7157
+ "loss": 1.5972,
7158
+ "step": 1007
7159
+ },
7160
+ {
7161
+ "epoch": 1008.0,
7162
+ "grad_norm": 1.5361199378967285,
7163
+ "learning_rate": 2.01e-07,
7164
+ "loss": 1.5916,
7165
+ "step": 1008
7166
+ },
7167
+ {
7168
+ "epoch": 1009.0,
7169
+ "grad_norm": 1.0442848205566406,
7170
+ "learning_rate": 2.012e-07,
7171
+ "loss": 1.5872,
7172
+ "step": 1009
7173
+ },
7174
+ {
7175
+ "epoch": 1010.0,
7176
+ "grad_norm": 6.148806571960449,
7177
+ "learning_rate": 2.0140000000000002e-07,
7178
+ "loss": 1.5919,
7179
+ "step": 1010
7180
+ },
7181
+ {
7182
+ "epoch": 1011.0,
7183
+ "grad_norm": 1.2967356443405151,
7184
+ "learning_rate": 2.016e-07,
7185
+ "loss": 1.5922,
7186
+ "step": 1011
7187
+ },
7188
+ {
7189
+ "epoch": 1012.0,
7190
+ "grad_norm": 6.414045810699463,
7191
+ "learning_rate": 2.018e-07,
7192
+ "loss": 1.5913,
7193
+ "step": 1012
7194
+ },
7195
+ {
7196
+ "epoch": 1013.0,
7197
+ "grad_norm": 1.1772363185882568,
7198
+ "learning_rate": 2.02e-07,
7199
+ "loss": 1.5828,
7200
+ "step": 1013
7201
+ },
7202
+ {
7203
+ "epoch": 1014.0,
7204
+ "grad_norm": 3.105180501937866,
7205
+ "learning_rate": 2.0220000000000002e-07,
7206
+ "loss": 1.594,
7207
+ "step": 1014
7208
+ },
7209
+ {
7210
+ "epoch": 1015.0,
7211
+ "grad_norm": 1.3187018632888794,
7212
+ "learning_rate": 2.0239999999999999e-07,
7213
+ "loss": 1.5925,
7214
+ "step": 1015
7215
+ },
7216
+ {
7217
+ "epoch": 1016.0,
7218
+ "grad_norm": 4.919627666473389,
7219
+ "learning_rate": 2.026e-07,
7220
+ "loss": 1.5964,
7221
+ "step": 1016
7222
+ },
7223
+ {
7224
+ "epoch": 1017.0,
7225
+ "grad_norm": 6.910722732543945,
7226
+ "learning_rate": 2.0280000000000002e-07,
7227
+ "loss": 1.5861,
7228
+ "step": 1017
7229
+ },
7230
+ {
7231
+ "epoch": 1018.0,
7232
+ "grad_norm": 1.2795530557632446,
7233
+ "learning_rate": 2.03e-07,
7234
+ "loss": 1.5862,
7235
+ "step": 1018
7236
+ },
7237
+ {
7238
+ "epoch": 1019.0,
7239
+ "grad_norm": 0.9177622199058533,
7240
+ "learning_rate": 2.0320000000000002e-07,
7241
+ "loss": 1.5864,
7242
+ "step": 1019
7243
+ },
7244
+ {
7245
+ "epoch": 1020.0,
7246
+ "grad_norm": 1.5381174087524414,
7247
+ "learning_rate": 2.0339999999999998e-07,
7248
+ "loss": 1.5891,
7249
+ "step": 1020
7250
+ },
7251
+ {
7252
+ "epoch": 1021.0,
7253
+ "grad_norm": 1.4343268871307373,
7254
+ "learning_rate": 2.036e-07,
7255
+ "loss": 1.5872,
7256
+ "step": 1021
7257
+ },
7258
+ {
7259
+ "epoch": 1022.0,
7260
+ "grad_norm": 1.11500883102417,
7261
+ "learning_rate": 2.038e-07,
7262
+ "loss": 1.5895,
7263
+ "step": 1022
7264
+ },
7265
+ {
7266
+ "epoch": 1023.0,
7267
+ "grad_norm": 0.9539472460746765,
7268
+ "learning_rate": 2.04e-07,
7269
+ "loss": 1.5838,
7270
+ "step": 1023
7271
+ },
7272
+ {
7273
+ "epoch": 1024.0,
7274
+ "grad_norm": 8.19465446472168,
7275
+ "learning_rate": 2.0420000000000002e-07,
7276
+ "loss": 1.5815,
7277
+ "step": 1024
7278
+ },
7279
+ {
7280
+ "epoch": 1025.0,
7281
+ "grad_norm": 2.1362464427948,
7282
+ "learning_rate": 2.044e-07,
7283
+ "loss": 1.5767,
7284
+ "step": 1025
7285
+ },
7286
+ {
7287
+ "epoch": 1026.0,
7288
+ "grad_norm": 1.9000413417816162,
7289
+ "learning_rate": 2.046e-07,
7290
+ "loss": 1.5812,
7291
+ "step": 1026
7292
+ },
7293
+ {
7294
+ "epoch": 1027.0,
7295
+ "grad_norm": 1.1212537288665771,
7296
+ "learning_rate": 2.048e-07,
7297
+ "loss": 1.584,
7298
+ "step": 1027
7299
+ },
7300
+ {
7301
+ "epoch": 1028.0,
7302
+ "grad_norm": 1.5567606687545776,
7303
+ "learning_rate": 2.0500000000000002e-07,
7304
+ "loss": 1.5798,
7305
+ "step": 1028
7306
+ },
7307
+ {
7308
+ "epoch": 1029.0,
7309
+ "grad_norm": 1.4290876388549805,
7310
+ "learning_rate": 2.0519999999999998e-07,
7311
+ "loss": 1.5807,
7312
+ "step": 1029
7313
+ },
7314
+ {
7315
+ "epoch": 1030.0,
7316
+ "grad_norm": 1.4025850296020508,
7317
+ "learning_rate": 2.054e-07,
7318
+ "loss": 1.5844,
7319
+ "step": 1030
7320
+ },
7321
+ {
7322
+ "epoch": 1031.0,
7323
+ "grad_norm": 1.0360734462738037,
7324
+ "learning_rate": 2.0560000000000001e-07,
7325
+ "loss": 1.5745,
7326
+ "step": 1031
7327
+ },
7328
+ {
7329
+ "epoch": 1032.0,
7330
+ "grad_norm": 0.90553218126297,
7331
+ "learning_rate": 2.058e-07,
7332
+ "loss": 1.5893,
7333
+ "step": 1032
7334
+ },
7335
+ {
7336
+ "epoch": 1033.0,
7337
+ "grad_norm": 1.8398399353027344,
7338
+ "learning_rate": 2.0600000000000002e-07,
7339
+ "loss": 1.5742,
7340
+ "step": 1033
7341
+ },
7342
+ {
7343
+ "epoch": 1034.0,
7344
+ "grad_norm": 3.009005069732666,
7345
+ "learning_rate": 2.062e-07,
7346
+ "loss": 1.5723,
7347
+ "step": 1034
7348
+ },
7349
+ {
7350
+ "epoch": 1035.0,
7351
+ "grad_norm": 1.2227106094360352,
7352
+ "learning_rate": 2.064e-07,
7353
+ "loss": 1.5815,
7354
+ "step": 1035
7355
+ },
7356
+ {
7357
+ "epoch": 1036.0,
7358
+ "grad_norm": 0.9489397406578064,
7359
+ "learning_rate": 2.066e-07,
7360
+ "loss": 1.5773,
7361
+ "step": 1036
7362
+ },
7363
+ {
7364
+ "epoch": 1037.0,
7365
+ "grad_norm": 2.97885799407959,
7366
+ "learning_rate": 2.068e-07,
7367
+ "loss": 1.5804,
7368
+ "step": 1037
7369
+ },
7370
+ {
7371
+ "epoch": 1038.0,
7372
+ "grad_norm": 2.4349982738494873,
7373
+ "learning_rate": 2.0700000000000001e-07,
7374
+ "loss": 1.5712,
7375
+ "step": 1038
7376
+ },
7377
+ {
7378
+ "epoch": 1039.0,
7379
+ "grad_norm": 1.095255970954895,
7380
+ "learning_rate": 2.072e-07,
7381
+ "loss": 1.5758,
7382
+ "step": 1039
7383
+ },
7384
+ {
7385
+ "epoch": 1040.0,
7386
+ "grad_norm": 1.583194375038147,
7387
+ "learning_rate": 2.074e-07,
7388
+ "loss": 1.5709,
7389
+ "step": 1040
7390
+ },
7391
+ {
7392
+ "epoch": 1041.0,
7393
+ "grad_norm": 1.6994518041610718,
7394
+ "learning_rate": 2.076e-07,
7395
+ "loss": 1.5672,
7396
+ "step": 1041
7397
+ },
7398
+ {
7399
+ "epoch": 1042.0,
7400
+ "grad_norm": 1.1236746311187744,
7401
+ "learning_rate": 2.0780000000000002e-07,
7402
+ "loss": 1.5731,
7403
+ "step": 1042
7404
+ },
7405
+ {
7406
+ "epoch": 1043.0,
7407
+ "grad_norm": 6.768857002258301,
7408
+ "learning_rate": 2.0799999999999998e-07,
7409
+ "loss": 1.5644,
7410
+ "step": 1043
7411
+ },
7412
+ {
7413
+ "epoch": 1044.0,
7414
+ "grad_norm": 1.107519507408142,
7415
+ "learning_rate": 2.082e-07,
7416
+ "loss": 1.5749,
7417
+ "step": 1044
7418
+ },
7419
+ {
7420
+ "epoch": 1045.0,
7421
+ "grad_norm": 2.1311705112457275,
7422
+ "learning_rate": 2.084e-07,
7423
+ "loss": 1.5628,
7424
+ "step": 1045
7425
+ },
7426
+ {
7427
+ "epoch": 1046.0,
7428
+ "grad_norm": 1.3456166982650757,
7429
+ "learning_rate": 2.086e-07,
7430
+ "loss": 1.5715,
7431
+ "step": 1046
7432
+ },
7433
+ {
7434
+ "epoch": 1047.0,
7435
+ "grad_norm": 1.005059838294983,
7436
+ "learning_rate": 2.0880000000000002e-07,
7437
+ "loss": 1.5679,
7438
+ "step": 1047
7439
+ },
7440
+ {
7441
+ "epoch": 1048.0,
7442
+ "grad_norm": 1.5581361055374146,
7443
+ "learning_rate": 2.09e-07,
7444
+ "loss": 1.5668,
7445
+ "step": 1048
7446
+ },
7447
+ {
7448
+ "epoch": 1049.0,
7449
+ "grad_norm": 0.9271217584609985,
7450
+ "learning_rate": 2.092e-07,
7451
+ "loss": 1.5665,
7452
+ "step": 1049
7453
+ },
7454
+ {
7455
+ "epoch": 1050.0,
7456
+ "grad_norm": 8.531341552734375,
7457
+ "learning_rate": 2.094e-07,
7458
+ "loss": 1.5749,
7459
+ "step": 1050
7460
+ },
7461
+ {
7462
+ "epoch": 1051.0,
7463
+ "grad_norm": 1.0843509435653687,
7464
+ "learning_rate": 2.0960000000000002e-07,
7465
+ "loss": 1.5618,
7466
+ "step": 1051
7467
+ },
7468
+ {
7469
+ "epoch": 1052.0,
7470
+ "grad_norm": 3.8642165660858154,
7471
+ "learning_rate": 2.098e-07,
7472
+ "loss": 1.5685,
7473
+ "step": 1052
7474
+ },
7475
+ {
7476
+ "epoch": 1053.0,
7477
+ "grad_norm": 1.2413549423217773,
7478
+ "learning_rate": 2.1e-07,
7479
+ "loss": 1.5627,
7480
+ "step": 1053
7481
+ },
7482
+ {
7483
+ "epoch": 1054.0,
7484
+ "grad_norm": 1.2524449825286865,
7485
+ "learning_rate": 2.102e-07,
7486
+ "loss": 1.5636,
7487
+ "step": 1054
7488
+ },
7489
+ {
7490
+ "epoch": 1055.0,
7491
+ "grad_norm": 1.2952991724014282,
7492
+ "learning_rate": 2.104e-07,
7493
+ "loss": 1.5645,
7494
+ "step": 1055
7495
+ },
7496
+ {
7497
+ "epoch": 1056.0,
7498
+ "grad_norm": 1.4905788898468018,
7499
+ "learning_rate": 2.1060000000000002e-07,
7500
+ "loss": 1.5655,
7501
+ "step": 1056
7502
+ },
7503
+ {
7504
+ "epoch": 1057.0,
7505
+ "grad_norm": 1.1086138486862183,
7506
+ "learning_rate": 2.1079999999999998e-07,
7507
+ "loss": 1.5673,
7508
+ "step": 1057
7509
+ },
7510
+ {
7511
+ "epoch": 1058.0,
7512
+ "grad_norm": 6.657322883605957,
7513
+ "learning_rate": 2.11e-07,
7514
+ "loss": 1.5617,
7515
+ "step": 1058
7516
+ },
7517
+ {
7518
+ "epoch": 1059.0,
7519
+ "grad_norm": 34.03983688354492,
7520
+ "learning_rate": 2.112e-07,
7521
+ "loss": 1.564,
7522
+ "step": 1059
7523
+ },
7524
+ {
7525
+ "epoch": 1060.0,
7526
+ "grad_norm": 1.2597378492355347,
7527
+ "learning_rate": 2.114e-07,
7528
+ "loss": 1.5608,
7529
+ "step": 1060
7530
+ },
7531
+ {
7532
+ "epoch": 1061.0,
7533
+ "grad_norm": 1.1377840042114258,
7534
+ "learning_rate": 2.1160000000000002e-07,
7535
+ "loss": 1.5626,
7536
+ "step": 1061
7537
+ },
7538
+ {
7539
+ "epoch": 1062.0,
7540
+ "grad_norm": 1.9253413677215576,
7541
+ "learning_rate": 2.118e-07,
7542
+ "loss": 1.5591,
7543
+ "step": 1062
7544
+ },
7545
+ {
7546
+ "epoch": 1063.0,
7547
+ "grad_norm": 2.157513380050659,
7548
+ "learning_rate": 2.12e-07,
7549
+ "loss": 1.5539,
7550
+ "step": 1063
7551
+ },
7552
+ {
7553
+ "epoch": 1064.0,
7554
+ "grad_norm": 1.3998243808746338,
7555
+ "learning_rate": 2.122e-07,
7556
+ "loss": 1.564,
7557
+ "step": 1064
7558
+ },
7559
+ {
7560
+ "epoch": 1065.0,
7561
+ "grad_norm": 1.0151389837265015,
7562
+ "learning_rate": 2.1240000000000002e-07,
7563
+ "loss": 1.561,
7564
+ "step": 1065
7565
+ },
7566
+ {
7567
+ "epoch": 1066.0,
7568
+ "grad_norm": 1.3415422439575195,
7569
+ "learning_rate": 2.126e-07,
7570
+ "loss": 1.5611,
7571
+ "step": 1066
7572
+ },
7573
+ {
7574
+ "epoch": 1067.0,
7575
+ "grad_norm": 2.3141579627990723,
7576
+ "learning_rate": 2.128e-07,
7577
+ "loss": 1.5573,
7578
+ "step": 1067
7579
+ },
7580
+ {
7581
+ "epoch": 1068.0,
7582
+ "grad_norm": 0.9580351114273071,
7583
+ "learning_rate": 2.1300000000000001e-07,
7584
+ "loss": 1.5578,
7585
+ "step": 1068
7586
+ },
7587
+ {
7588
+ "epoch": 1069.0,
7589
+ "grad_norm": 1.0505666732788086,
7590
+ "learning_rate": 2.132e-07,
7591
+ "loss": 1.5562,
7592
+ "step": 1069
7593
+ },
7594
+ {
7595
+ "epoch": 1070.0,
7596
+ "grad_norm": 1.6809712648391724,
7597
+ "learning_rate": 2.1340000000000002e-07,
7598
+ "loss": 1.557,
7599
+ "step": 1070
7600
+ },
7601
+ {
7602
+ "epoch": 1071.0,
7603
+ "grad_norm": 3.6099202632904053,
7604
+ "learning_rate": 2.1359999999999998e-07,
7605
+ "loss": 1.5535,
7606
+ "step": 1071
7607
+ },
7608
+ {
7609
+ "epoch": 1072.0,
7610
+ "grad_norm": 5.737199783325195,
7611
+ "learning_rate": 2.138e-07,
7612
+ "loss": 1.5559,
7613
+ "step": 1072
7614
+ },
7615
+ {
7616
+ "epoch": 1073.0,
7617
+ "grad_norm": 1.131452202796936,
7618
+ "learning_rate": 2.14e-07,
7619
+ "loss": 1.5557,
7620
+ "step": 1073
7621
+ },
7622
+ {
7623
+ "epoch": 1074.0,
7624
+ "grad_norm": 1.0757285356521606,
7625
+ "learning_rate": 2.142e-07,
7626
+ "loss": 1.551,
7627
+ "step": 1074
7628
+ },
7629
+ {
7630
+ "epoch": 1075.0,
7631
+ "grad_norm": 1.316859245300293,
7632
+ "learning_rate": 2.1440000000000001e-07,
7633
+ "loss": 1.5541,
7634
+ "step": 1075
7635
+ },
7636
+ {
7637
+ "epoch": 1076.0,
7638
+ "grad_norm": 6.291731357574463,
7639
+ "learning_rate": 2.146e-07,
7640
+ "loss": 1.552,
7641
+ "step": 1076
7642
+ },
7643
+ {
7644
+ "epoch": 1077.0,
7645
+ "grad_norm": 0.980509340763092,
7646
+ "learning_rate": 2.148e-07,
7647
+ "loss": 1.5531,
7648
+ "step": 1077
7649
+ },
7650
+ {
7651
+ "epoch": 1078.0,
7652
+ "grad_norm": 1.1346079111099243,
7653
+ "learning_rate": 2.15e-07,
7654
+ "loss": 1.5503,
7655
+ "step": 1078
7656
+ },
7657
+ {
7658
+ "epoch": 1079.0,
7659
+ "grad_norm": 1.938717246055603,
7660
+ "learning_rate": 2.1520000000000002e-07,
7661
+ "loss": 1.5571,
7662
+ "step": 1079
7663
+ },
7664
+ {
7665
+ "epoch": 1080.0,
7666
+ "grad_norm": 1.836732268333435,
7667
+ "learning_rate": 2.154e-07,
7668
+ "loss": 1.5536,
7669
+ "step": 1080
7670
+ },
7671
+ {
7672
+ "epoch": 1081.0,
7673
+ "grad_norm": 1.5794588327407837,
7674
+ "learning_rate": 2.156e-07,
7675
+ "loss": 1.5484,
7676
+ "step": 1081
7677
+ },
7678
+ {
7679
+ "epoch": 1082.0,
7680
+ "grad_norm": 1.154757022857666,
7681
+ "learning_rate": 2.1580000000000001e-07,
7682
+ "loss": 1.5477,
7683
+ "step": 1082
7684
+ },
7685
+ {
7686
+ "epoch": 1083.0,
7687
+ "grad_norm": 1.6351189613342285,
7688
+ "learning_rate": 2.16e-07,
7689
+ "loss": 1.5512,
7690
+ "step": 1083
7691
+ },
7692
+ {
7693
+ "epoch": 1084.0,
7694
+ "grad_norm": 2.203552007675171,
7695
+ "learning_rate": 2.1620000000000002e-07,
7696
+ "loss": 1.5461,
7697
+ "step": 1084
7698
+ },
7699
+ {
7700
+ "epoch": 1085.0,
7701
+ "grad_norm": 1.813345193862915,
7702
+ "learning_rate": 2.164e-07,
7703
+ "loss": 1.5516,
7704
+ "step": 1085
7705
+ },
7706
+ {
7707
+ "epoch": 1086.0,
7708
+ "grad_norm": 0.8774526715278625,
7709
+ "learning_rate": 2.166e-07,
7710
+ "loss": 1.5511,
7711
+ "step": 1086
7712
+ },
7713
+ {
7714
+ "epoch": 1087.0,
7715
+ "grad_norm": 3.1274983882904053,
7716
+ "learning_rate": 2.168e-07,
7717
+ "loss": 1.5443,
7718
+ "step": 1087
7719
+ },
7720
+ {
7721
+ "epoch": 1088.0,
7722
+ "grad_norm": 3.257859945297241,
7723
+ "learning_rate": 2.17e-07,
7724
+ "loss": 1.5454,
7725
+ "step": 1088
7726
+ },
7727
+ {
7728
+ "epoch": 1089.0,
7729
+ "grad_norm": 5.167294979095459,
7730
+ "learning_rate": 2.172e-07,
7731
+ "loss": 1.5372,
7732
+ "step": 1089
7733
+ },
7734
+ {
7735
+ "epoch": 1090.0,
7736
+ "grad_norm": 1.0005639791488647,
7737
+ "learning_rate": 2.174e-07,
7738
+ "loss": 1.5416,
7739
+ "step": 1090
7740
+ },
7741
+ {
7742
+ "epoch": 1091.0,
7743
+ "grad_norm": 1.842504620552063,
7744
+ "learning_rate": 2.176e-07,
7745
+ "loss": 1.5441,
7746
+ "step": 1091
7747
+ },
7748
+ {
7749
+ "epoch": 1092.0,
7750
+ "grad_norm": 0.9027210474014282,
7751
+ "learning_rate": 2.178e-07,
7752
+ "loss": 1.5359,
7753
+ "step": 1092
7754
+ },
7755
+ {
7756
+ "epoch": 1093.0,
7757
+ "grad_norm": 1.4683235883712769,
7758
+ "learning_rate": 2.1800000000000002e-07,
7759
+ "loss": 1.5467,
7760
+ "step": 1093
7761
+ },
7762
+ {
7763
+ "epoch": 1094.0,
7764
+ "grad_norm": 0.9885283708572388,
7765
+ "learning_rate": 2.182e-07,
7766
+ "loss": 1.5436,
7767
+ "step": 1094
7768
+ },
7769
+ {
7770
+ "epoch": 1095.0,
7771
+ "grad_norm": 1.3684940338134766,
7772
+ "learning_rate": 2.184e-07,
7773
+ "loss": 1.5419,
7774
+ "step": 1095
7775
+ },
7776
+ {
7777
+ "epoch": 1096.0,
7778
+ "grad_norm": 0.9934016466140747,
7779
+ "learning_rate": 2.186e-07,
7780
+ "loss": 1.54,
7781
+ "step": 1096
7782
+ },
7783
+ {
7784
+ "epoch": 1097.0,
7785
+ "grad_norm": 1.8801573514938354,
7786
+ "learning_rate": 2.188e-07,
7787
+ "loss": 1.5404,
7788
+ "step": 1097
7789
+ },
7790
+ {
7791
+ "epoch": 1098.0,
7792
+ "grad_norm": 1.0297327041625977,
7793
+ "learning_rate": 2.1900000000000002e-07,
7794
+ "loss": 1.5412,
7795
+ "step": 1098
7796
+ },
7797
+ {
7798
+ "epoch": 1099.0,
7799
+ "grad_norm": 1.1018619537353516,
7800
+ "learning_rate": 2.192e-07,
7801
+ "loss": 1.5419,
7802
+ "step": 1099
7803
+ },
7804
+ {
7805
+ "epoch": 1100.0,
7806
+ "grad_norm": 4.425602912902832,
7807
+ "learning_rate": 2.194e-07,
7808
+ "loss": 1.5454,
7809
+ "step": 1100
7810
+ },
7811
+ {
7812
+ "epoch": 1100.0,
7813
+ "eval_cer": 0.9915312316428432,
7814
+ "eval_loss": 3.433760166168213,
7815
+ "eval_runtime": 14.4715,
7816
+ "eval_samples_per_second": 67.927,
7817
+ "eval_steps_per_second": 0.484,
7818
+ "eval_wer": 0.9996560421921578,
7819
+ "step": 1100
7820
  }
  ],
  "logging_steps": 1.0,
@@ -6425,7 +7845,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3.633521777722368e+18,
+ "total_flos": 4.440971061660672e+18,
  "train_batch_size": 160,
  "trial_name": null,
  "trial_params": null