Romain-XV commited on
Commit
b5b3ed2
·
verified ·
1 Parent(s): fcf49f8

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0410eafb2c10452c59c1eabf6707cf5867fd03b17df7175f116cef4af978ed0b
3
  size 1001465824
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e4effa8bbc0ed474583ba3f2837281343afc09606c2ccb38becde90989e4c46
3
  size 1001465824
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c00468c5fe4b4a58292527a6dcd12b800b9870e557165f75d368f135dd7ae01e
3
  size 509177556
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9b5fa21c522699eddafd48526976af6455055539ca8a6ef38f2e4198cf9d572
3
  size 509177556
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:899dab2ad0c467bf42c9149332e349b375e6bd6e5462a670f417b832077aaf69
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:801e94b1ec798c0825658670c62086f2c0cee7dce743b57ecfc35839c1add960
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:086a6b138dffaa31cd51bc2e3012c400e78917f70ad348fe17b70bdd71187b10
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9162f2e18a7003bc5be2bf4c68f833fcf607f8f6f9d53d603787f1bed5f4ce08
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.593366265296936,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-1350",
4
- "epoch": 0.4616568350859195,
5
  "eval_steps": 150,
6
- "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -9537,6 +9537,1064 @@
9537
  "eval_samples_per_second": 5.323,
9538
  "eval_steps_per_second": 2.662,
9539
  "step": 1350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9540
  }
9541
  ],
9542
  "logging_steps": 1,
@@ -9565,7 +10623,7 @@
9565
  "attributes": {}
9566
  }
9567
  },
9568
- "total_flos": 1.190770851792814e+18,
9569
  "train_batch_size": 2,
9570
  "trial_name": null,
9571
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.5886463522911072,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-1500",
4
+ "epoch": 0.5129520389843549,
5
  "eval_steps": 150,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
9537
  "eval_samples_per_second": 5.323,
9538
  "eval_steps_per_second": 2.662,
9539
  "step": 1350
9540
+ },
9541
+ {
9542
+ "epoch": 0.46199880311190905,
9543
+ "grad_norm": 0.46458661556243896,
9544
+ "learning_rate": 1.4448861670281588e-05,
9545
+ "loss": 0.5774,
9546
+ "step": 1351
9547
+ },
9548
+ {
9549
+ "epoch": 0.4623407711378986,
9550
+ "grad_norm": 0.42334747314453125,
9551
+ "learning_rate": 1.4348734507629114e-05,
9552
+ "loss": 0.7102,
9553
+ "step": 1352
9554
+ },
9555
+ {
9556
+ "epoch": 0.4626827391638882,
9557
+ "grad_norm": 0.39918336272239685,
9558
+ "learning_rate": 1.4248928660489846e-05,
9559
+ "loss": 0.5146,
9560
+ "step": 1353
9561
+ },
9562
+ {
9563
+ "epoch": 0.46302470718987776,
9564
+ "grad_norm": 0.36537671089172363,
9565
+ "learning_rate": 1.4149444503279297e-05,
9566
+ "loss": 0.5339,
9567
+ "step": 1354
9568
+ },
9569
+ {
9570
+ "epoch": 0.46336667521586733,
9571
+ "grad_norm": 0.4066685438156128,
9572
+ "learning_rate": 1.4050282409206273e-05,
9573
+ "loss": 0.6559,
9574
+ "step": 1355
9575
+ },
9576
+ {
9577
+ "epoch": 0.4637086432418569,
9578
+ "grad_norm": 0.2991009056568146,
9579
+ "learning_rate": 1.3951442750271349e-05,
9580
+ "loss": 0.4116,
9581
+ "step": 1356
9582
+ },
9583
+ {
9584
+ "epoch": 0.46405061126784647,
9585
+ "grad_norm": 0.37864071130752563,
9586
+ "learning_rate": 1.3852925897265456e-05,
9587
+ "loss": 0.4171,
9588
+ "step": 1357
9589
+ },
9590
+ {
9591
+ "epoch": 0.46439257929383604,
9592
+ "grad_norm": 0.6100689768791199,
9593
+ "learning_rate": 1.3754732219768619e-05,
9594
+ "loss": 0.658,
9595
+ "step": 1358
9596
+ },
9597
+ {
9598
+ "epoch": 0.4647345473198256,
9599
+ "grad_norm": 0.38960012793540955,
9600
+ "learning_rate": 1.3656862086148391e-05,
9601
+ "loss": 0.4975,
9602
+ "step": 1359
9603
+ },
9604
+ {
9605
+ "epoch": 0.4650765153458152,
9606
+ "grad_norm": 0.5630696415901184,
9607
+ "learning_rate": 1.3559315863558697e-05,
9608
+ "loss": 0.6081,
9609
+ "step": 1360
9610
+ },
9611
+ {
9612
+ "epoch": 0.46541848337180475,
9613
+ "grad_norm": 0.39844122529029846,
9614
+ "learning_rate": 1.3462093917938179e-05,
9615
+ "loss": 0.5646,
9616
+ "step": 1361
9617
+ },
9618
+ {
9619
+ "epoch": 0.4657604513977943,
9620
+ "grad_norm": 0.344722718000412,
9621
+ "learning_rate": 1.33651966140091e-05,
9622
+ "loss": 0.4201,
9623
+ "step": 1362
9624
+ },
9625
+ {
9626
+ "epoch": 0.4661024194237839,
9627
+ "grad_norm": 0.28442439436912537,
9628
+ "learning_rate": 1.3268624315275823e-05,
9629
+ "loss": 0.5606,
9630
+ "step": 1363
9631
+ },
9632
+ {
9633
+ "epoch": 0.46644438744977346,
9634
+ "grad_norm": 0.4574268162250519,
9635
+ "learning_rate": 1.3172377384023393e-05,
9636
+ "loss": 0.5332,
9637
+ "step": 1364
9638
+ },
9639
+ {
9640
+ "epoch": 0.466786355475763,
9641
+ "grad_norm": 0.3338494598865509,
9642
+ "learning_rate": 1.3076456181316354e-05,
9643
+ "loss": 0.4792,
9644
+ "step": 1365
9645
+ },
9646
+ {
9647
+ "epoch": 0.4671283235017526,
9648
+ "grad_norm": 0.4591316878795624,
9649
+ "learning_rate": 1.2980861066997297e-05,
9650
+ "loss": 0.5861,
9651
+ "step": 1366
9652
+ },
9653
+ {
9654
+ "epoch": 0.46747029152774217,
9655
+ "grad_norm": 0.3527957797050476,
9656
+ "learning_rate": 1.2885592399685431e-05,
9657
+ "loss": 0.5774,
9658
+ "step": 1367
9659
+ },
9660
+ {
9661
+ "epoch": 0.46781225955373174,
9662
+ "grad_norm": 0.3617342412471771,
9663
+ "learning_rate": 1.279065053677536e-05,
9664
+ "loss": 0.5277,
9665
+ "step": 1368
9666
+ },
9667
+ {
9668
+ "epoch": 0.4681542275797213,
9669
+ "grad_norm": 0.3831739127635956,
9670
+ "learning_rate": 1.2696035834435749e-05,
9671
+ "loss": 0.6971,
9672
+ "step": 1369
9673
+ },
9674
+ {
9675
+ "epoch": 0.4684961956057109,
9676
+ "grad_norm": 0.4345672130584717,
9677
+ "learning_rate": 1.2601748647607859e-05,
9678
+ "loss": 0.5492,
9679
+ "step": 1370
9680
+ },
9681
+ {
9682
+ "epoch": 0.46883816363170044,
9683
+ "grad_norm": 0.36282750964164734,
9684
+ "learning_rate": 1.2507789330004349e-05,
9685
+ "loss": 0.5571,
9686
+ "step": 1371
9687
+ },
9688
+ {
9689
+ "epoch": 0.46918013165769,
9690
+ "grad_norm": 0.4751930832862854,
9691
+ "learning_rate": 1.241415823410792e-05,
9692
+ "loss": 0.626,
9693
+ "step": 1372
9694
+ },
9695
+ {
9696
+ "epoch": 0.4695220996836796,
9697
+ "grad_norm": 0.3625487685203552,
9698
+ "learning_rate": 1.2320855711169887e-05,
9699
+ "loss": 0.627,
9700
+ "step": 1373
9701
+ },
9702
+ {
9703
+ "epoch": 0.46986406770966915,
9704
+ "grad_norm": 0.3447592854499817,
9705
+ "learning_rate": 1.222788211120901e-05,
9706
+ "loss": 0.5964,
9707
+ "step": 1374
9708
+ },
9709
+ {
9710
+ "epoch": 0.4702060357356587,
9711
+ "grad_norm": 0.39788079261779785,
9712
+ "learning_rate": 1.21352377830101e-05,
9713
+ "loss": 0.4275,
9714
+ "step": 1375
9715
+ },
9716
+ {
9717
+ "epoch": 0.4705480037616483,
9718
+ "grad_norm": 0.33655157685279846,
9719
+ "learning_rate": 1.2042923074122702e-05,
9720
+ "loss": 0.49,
9721
+ "step": 1376
9722
+ },
9723
+ {
9724
+ "epoch": 0.47088997178763786,
9725
+ "grad_norm": 0.5279715061187744,
9726
+ "learning_rate": 1.1950938330859861e-05,
9727
+ "loss": 0.6838,
9728
+ "step": 1377
9729
+ },
9730
+ {
9731
+ "epoch": 0.47123193981362743,
9732
+ "grad_norm": 0.5920473337173462,
9733
+ "learning_rate": 1.1859283898296735e-05,
9734
+ "loss": 0.6467,
9735
+ "step": 1378
9736
+ },
9737
+ {
9738
+ "epoch": 0.471573907839617,
9739
+ "grad_norm": 0.2907107472419739,
9740
+ "learning_rate": 1.1767960120269328e-05,
9741
+ "loss": 0.4565,
9742
+ "step": 1379
9743
+ },
9744
+ {
9745
+ "epoch": 0.47191587586560657,
9746
+ "grad_norm": 0.37272509932518005,
9747
+ "learning_rate": 1.1676967339373245e-05,
9748
+ "loss": 0.5862,
9749
+ "step": 1380
9750
+ },
9751
+ {
9752
+ "epoch": 0.47225784389159614,
9753
+ "grad_norm": 0.479973167181015,
9754
+ "learning_rate": 1.1586305896962412e-05,
9755
+ "loss": 0.7466,
9756
+ "step": 1381
9757
+ },
9758
+ {
9759
+ "epoch": 0.4725998119175857,
9760
+ "grad_norm": 0.33694228529930115,
9761
+ "learning_rate": 1.1495976133147668e-05,
9762
+ "loss": 0.456,
9763
+ "step": 1382
9764
+ },
9765
+ {
9766
+ "epoch": 0.4729417799435753,
9767
+ "grad_norm": 0.4428293704986572,
9768
+ "learning_rate": 1.1405978386795636e-05,
9769
+ "loss": 0.5808,
9770
+ "step": 1383
9771
+ },
9772
+ {
9773
+ "epoch": 0.47328374796956485,
9774
+ "grad_norm": 0.49617043137550354,
9775
+ "learning_rate": 1.1316312995527423e-05,
9776
+ "loss": 0.5857,
9777
+ "step": 1384
9778
+ },
9779
+ {
9780
+ "epoch": 0.4736257159955544,
9781
+ "grad_norm": 0.37273940443992615,
9782
+ "learning_rate": 1.1226980295717248e-05,
9783
+ "loss": 0.4798,
9784
+ "step": 1385
9785
+ },
9786
+ {
9787
+ "epoch": 0.473967684021544,
9788
+ "grad_norm": 0.3818013668060303,
9789
+ "learning_rate": 1.113798062249134e-05,
9790
+ "loss": 0.5763,
9791
+ "step": 1386
9792
+ },
9793
+ {
9794
+ "epoch": 0.47430965204753356,
9795
+ "grad_norm": 0.31985703110694885,
9796
+ "learning_rate": 1.1049314309726533e-05,
9797
+ "loss": 0.4325,
9798
+ "step": 1387
9799
+ },
9800
+ {
9801
+ "epoch": 0.47465162007352313,
9802
+ "grad_norm": 0.4047496020793915,
9803
+ "learning_rate": 1.0960981690049099e-05,
9804
+ "loss": 0.6111,
9805
+ "step": 1388
9806
+ },
9807
+ {
9808
+ "epoch": 0.4749935880995127,
9809
+ "grad_norm": 0.4212690591812134,
9810
+ "learning_rate": 1.08729830948335e-05,
9811
+ "loss": 0.5258,
9812
+ "step": 1389
9813
+ },
9814
+ {
9815
+ "epoch": 0.47533555612550227,
9816
+ "grad_norm": 0.34776535630226135,
9817
+ "learning_rate": 1.0785318854201142e-05,
9818
+ "loss": 0.5997,
9819
+ "step": 1390
9820
+ },
9821
+ {
9822
+ "epoch": 0.47567752415149184,
9823
+ "grad_norm": 0.5136944055557251,
9824
+ "learning_rate": 1.069798929701904e-05,
9825
+ "loss": 0.5356,
9826
+ "step": 1391
9827
+ },
9828
+ {
9829
+ "epoch": 0.4760194921774814,
9830
+ "grad_norm": 0.3962900936603546,
9831
+ "learning_rate": 1.0610994750898739e-05,
9832
+ "loss": 0.5057,
9833
+ "step": 1392
9834
+ },
9835
+ {
9836
+ "epoch": 0.476361460203471,
9837
+ "grad_norm": 0.49814558029174805,
9838
+ "learning_rate": 1.0524335542194996e-05,
9839
+ "loss": 0.4861,
9840
+ "step": 1393
9841
+ },
9842
+ {
9843
+ "epoch": 0.47670342822946055,
9844
+ "grad_norm": 0.3843591511249542,
9845
+ "learning_rate": 1.0438011996004581e-05,
9846
+ "loss": 0.5785,
9847
+ "step": 1394
9848
+ },
9849
+ {
9850
+ "epoch": 0.4770453962554501,
9851
+ "grad_norm": 0.32255926728248596,
9852
+ "learning_rate": 1.0352024436164975e-05,
9853
+ "loss": 0.5117,
9854
+ "step": 1395
9855
+ },
9856
+ {
9857
+ "epoch": 0.4773873642814397,
9858
+ "grad_norm": 0.4465799927711487,
9859
+ "learning_rate": 1.026637318525333e-05,
9860
+ "loss": 0.68,
9861
+ "step": 1396
9862
+ },
9863
+ {
9864
+ "epoch": 0.47772933230742926,
9865
+ "grad_norm": 0.3948836922645569,
9866
+ "learning_rate": 1.0181058564585088e-05,
9867
+ "loss": 0.7162,
9868
+ "step": 1397
9869
+ },
9870
+ {
9871
+ "epoch": 0.4780713003334188,
9872
+ "grad_norm": 0.7754755616188049,
9873
+ "learning_rate": 1.0096080894212833e-05,
9874
+ "loss": 0.6089,
9875
+ "step": 1398
9876
+ },
9877
+ {
9878
+ "epoch": 0.4784132683594084,
9879
+ "grad_norm": 0.37649857997894287,
9880
+ "learning_rate": 1.001144049292514e-05,
9881
+ "loss": 0.5256,
9882
+ "step": 1399
9883
+ },
9884
+ {
9885
+ "epoch": 0.47875523638539796,
9886
+ "grad_norm": 0.4943767786026001,
9887
+ "learning_rate": 9.927137678245357e-06,
9888
+ "loss": 0.4086,
9889
+ "step": 1400
9890
+ },
9891
+ {
9892
+ "epoch": 0.47909720441138753,
9893
+ "grad_norm": 0.6237683892250061,
9894
+ "learning_rate": 9.843172766430331e-06,
9895
+ "loss": 0.6381,
9896
+ "step": 1401
9897
+ },
9898
+ {
9899
+ "epoch": 0.4794391724373771,
9900
+ "grad_norm": 0.4005008935928345,
9901
+ "learning_rate": 9.759546072469328e-06,
9902
+ "loss": 0.5005,
9903
+ "step": 1402
9904
+ },
9905
+ {
9906
+ "epoch": 0.4797811404633667,
9907
+ "grad_norm": 0.3757031559944153,
9908
+ "learning_rate": 9.67625791008283e-06,
9909
+ "loss": 0.507,
9910
+ "step": 1403
9911
+ },
9912
+ {
9913
+ "epoch": 0.48012310848935624,
9914
+ "grad_norm": 0.32949626445770264,
9915
+ "learning_rate": 9.593308591721273e-06,
9916
+ "loss": 0.4857,
9917
+ "step": 1404
9918
+ },
9919
+ {
9920
+ "epoch": 0.4804650765153458,
9921
+ "grad_norm": 0.31370505690574646,
9922
+ "learning_rate": 9.510698428564014e-06,
9923
+ "loss": 0.5032,
9924
+ "step": 1405
9925
+ },
9926
+ {
9927
+ "epoch": 0.4808070445413354,
9928
+ "grad_norm": 0.45027247071266174,
9929
+ "learning_rate": 9.428427730518053e-06,
9930
+ "loss": 0.7439,
9931
+ "step": 1406
9932
+ },
9933
+ {
9934
+ "epoch": 0.48114901256732495,
9935
+ "grad_norm": 0.28915834426879883,
9936
+ "learning_rate": 9.346496806216864e-06,
9937
+ "loss": 0.4361,
9938
+ "step": 1407
9939
+ },
9940
+ {
9941
+ "epoch": 0.4814909805933145,
9942
+ "grad_norm": 0.3978510797023773,
9943
+ "learning_rate": 9.264905963019376e-06,
9944
+ "loss": 0.566,
9945
+ "step": 1408
9946
+ },
9947
+ {
9948
+ "epoch": 0.4818329486193041,
9949
+ "grad_norm": 0.6509686708450317,
9950
+ "learning_rate": 9.18365550700867e-06,
9951
+ "loss": 0.6359,
9952
+ "step": 1409
9953
+ },
9954
+ {
9955
+ "epoch": 0.48217491664529366,
9956
+ "grad_norm": 0.34217292070388794,
9957
+ "learning_rate": 9.10274574299087e-06,
9958
+ "loss": 0.5832,
9959
+ "step": 1410
9960
+ },
9961
+ {
9962
+ "epoch": 0.48251688467128323,
9963
+ "grad_norm": 0.36280882358551025,
9964
+ "learning_rate": 9.022176974494034e-06,
9965
+ "loss": 0.6527,
9966
+ "step": 1411
9967
+ },
9968
+ {
9969
+ "epoch": 0.4828588526972728,
9970
+ "grad_norm": 0.31427791714668274,
9971
+ "learning_rate": 8.941949503767034e-06,
9972
+ "loss": 0.4786,
9973
+ "step": 1412
9974
+ },
9975
+ {
9976
+ "epoch": 0.48320082072326237,
9977
+ "grad_norm": 0.2932129204273224,
9978
+ "learning_rate": 8.862063631778306e-06,
9979
+ "loss": 0.3879,
9980
+ "step": 1413
9981
+ },
9982
+ {
9983
+ "epoch": 0.48354278874925194,
9984
+ "grad_norm": 0.3400621712207794,
9985
+ "learning_rate": 8.78251965821485e-06,
9986
+ "loss": 0.3863,
9987
+ "step": 1414
9988
+ },
9989
+ {
9990
+ "epoch": 0.4838847567752415,
9991
+ "grad_norm": 0.4857083559036255,
9992
+ "learning_rate": 8.703317881481066e-06,
9993
+ "loss": 0.4673,
9994
+ "step": 1415
9995
+ },
9996
+ {
9997
+ "epoch": 0.4842267248012311,
9998
+ "grad_norm": 0.480949342250824,
9999
+ "learning_rate": 8.624458598697582e-06,
10000
+ "loss": 0.493,
10001
+ "step": 1416
10002
+ },
10003
+ {
10004
+ "epoch": 0.48456869282722065,
10005
+ "grad_norm": 0.42186641693115234,
10006
+ "learning_rate": 8.545942105700188e-06,
10007
+ "loss": 0.4314,
10008
+ "step": 1417
10009
+ },
10010
+ {
10011
+ "epoch": 0.4849106608532102,
10012
+ "grad_norm": 0.3342297077178955,
10013
+ "learning_rate": 8.467768697038769e-06,
10014
+ "loss": 0.6254,
10015
+ "step": 1418
10016
+ },
10017
+ {
10018
+ "epoch": 0.4852526288791998,
10019
+ "grad_norm": 0.33367452025413513,
10020
+ "learning_rate": 8.389938665976083e-06,
10021
+ "loss": 0.6116,
10022
+ "step": 1419
10023
+ },
10024
+ {
10025
+ "epoch": 0.48559459690518936,
10026
+ "grad_norm": 0.3980502784252167,
10027
+ "learning_rate": 8.312452304486761e-06,
10028
+ "loss": 0.5527,
10029
+ "step": 1420
10030
+ },
10031
+ {
10032
+ "epoch": 0.4859365649311789,
10033
+ "grad_norm": 0.3666388690471649,
10034
+ "learning_rate": 8.235309903256206e-06,
10035
+ "loss": 0.4088,
10036
+ "step": 1421
10037
+ },
10038
+ {
10039
+ "epoch": 0.4862785329571685,
10040
+ "grad_norm": 0.45284703373908997,
10041
+ "learning_rate": 8.158511751679399e-06,
10042
+ "loss": 0.5529,
10043
+ "step": 1422
10044
+ },
10045
+ {
10046
+ "epoch": 0.48662050098315807,
10047
+ "grad_norm": 0.385841965675354,
10048
+ "learning_rate": 8.08205813785996e-06,
10049
+ "loss": 0.4688,
10050
+ "step": 1423
10051
+ },
10052
+ {
10053
+ "epoch": 0.48696246900914764,
10054
+ "grad_norm": 0.4751913547515869,
10055
+ "learning_rate": 8.005949348608976e-06,
10056
+ "loss": 0.5193,
10057
+ "step": 1424
10058
+ },
10059
+ {
10060
+ "epoch": 0.4873044370351372,
10061
+ "grad_norm": 0.41884270310401917,
10062
+ "learning_rate": 7.930185669443946e-06,
10063
+ "loss": 0.6109,
10064
+ "step": 1425
10065
+ },
10066
+ {
10067
+ "epoch": 0.4876464050611268,
10068
+ "grad_norm": 0.32201844453811646,
10069
+ "learning_rate": 7.854767384587669e-06,
10070
+ "loss": 0.598,
10071
+ "step": 1426
10072
+ },
10073
+ {
10074
+ "epoch": 0.48798837308711634,
10075
+ "grad_norm": 0.41289442777633667,
10076
+ "learning_rate": 7.779694776967295e-06,
10077
+ "loss": 0.5104,
10078
+ "step": 1427
10079
+ },
10080
+ {
10081
+ "epoch": 0.4883303411131059,
10082
+ "grad_norm": 0.3158438801765442,
10083
+ "learning_rate": 7.704968128213108e-06,
10084
+ "loss": 0.5495,
10085
+ "step": 1428
10086
+ },
10087
+ {
10088
+ "epoch": 0.4886723091390955,
10089
+ "grad_norm": 0.5073384642601013,
10090
+ "learning_rate": 7.6305877186576e-06,
10091
+ "loss": 0.4147,
10092
+ "step": 1429
10093
+ },
10094
+ {
10095
+ "epoch": 0.48901427716508505,
10096
+ "grad_norm": 0.33985650539398193,
10097
+ "learning_rate": 7.556553827334367e-06,
10098
+ "loss": 0.6625,
10099
+ "step": 1430
10100
+ },
10101
+ {
10102
+ "epoch": 0.4893562451910746,
10103
+ "grad_norm": 0.3775363266468048,
10104
+ "learning_rate": 7.482866731977056e-06,
10105
+ "loss": 0.6464,
10106
+ "step": 1431
10107
+ },
10108
+ {
10109
+ "epoch": 0.4896982132170642,
10110
+ "grad_norm": 0.3615787625312805,
10111
+ "learning_rate": 7.4095267090182885e-06,
10112
+ "loss": 0.5859,
10113
+ "step": 1432
10114
+ },
10115
+ {
10116
+ "epoch": 0.49004018124305376,
10117
+ "grad_norm": 0.44534218311309814,
10118
+ "learning_rate": 7.336534033588727e-06,
10119
+ "loss": 0.9093,
10120
+ "step": 1433
10121
+ },
10122
+ {
10123
+ "epoch": 0.49038214926904333,
10124
+ "grad_norm": 0.3275962471961975,
10125
+ "learning_rate": 7.263888979515954e-06,
10126
+ "loss": 0.4235,
10127
+ "step": 1434
10128
+ },
10129
+ {
10130
+ "epoch": 0.4907241172950329,
10131
+ "grad_norm": 0.4237425625324249,
10132
+ "learning_rate": 7.191591819323473e-06,
10133
+ "loss": 0.5503,
10134
+ "step": 1435
10135
+ },
10136
+ {
10137
+ "epoch": 0.49106608532102247,
10138
+ "grad_norm": 0.41267287731170654,
10139
+ "learning_rate": 7.119642824229655e-06,
10140
+ "loss": 0.6242,
10141
+ "step": 1436
10142
+ },
10143
+ {
10144
+ "epoch": 0.49140805334701204,
10145
+ "grad_norm": 0.43237781524658203,
10146
+ "learning_rate": 7.048042264146815e-06,
10147
+ "loss": 0.6045,
10148
+ "step": 1437
10149
+ },
10150
+ {
10151
+ "epoch": 0.4917500213730016,
10152
+ "grad_norm": 0.33868464827537537,
10153
+ "learning_rate": 6.976790407680067e-06,
10154
+ "loss": 0.535,
10155
+ "step": 1438
10156
+ },
10157
+ {
10158
+ "epoch": 0.4920919893989912,
10159
+ "grad_norm": 0.37420621514320374,
10160
+ "learning_rate": 6.90588752212643e-06,
10161
+ "loss": 0.5514,
10162
+ "step": 1439
10163
+ },
10164
+ {
10165
+ "epoch": 0.49243395742498075,
10166
+ "grad_norm": 0.3844612240791321,
10167
+ "learning_rate": 6.835333873473793e-06,
10168
+ "loss": 0.6164,
10169
+ "step": 1440
10170
+ },
10171
+ {
10172
+ "epoch": 0.4927759254509703,
10173
+ "grad_norm": 0.4005042016506195,
10174
+ "learning_rate": 6.765129726399844e-06,
10175
+ "loss": 0.6643,
10176
+ "step": 1441
10177
+ },
10178
+ {
10179
+ "epoch": 0.4931178934769599,
10180
+ "grad_norm": 0.3358938992023468,
10181
+ "learning_rate": 6.695275344271168e-06,
10182
+ "loss": 0.4994,
10183
+ "step": 1442
10184
+ },
10185
+ {
10186
+ "epoch": 0.49345986150294946,
10187
+ "grad_norm": 0.3401714861392975,
10188
+ "learning_rate": 6.625770989142266e-06,
10189
+ "loss": 0.6264,
10190
+ "step": 1443
10191
+ },
10192
+ {
10193
+ "epoch": 0.49380182952893903,
10194
+ "grad_norm": 0.49171924591064453,
10195
+ "learning_rate": 6.5566169217544886e-06,
10196
+ "loss": 0.2698,
10197
+ "step": 1444
10198
+ },
10199
+ {
10200
+ "epoch": 0.4941437975549286,
10201
+ "grad_norm": 0.49176937341690063,
10202
+ "learning_rate": 6.4878134015350815e-06,
10203
+ "loss": 0.5954,
10204
+ "step": 1445
10205
+ },
10206
+ {
10207
+ "epoch": 0.49448576558091817,
10208
+ "grad_norm": 0.4451077878475189,
10209
+ "learning_rate": 6.419360686596298e-06,
10210
+ "loss": 0.5165,
10211
+ "step": 1446
10212
+ },
10213
+ {
10214
+ "epoch": 0.49482773360690774,
10215
+ "grad_norm": 0.4469234049320221,
10216
+ "learning_rate": 6.351259033734314e-06,
10217
+ "loss": 0.6972,
10218
+ "step": 1447
10219
+ },
10220
+ {
10221
+ "epoch": 0.4951697016328973,
10222
+ "grad_norm": 0.39374667406082153,
10223
+ "learning_rate": 6.283508698428353e-06,
10224
+ "loss": 0.4433,
10225
+ "step": 1448
10226
+ },
10227
+ {
10228
+ "epoch": 0.4955116696588869,
10229
+ "grad_norm": 0.40193474292755127,
10230
+ "learning_rate": 6.216109934839687e-06,
10231
+ "loss": 0.5459,
10232
+ "step": 1449
10233
+ },
10234
+ {
10235
+ "epoch": 0.49585363768487645,
10236
+ "grad_norm": 0.299493670463562,
10237
+ "learning_rate": 6.149062995810639e-06,
10238
+ "loss": 0.4521,
10239
+ "step": 1450
10240
+ },
10241
+ {
10242
+ "epoch": 0.496195605710866,
10243
+ "grad_norm": 0.3158107399940491,
10244
+ "learning_rate": 6.082368132863758e-06,
10245
+ "loss": 0.4194,
10246
+ "step": 1451
10247
+ },
10248
+ {
10249
+ "epoch": 0.4965375737368556,
10250
+ "grad_norm": 0.39207759499549866,
10251
+ "learning_rate": 6.0160255962007694e-06,
10252
+ "loss": 0.6173,
10253
+ "step": 1452
10254
+ },
10255
+ {
10256
+ "epoch": 0.49687954176284516,
10257
+ "grad_norm": 0.36359909176826477,
10258
+ "learning_rate": 5.950035634701645e-06,
10259
+ "loss": 0.5063,
10260
+ "step": 1453
10261
+ },
10262
+ {
10263
+ "epoch": 0.4972215097888347,
10264
+ "grad_norm": 0.3674727976322174,
10265
+ "learning_rate": 5.884398495923727e-06,
10266
+ "loss": 0.5094,
10267
+ "step": 1454
10268
+ },
10269
+ {
10270
+ "epoch": 0.4975634778148243,
10271
+ "grad_norm": 0.48032888770103455,
10272
+ "learning_rate": 5.8191144261007465e-06,
10273
+ "loss": 0.4669,
10274
+ "step": 1455
10275
+ },
10276
+ {
10277
+ "epoch": 0.49790544584081387,
10278
+ "grad_norm": 0.3460092544555664,
10279
+ "learning_rate": 5.754183670141888e-06,
10280
+ "loss": 0.5367,
10281
+ "step": 1456
10282
+ },
10283
+ {
10284
+ "epoch": 0.49824741386680343,
10285
+ "grad_norm": 0.2881573438644409,
10286
+ "learning_rate": 5.68960647163097e-06,
10287
+ "loss": 0.4381,
10288
+ "step": 1457
10289
+ },
10290
+ {
10291
+ "epoch": 0.498589381892793,
10292
+ "grad_norm": 0.4344151020050049,
10293
+ "learning_rate": 5.625383072825429e-06,
10294
+ "loss": 0.5184,
10295
+ "step": 1458
10296
+ },
10297
+ {
10298
+ "epoch": 0.4989313499187826,
10299
+ "grad_norm": 0.40367579460144043,
10300
+ "learning_rate": 5.561513714655419e-06,
10301
+ "loss": 0.6386,
10302
+ "step": 1459
10303
+ },
10304
+ {
10305
+ "epoch": 0.49927331794477214,
10306
+ "grad_norm": 0.5705030560493469,
10307
+ "learning_rate": 5.4979986367229564e-06,
10308
+ "loss": 0.5825,
10309
+ "step": 1460
10310
+ },
10311
+ {
10312
+ "epoch": 0.4996152859707617,
10313
+ "grad_norm": 0.3498372435569763,
10314
+ "learning_rate": 5.4348380773010075e-06,
10315
+ "loss": 0.4944,
10316
+ "step": 1461
10317
+ },
10318
+ {
10319
+ "epoch": 0.4999572539967513,
10320
+ "grad_norm": 0.4295266270637512,
10321
+ "learning_rate": 5.37203227333255e-06,
10322
+ "loss": 0.4896,
10323
+ "step": 1462
10324
+ },
10325
+ {
10326
+ "epoch": 0.5002992220227409,
10327
+ "grad_norm": 0.4837034344673157,
10328
+ "learning_rate": 5.3095814604297574e-06,
10329
+ "loss": 0.5036,
10330
+ "step": 1463
10331
+ },
10332
+ {
10333
+ "epoch": 0.5006411900487304,
10334
+ "grad_norm": 0.31095609068870544,
10335
+ "learning_rate": 5.247485872873026e-06,
10336
+ "loss": 0.4891,
10337
+ "step": 1464
10338
+ },
10339
+ {
10340
+ "epoch": 0.50098315807472,
10341
+ "grad_norm": 0.3047396242618561,
10342
+ "learning_rate": 5.185745743610215e-06,
10343
+ "loss": 0.5915,
10344
+ "step": 1465
10345
+ },
10346
+ {
10347
+ "epoch": 0.5013251261007096,
10348
+ "grad_norm": 0.471611350774765,
10349
+ "learning_rate": 5.124361304255632e-06,
10350
+ "loss": 0.5939,
10351
+ "step": 1466
10352
+ },
10353
+ {
10354
+ "epoch": 0.5016670941266992,
10355
+ "grad_norm": 0.526499330997467,
10356
+ "learning_rate": 5.063332785089281e-06,
10357
+ "loss": 0.4065,
10358
+ "step": 1467
10359
+ },
10360
+ {
10361
+ "epoch": 0.5020090621526887,
10362
+ "grad_norm": 0.34760183095932007,
10363
+ "learning_rate": 5.002660415055949e-06,
10364
+ "loss": 0.3351,
10365
+ "step": 1468
10366
+ },
10367
+ {
10368
+ "epoch": 0.5023510301786783,
10369
+ "grad_norm": 0.3515227138996124,
10370
+ "learning_rate": 4.942344421764322e-06,
10371
+ "loss": 0.4433,
10372
+ "step": 1469
10373
+ },
10374
+ {
10375
+ "epoch": 0.5026929982046678,
10376
+ "grad_norm": 0.38687199354171753,
10377
+ "learning_rate": 4.8823850314861915e-06,
10378
+ "loss": 0.562,
10379
+ "step": 1470
10380
+ },
10381
+ {
10382
+ "epoch": 0.5030349662306575,
10383
+ "grad_norm": 0.517970860004425,
10384
+ "learning_rate": 4.822782469155573e-06,
10385
+ "loss": 0.4598,
10386
+ "step": 1471
10387
+ },
10388
+ {
10389
+ "epoch": 0.503376934256647,
10390
+ "grad_norm": 0.40455347299575806,
10391
+ "learning_rate": 4.7635369583678425e-06,
10392
+ "loss": 0.5014,
10393
+ "step": 1472
10394
+ },
10395
+ {
10396
+ "epoch": 0.5037189022826366,
10397
+ "grad_norm": 0.38876280188560486,
10398
+ "learning_rate": 4.7046487213789344e-06,
10399
+ "loss": 0.5155,
10400
+ "step": 1473
10401
+ },
10402
+ {
10403
+ "epoch": 0.5040608703086261,
10404
+ "grad_norm": 0.4026382565498352,
10405
+ "learning_rate": 4.646117979104481e-06,
10406
+ "loss": 0.63,
10407
+ "step": 1474
10408
+ },
10409
+ {
10410
+ "epoch": 0.5044028383346157,
10411
+ "grad_norm": 0.398532897233963,
10412
+ "learning_rate": 4.587944951118994e-06,
10413
+ "loss": 0.5957,
10414
+ "step": 1475
10415
+ },
10416
+ {
10417
+ "epoch": 0.5047448063606053,
10418
+ "grad_norm": 0.4192051291465759,
10419
+ "learning_rate": 4.5301298556550746e-06,
10420
+ "loss": 0.6054,
10421
+ "step": 1476
10422
+ },
10423
+ {
10424
+ "epoch": 0.5050867743865949,
10425
+ "grad_norm": 0.47235310077667236,
10426
+ "learning_rate": 4.472672909602527e-06,
10427
+ "loss": 0.5894,
10428
+ "step": 1477
10429
+ },
10430
+ {
10431
+ "epoch": 0.5054287424125844,
10432
+ "grad_norm": 0.33159035444259644,
10433
+ "learning_rate": 4.415574328507577e-06,
10434
+ "loss": 0.4497,
10435
+ "step": 1478
10436
+ },
10437
+ {
10438
+ "epoch": 0.505770710438574,
10439
+ "grad_norm": 0.3283701241016388,
10440
+ "learning_rate": 4.358834326572092e-06,
10441
+ "loss": 0.5675,
10442
+ "step": 1479
10443
+ },
10444
+ {
10445
+ "epoch": 0.5061126784645635,
10446
+ "grad_norm": 0.4659315347671509,
10447
+ "learning_rate": 4.3024531166527495e-06,
10448
+ "loss": 0.4861,
10449
+ "step": 1480
10450
+ },
10451
+ {
10452
+ "epoch": 0.5064546464905532,
10453
+ "grad_norm": 0.424280047416687,
10454
+ "learning_rate": 4.246430910260191e-06,
10455
+ "loss": 0.6762,
10456
+ "step": 1481
10457
+ },
10458
+ {
10459
+ "epoch": 0.5067966145165427,
10460
+ "grad_norm": 0.4000745713710785,
10461
+ "learning_rate": 4.190767917558369e-06,
10462
+ "loss": 0.6834,
10463
+ "step": 1482
10464
+ },
10465
+ {
10466
+ "epoch": 0.5071385825425323,
10467
+ "grad_norm": 0.49172914028167725,
10468
+ "learning_rate": 4.135464347363571e-06,
10469
+ "loss": 0.6214,
10470
+ "step": 1483
10471
+ },
10472
+ {
10473
+ "epoch": 0.5074805505685218,
10474
+ "grad_norm": 0.35509413480758667,
10475
+ "learning_rate": 4.080520407143795e-06,
10476
+ "loss": 0.5186,
10477
+ "step": 1484
10478
+ },
10479
+ {
10480
+ "epoch": 0.5078225185945114,
10481
+ "grad_norm": 0.3477623164653778,
10482
+ "learning_rate": 4.025936303017897e-06,
10483
+ "loss": 0.4725,
10484
+ "step": 1485
10485
+ },
10486
+ {
10487
+ "epoch": 0.508164486620501,
10488
+ "grad_norm": 0.3009217381477356,
10489
+ "learning_rate": 3.97171223975481e-06,
10490
+ "loss": 0.4228,
10491
+ "step": 1486
10492
+ },
10493
+ {
10494
+ "epoch": 0.5085064546464906,
10495
+ "grad_norm": 0.30407074093818665,
10496
+ "learning_rate": 3.917848420772818e-06,
10497
+ "loss": 0.4755,
10498
+ "step": 1487
10499
+ },
10500
+ {
10501
+ "epoch": 0.5088484226724801,
10502
+ "grad_norm": 0.5132246613502502,
10503
+ "learning_rate": 3.864345048138751e-06,
10504
+ "loss": 0.5118,
10505
+ "step": 1488
10506
+ },
10507
+ {
10508
+ "epoch": 0.5091903906984697,
10509
+ "grad_norm": 0.5252590775489807,
10510
+ "learning_rate": 3.8112023225672755e-06,
10511
+ "loss": 0.5199,
10512
+ "step": 1489
10513
+ },
10514
+ {
10515
+ "epoch": 0.5095323587244592,
10516
+ "grad_norm": 0.4393605589866638,
10517
+ "learning_rate": 3.758420443420085e-06,
10518
+ "loss": 0.5528,
10519
+ "step": 1490
10520
+ },
10521
+ {
10522
+ "epoch": 0.5098743267504489,
10523
+ "grad_norm": 0.591144323348999,
10524
+ "learning_rate": 3.7059996087051795e-06,
10525
+ "loss": 0.7475,
10526
+ "step": 1491
10527
+ },
10528
+ {
10529
+ "epoch": 0.5102162947764384,
10530
+ "grad_norm": 0.5281643867492676,
10531
+ "learning_rate": 3.6539400150761515e-06,
10532
+ "loss": 0.6121,
10533
+ "step": 1492
10534
+ },
10535
+ {
10536
+ "epoch": 0.510558262802428,
10537
+ "grad_norm": 0.241044819355011,
10538
+ "learning_rate": 3.60224185783139e-06,
10539
+ "loss": 0.3838,
10540
+ "step": 1493
10541
+ },
10542
+ {
10543
+ "epoch": 0.5109002308284175,
10544
+ "grad_norm": 0.5283997058868408,
10545
+ "learning_rate": 3.5509053309133897e-06,
10546
+ "loss": 0.4947,
10547
+ "step": 1494
10548
+ },
10549
+ {
10550
+ "epoch": 0.5112421988544071,
10551
+ "grad_norm": 0.5331889986991882,
10552
+ "learning_rate": 3.49993062690801e-06,
10553
+ "loss": 0.5312,
10554
+ "step": 1495
10555
+ },
10556
+ {
10557
+ "epoch": 0.5115841668803967,
10558
+ "grad_norm": 0.5044165253639221,
10559
+ "learning_rate": 3.449317937043728e-06,
10560
+ "loss": 0.4942,
10561
+ "step": 1496
10562
+ },
10563
+ {
10564
+ "epoch": 0.5119261349063863,
10565
+ "grad_norm": 0.38311871886253357,
10566
+ "learning_rate": 3.399067451191007e-06,
10567
+ "loss": 0.594,
10568
+ "step": 1497
10569
+ },
10570
+ {
10571
+ "epoch": 0.5122681029323758,
10572
+ "grad_norm": 0.4012651741504669,
10573
+ "learning_rate": 3.349179357861487e-06,
10574
+ "loss": 0.4338,
10575
+ "step": 1498
10576
+ },
10577
+ {
10578
+ "epoch": 0.5126100709583654,
10579
+ "grad_norm": 0.38673850893974304,
10580
+ "learning_rate": 3.2996538442072844e-06,
10581
+ "loss": 0.5927,
10582
+ "step": 1499
10583
+ },
10584
+ {
10585
+ "epoch": 0.5129520389843549,
10586
+ "grad_norm": 0.3748995363712311,
10587
+ "learning_rate": 3.2504910960203694e-06,
10588
+ "loss": 0.3867,
10589
+ "step": 1500
10590
+ },
10591
+ {
10592
+ "epoch": 0.5129520389843549,
10593
+ "eval_loss": 0.5886463522911072,
10594
+ "eval_runtime": 231.3531,
10595
+ "eval_samples_per_second": 5.325,
10596
+ "eval_steps_per_second": 2.663,
10597
+ "step": 1500
10598
  }
10599
  ],
10600
  "logging_steps": 1,
 
10623
  "attributes": {}
10624
  }
10625
  },
10626
+ "total_flos": 1.3256905345676083e+18,
10627
  "train_batch_size": 2,
10628
  "trial_name": null,
10629
  "trial_params": null