error577 commited on
Commit
aacfd93
·
verified ·
1 Parent(s): 8cd0ec3

Training in progress, step 6200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a931b6ad4a85eda42c42572924144cf0eea641774fec52d499f7972a6ccb62d2
3
  size 21253336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83a7c07e51238afaa7c40d988f34e3738452e0f7d0951dbdfd2d1a2ee920e6f4
3
  size 21253336
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e588e286d0da677e5f962330503e921f28155b2192231140e86298b423bc5703
3
  size 10952762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca245b5c5818c369c760d40c19814c70787271c71cb16b41a34ef13138831071
3
  size 10952762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6e7c43be1453c924301f96a84540b222f3e58594279f89fa4e6934480c47a73
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:004276e1bb014253bb15da64b4c20e1898f1faddc15b7d64ac8624fcece1991d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d37e9d4f4c602b5950845361f0e35ef489bd85b554b321fd8096a2d871b2de83
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48d5aa75a71817f6d3ca1f492d6f89412df01d1749b3c6a5faf3ff446d681f07
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.12408096343278885,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-5600",
4
- "epoch": 2.881498379157162,
5
  "eval_steps": 200,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -42255,6 +42255,1414 @@
42255
  "eval_samples_per_second": 157.556,
42256
  "eval_steps_per_second": 6.584,
42257
  "step": 6000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42258
  }
42259
  ],
42260
  "logging_steps": 1,
@@ -42269,7 +43677,7 @@
42269
  "early_stopping_threshold": 0.0
42270
  },
42271
  "attributes": {
42272
- "early_stopping_patience_counter": 2
42273
  }
42274
  },
42275
  "TrainerControl": {
@@ -42278,12 +43686,12 @@
42278
  "should_evaluate": false,
42279
  "should_log": false,
42280
  "should_save": true,
42281
- "should_training_stop": false
42282
  },
42283
  "attributes": {}
42284
  }
42285
  },
42286
- "total_flos": 1.4438504463335424e+17,
42287
  "train_batch_size": 24,
42288
  "trial_name": null,
42289
  "trial_params": null
 
1
  {
2
  "best_metric": 0.12408096343278885,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-5600",
4
+ "epoch": 2.9775483251290673,
5
  "eval_steps": 200,
6
+ "global_step": 6200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
42255
  "eval_samples_per_second": 157.556,
42256
  "eval_steps_per_second": 6.584,
42257
  "step": 6000
42258
+ },
42259
+ {
42260
+ "epoch": 2.8819786288870213,
42261
+ "grad_norm": 1.059691309928894,
42262
+ "learning_rate": 7.656405705057435e-07,
42263
+ "loss": 0.5278,
42264
+ "step": 6001
42265
+ },
42266
+ {
42267
+ "epoch": 2.8824588786168808,
42268
+ "grad_norm": 0.8245054483413696,
42269
+ "learning_rate": 7.594111053758623e-07,
42270
+ "loss": 0.4993,
42271
+ "step": 6002
42272
+ },
42273
+ {
42274
+ "epoch": 2.8829391283467403,
42275
+ "grad_norm": 1.2777291536331177,
42276
+ "learning_rate": 7.532069896093675e-07,
42277
+ "loss": 0.5102,
42278
+ "step": 6003
42279
+ },
42280
+ {
42281
+ "epoch": 2.8834193780765998,
42282
+ "grad_norm": 1.0950323343276978,
42283
+ "learning_rate": 7.470282247910132e-07,
42284
+ "loss": 0.4979,
42285
+ "step": 6004
42286
+ },
42287
+ {
42288
+ "epoch": 2.8838996278064593,
42289
+ "grad_norm": 0.9586261510848999,
42290
+ "learning_rate": 7.408748124990594e-07,
42291
+ "loss": 0.4817,
42292
+ "step": 6005
42293
+ },
42294
+ {
42295
+ "epoch": 2.8843798775363187,
42296
+ "grad_norm": 1.327382206916809,
42297
+ "learning_rate": 7.347467543052932e-07,
42298
+ "loss": 0.544,
42299
+ "step": 6006
42300
+ },
42301
+ {
42302
+ "epoch": 2.8848601272661787,
42303
+ "grad_norm": 0.7627050876617432,
42304
+ "learning_rate": 7.286440517750181e-07,
42305
+ "loss": 0.4609,
42306
+ "step": 6007
42307
+ },
42308
+ {
42309
+ "epoch": 2.885340376996038,
42310
+ "grad_norm": 0.9781989455223083,
42311
+ "learning_rate": 7.22566706467076e-07,
42312
+ "loss": 0.5256,
42313
+ "step": 6008
42314
+ },
42315
+ {
42316
+ "epoch": 2.8858206267258977,
42317
+ "grad_norm": 1.4245251417160034,
42318
+ "learning_rate": 7.16514719933803e-07,
42319
+ "loss": 0.5307,
42320
+ "step": 6009
42321
+ },
42322
+ {
42323
+ "epoch": 2.886300876455757,
42324
+ "grad_norm": 1.1358163356781006,
42325
+ "learning_rate": 7.104880937211178e-07,
42326
+ "loss": 0.5862,
42327
+ "step": 6010
42328
+ },
42329
+ {
42330
+ "epoch": 2.8867811261856167,
42331
+ "grad_norm": 0.8729516863822937,
42332
+ "learning_rate": 7.044868293683893e-07,
42333
+ "loss": 0.4935,
42334
+ "step": 6011
42335
+ },
42336
+ {
42337
+ "epoch": 2.887261375915476,
42338
+ "grad_norm": 1.374247670173645,
42339
+ "learning_rate": 6.985109284085578e-07,
42340
+ "loss": 0.5493,
42341
+ "step": 6012
42342
+ },
42343
+ {
42344
+ "epoch": 2.8877416256453357,
42345
+ "grad_norm": 1.301226258277893,
42346
+ "learning_rate": 6.925603923680579e-07,
42347
+ "loss": 0.5663,
42348
+ "step": 6013
42349
+ },
42350
+ {
42351
+ "epoch": 2.888221875375195,
42352
+ "grad_norm": 1.1913409233093262,
42353
+ "learning_rate": 6.866352227668626e-07,
42354
+ "loss": 0.5533,
42355
+ "step": 6014
42356
+ },
42357
+ {
42358
+ "epoch": 2.8887021251050546,
42359
+ "grad_norm": 0.9043430685997009,
42360
+ "learning_rate": 6.807354211184613e-07,
42361
+ "loss": 0.4747,
42362
+ "step": 6015
42363
+ },
42364
+ {
42365
+ "epoch": 2.889182374834914,
42366
+ "grad_norm": 0.9711902737617493,
42367
+ "learning_rate": 6.748609889298596e-07,
42368
+ "loss": 0.5061,
42369
+ "step": 6016
42370
+ },
42371
+ {
42372
+ "epoch": 2.8896626245647736,
42373
+ "grad_norm": 1.0060513019561768,
42374
+ "learning_rate": 6.690119277015683e-07,
42375
+ "loss": 0.4921,
42376
+ "step": 6017
42377
+ },
42378
+ {
42379
+ "epoch": 2.890142874294633,
42380
+ "grad_norm": 1.0134211778640747,
42381
+ "learning_rate": 6.631882389276478e-07,
42382
+ "loss": 0.5235,
42383
+ "step": 6018
42384
+ },
42385
+ {
42386
+ "epoch": 2.8906231240244926,
42387
+ "grad_norm": 0.9697439670562744,
42388
+ "learning_rate": 6.573899240956527e-07,
42389
+ "loss": 0.4913,
42390
+ "step": 6019
42391
+ },
42392
+ {
42393
+ "epoch": 2.891103373754352,
42394
+ "grad_norm": 0.8868252635002136,
42395
+ "learning_rate": 6.51616984686676e-07,
42396
+ "loss": 0.4761,
42397
+ "step": 6020
42398
+ },
42399
+ {
42400
+ "epoch": 2.8915836234842116,
42401
+ "grad_norm": 0.9749808311462402,
42402
+ "learning_rate": 6.45869422175327e-07,
42403
+ "loss": 0.4921,
42404
+ "step": 6021
42405
+ },
42406
+ {
42407
+ "epoch": 2.892063873214071,
42408
+ "grad_norm": 0.9134318232536316,
42409
+ "learning_rate": 6.401472380297091e-07,
42410
+ "loss": 0.4945,
42411
+ "step": 6022
42412
+ },
42413
+ {
42414
+ "epoch": 2.8925441229439306,
42415
+ "grad_norm": 1.094684362411499,
42416
+ "learning_rate": 6.344504337114643e-07,
42417
+ "loss": 0.5318,
42418
+ "step": 6023
42419
+ },
42420
+ {
42421
+ "epoch": 2.8930243726737905,
42422
+ "grad_norm": 1.2772841453552246,
42423
+ "learning_rate": 6.287790106757396e-07,
42424
+ "loss": 0.4856,
42425
+ "step": 6024
42426
+ },
42427
+ {
42428
+ "epoch": 2.89350462240365,
42429
+ "grad_norm": 0.9391446113586426,
42430
+ "learning_rate": 6.231329703712207e-07,
42431
+ "loss": 0.525,
42432
+ "step": 6025
42433
+ },
42434
+ {
42435
+ "epoch": 2.8939848721335095,
42436
+ "grad_norm": 1.024240493774414,
42437
+ "learning_rate": 6.175123142400985e-07,
42438
+ "loss": 0.495,
42439
+ "step": 6026
42440
+ },
42441
+ {
42442
+ "epoch": 2.894465121863369,
42443
+ "grad_norm": 0.9741129279136658,
42444
+ "learning_rate": 6.119170437180466e-07,
42445
+ "loss": 0.5282,
42446
+ "step": 6027
42447
+ },
42448
+ {
42449
+ "epoch": 2.8949453715932285,
42450
+ "grad_norm": 0.9005728363990784,
42451
+ "learning_rate": 6.063471602343218e-07,
42452
+ "loss": 0.5017,
42453
+ "step": 6028
42454
+ },
42455
+ {
42456
+ "epoch": 2.895425621323088,
42457
+ "grad_norm": 1.0421838760375977,
42458
+ "learning_rate": 6.008026652116305e-07,
42459
+ "loss": 0.4854,
42460
+ "step": 6029
42461
+ },
42462
+ {
42463
+ "epoch": 2.8959058710529475,
42464
+ "grad_norm": 0.951676070690155,
42465
+ "learning_rate": 5.952835600662288e-07,
42466
+ "loss": 0.4992,
42467
+ "step": 6030
42468
+ },
42469
+ {
42470
+ "epoch": 2.896386120782807,
42471
+ "grad_norm": 0.9691091775894165,
42472
+ "learning_rate": 5.897898462078888e-07,
42473
+ "loss": 0.5214,
42474
+ "step": 6031
42475
+ },
42476
+ {
42477
+ "epoch": 2.8968663705126665,
42478
+ "grad_norm": 0.8523342609405518,
42479
+ "learning_rate": 5.843215250398882e-07,
42480
+ "loss": 0.4728,
42481
+ "step": 6032
42482
+ },
42483
+ {
42484
+ "epoch": 2.897346620242526,
42485
+ "grad_norm": 0.9989683032035828,
42486
+ "learning_rate": 5.788785979590095e-07,
42487
+ "loss": 0.4664,
42488
+ "step": 6033
42489
+ },
42490
+ {
42491
+ "epoch": 2.8978268699723855,
42492
+ "grad_norm": 1.4147025346755981,
42493
+ "learning_rate": 5.734610663555628e-07,
42494
+ "loss": 0.5576,
42495
+ "step": 6034
42496
+ },
42497
+ {
42498
+ "epoch": 2.8983071197022454,
42499
+ "grad_norm": 1.0927692651748657,
42500
+ "learning_rate": 5.680689316133636e-07,
42501
+ "loss": 0.5354,
42502
+ "step": 6035
42503
+ },
42504
+ {
42505
+ "epoch": 2.898787369432105,
42506
+ "grad_norm": 1.0628910064697266,
42507
+ "learning_rate": 5.627021951097545e-07,
42508
+ "loss": 0.4913,
42509
+ "step": 6036
42510
+ },
42511
+ {
42512
+ "epoch": 2.8992676191619644,
42513
+ "grad_norm": 0.9283753037452698,
42514
+ "learning_rate": 5.573608582155721e-07,
42515
+ "loss": 0.5329,
42516
+ "step": 6037
42517
+ },
42518
+ {
42519
+ "epoch": 2.899747868891824,
42520
+ "grad_norm": 0.8634472489356995,
42521
+ "learning_rate": 5.520449222951585e-07,
42522
+ "loss": 0.4574,
42523
+ "step": 6038
42524
+ },
42525
+ {
42526
+ "epoch": 2.9002281186216834,
42527
+ "grad_norm": 0.9945970177650452,
42528
+ "learning_rate": 5.467543887064053e-07,
42529
+ "loss": 0.4786,
42530
+ "step": 6039
42531
+ },
42532
+ {
42533
+ "epoch": 2.900708368351543,
42534
+ "grad_norm": 1.0942494869232178,
42535
+ "learning_rate": 5.41489258800687e-07,
42536
+ "loss": 0.5294,
42537
+ "step": 6040
42538
+ },
42539
+ {
42540
+ "epoch": 2.9011886180814024,
42541
+ "grad_norm": 1.1848549842834473,
42542
+ "learning_rate": 5.362495339228834e-07,
42543
+ "loss": 0.5498,
42544
+ "step": 6041
42545
+ },
42546
+ {
42547
+ "epoch": 2.901668867811262,
42548
+ "grad_norm": 1.313307762145996,
42549
+ "learning_rate": 5.310352154113907e-07,
42550
+ "loss": 0.524,
42551
+ "step": 6042
42552
+ },
42553
+ {
42554
+ "epoch": 2.9021491175411214,
42555
+ "grad_norm": 1.1878790855407715,
42556
+ "learning_rate": 5.258463045981432e-07,
42557
+ "loss": 0.5353,
42558
+ "step": 6043
42559
+ },
42560
+ {
42561
+ "epoch": 2.902629367270981,
42562
+ "grad_norm": 1.283842921257019,
42563
+ "learning_rate": 5.206828028085364e-07,
42564
+ "loss": 0.5463,
42565
+ "step": 6044
42566
+ },
42567
+ {
42568
+ "epoch": 2.9031096170008404,
42569
+ "grad_norm": 1.003977656364441,
42570
+ "learning_rate": 5.155447113615153e-07,
42571
+ "loss": 0.4998,
42572
+ "step": 6045
42573
+ },
42574
+ {
42575
+ "epoch": 2.9035898667307,
42576
+ "grad_norm": 0.9022426009178162,
42577
+ "learning_rate": 5.104320315695188e-07,
42578
+ "loss": 0.5119,
42579
+ "step": 6046
42580
+ },
42581
+ {
42582
+ "epoch": 2.9040701164605593,
42583
+ "grad_norm": 0.957444965839386,
42584
+ "learning_rate": 5.053447647385023e-07,
42585
+ "loss": 0.4884,
42586
+ "step": 6047
42587
+ },
42588
+ {
42589
+ "epoch": 2.904550366190419,
42590
+ "grad_norm": 0.9504349827766418,
42591
+ "learning_rate": 5.002829121679153e-07,
42592
+ "loss": 0.532,
42593
+ "step": 6048
42594
+ },
42595
+ {
42596
+ "epoch": 2.9050306159202783,
42597
+ "grad_norm": 1.0048445463180542,
42598
+ "learning_rate": 4.952464751507235e-07,
42599
+ "loss": 0.5297,
42600
+ "step": 6049
42601
+ },
42602
+ {
42603
+ "epoch": 2.905510865650138,
42604
+ "grad_norm": 0.8786672949790955,
42605
+ "learning_rate": 4.902354549733978e-07,
42606
+ "loss": 0.4675,
42607
+ "step": 6050
42608
+ },
42609
+ {
42610
+ "epoch": 2.9059911153799973,
42611
+ "grad_norm": 1.1563762426376343,
42612
+ "learning_rate": 4.852498529159366e-07,
42613
+ "loss": 0.5548,
42614
+ "step": 6051
42615
+ },
42616
+ {
42617
+ "epoch": 2.9064713651098573,
42618
+ "grad_norm": 1.21589195728302,
42619
+ "learning_rate": 4.802896702518101e-07,
42620
+ "loss": 0.5141,
42621
+ "step": 6052
42622
+ },
42623
+ {
42624
+ "epoch": 2.9069516148397168,
42625
+ "grad_norm": 1.1137498617172241,
42626
+ "learning_rate": 4.7535490824802686e-07,
42627
+ "loss": 0.4656,
42628
+ "step": 6053
42629
+ },
42630
+ {
42631
+ "epoch": 2.9074318645695763,
42632
+ "grad_norm": 0.9434409737586975,
42633
+ "learning_rate": 4.704455681650788e-07,
42634
+ "loss": 0.5169,
42635
+ "step": 6054
42636
+ },
42637
+ {
42638
+ "epoch": 2.9079121142994357,
42639
+ "grad_norm": 1.3749167919158936,
42640
+ "learning_rate": 4.6556165125699604e-07,
42641
+ "loss": 0.5646,
42642
+ "step": 6055
42643
+ },
42644
+ {
42645
+ "epoch": 2.9083923640292952,
42646
+ "grad_norm": 0.9305620789527893,
42647
+ "learning_rate": 4.6070315877126957e-07,
42648
+ "loss": 0.4873,
42649
+ "step": 6056
42650
+ },
42651
+ {
42652
+ "epoch": 2.9088726137591547,
42653
+ "grad_norm": 1.0196714401245117,
42654
+ "learning_rate": 4.5587009194894004e-07,
42655
+ "loss": 0.5063,
42656
+ "step": 6057
42657
+ },
42658
+ {
42659
+ "epoch": 2.9093528634890142,
42660
+ "grad_norm": 1.0446749925613403,
42661
+ "learning_rate": 4.51062452024531e-07,
42662
+ "loss": 0.4861,
42663
+ "step": 6058
42664
+ },
42665
+ {
42666
+ "epoch": 2.9098331132188737,
42667
+ "grad_norm": 1.1384238004684448,
42668
+ "learning_rate": 4.462802402260602e-07,
42669
+ "loss": 0.5303,
42670
+ "step": 6059
42671
+ },
42672
+ {
42673
+ "epoch": 2.910313362948733,
42674
+ "grad_norm": 0.9151965975761414,
42675
+ "learning_rate": 4.415234577750726e-07,
42676
+ "loss": 0.5,
42677
+ "step": 6060
42678
+ },
42679
+ {
42680
+ "epoch": 2.9107936126785927,
42681
+ "grad_norm": 2.2920780181884766,
42682
+ "learning_rate": 4.3679210588661866e-07,
42683
+ "loss": 0.6922,
42684
+ "step": 6061
42685
+ },
42686
+ {
42687
+ "epoch": 2.9112738624084527,
42688
+ "grad_norm": 0.9247322678565979,
42689
+ "learning_rate": 4.320861857692315e-07,
42690
+ "loss": 0.5214,
42691
+ "step": 6062
42692
+ },
42693
+ {
42694
+ "epoch": 2.911754112138312,
42695
+ "grad_norm": 0.8479911088943481,
42696
+ "learning_rate": 4.2740569862497193e-07,
42697
+ "loss": 0.4689,
42698
+ "step": 6063
42699
+ },
42700
+ {
42701
+ "epoch": 2.9122343618681716,
42702
+ "grad_norm": 1.0244017839431763,
42703
+ "learning_rate": 4.227506456493835e-07,
42704
+ "loss": 0.5034,
42705
+ "step": 6064
42706
+ },
42707
+ {
42708
+ "epoch": 2.912714611598031,
42709
+ "grad_norm": 0.9050112962722778,
42710
+ "learning_rate": 4.181210280315151e-07,
42711
+ "loss": 0.4883,
42712
+ "step": 6065
42713
+ },
42714
+ {
42715
+ "epoch": 2.9131948613278906,
42716
+ "grad_norm": 0.8834241628646851,
42717
+ "learning_rate": 4.13516846953943e-07,
42718
+ "loss": 0.4912,
42719
+ "step": 6066
42720
+ },
42721
+ {
42722
+ "epoch": 2.91367511105775,
42723
+ "grad_norm": 1.1294069290161133,
42724
+ "learning_rate": 4.0893810359272645e-07,
42725
+ "loss": 0.5391,
42726
+ "step": 6067
42727
+ },
42728
+ {
42729
+ "epoch": 2.9141553607876096,
42730
+ "grad_norm": 1.0348907709121704,
42731
+ "learning_rate": 4.043847991174188e-07,
42732
+ "loss": 0.5425,
42733
+ "step": 6068
42734
+ },
42735
+ {
42736
+ "epoch": 2.914635610517469,
42737
+ "grad_norm": 0.9479727149009705,
42738
+ "learning_rate": 3.9985693469108966e-07,
42739
+ "loss": 0.4748,
42740
+ "step": 6069
42741
+ },
42742
+ {
42743
+ "epoch": 2.9151158602473286,
42744
+ "grad_norm": 1.0968918800354004,
42745
+ "learning_rate": 3.953545114703139e-07,
42746
+ "loss": 0.5308,
42747
+ "step": 6070
42748
+ },
42749
+ {
42750
+ "epoch": 2.915596109977188,
42751
+ "grad_norm": 1.1369953155517578,
42752
+ "learning_rate": 3.908775306051604e-07,
42753
+ "loss": 0.5205,
42754
+ "step": 6071
42755
+ },
42756
+ {
42757
+ "epoch": 2.9160763597070476,
42758
+ "grad_norm": 0.9642462730407715,
42759
+ "learning_rate": 3.864259932391923e-07,
42760
+ "loss": 0.5189,
42761
+ "step": 6072
42762
+ },
42763
+ {
42764
+ "epoch": 2.916556609436907,
42765
+ "grad_norm": 1.1746912002563477,
42766
+ "learning_rate": 3.819999005094776e-07,
42767
+ "loss": 0.5465,
42768
+ "step": 6073
42769
+ },
42770
+ {
42771
+ "epoch": 2.9170368591667666,
42772
+ "grad_norm": 0.9538284540176392,
42773
+ "learning_rate": 3.775992535466011e-07,
42774
+ "loss": 0.5,
42775
+ "step": 6074
42776
+ },
42777
+ {
42778
+ "epoch": 2.917517108896626,
42779
+ "grad_norm": 1.2396122217178345,
42780
+ "learning_rate": 3.732240534746301e-07,
42781
+ "loss": 0.5408,
42782
+ "step": 6075
42783
+ },
42784
+ {
42785
+ "epoch": 2.9179973586264856,
42786
+ "grad_norm": 0.9474676847457886,
42787
+ "learning_rate": 3.688743014111262e-07,
42788
+ "loss": 0.4935,
42789
+ "step": 6076
42790
+ },
42791
+ {
42792
+ "epoch": 2.918477608356345,
42793
+ "grad_norm": 1.2469604015350342,
42794
+ "learning_rate": 3.6454999846717855e-07,
42795
+ "loss": 0.5144,
42796
+ "step": 6077
42797
+ },
42798
+ {
42799
+ "epoch": 2.9189578580862046,
42800
+ "grad_norm": 1.8287394046783447,
42801
+ "learning_rate": 3.6025114574734785e-07,
42802
+ "loss": 0.5973,
42803
+ "step": 6078
42804
+ },
42805
+ {
42806
+ "epoch": 2.9194381078160645,
42807
+ "grad_norm": 1.6717603206634521,
42808
+ "learning_rate": 3.5597774434971143e-07,
42809
+ "loss": 0.4906,
42810
+ "step": 6079
42811
+ },
42812
+ {
42813
+ "epoch": 2.919918357545924,
42814
+ "grad_norm": 0.9443919658660889,
42815
+ "learning_rate": 3.517297953658405e-07,
42816
+ "loss": 0.4733,
42817
+ "step": 6080
42818
+ },
42819
+ {
42820
+ "epoch": 2.9203986072757835,
42821
+ "grad_norm": 0.8544421792030334,
42822
+ "learning_rate": 3.4750729988078934e-07,
42823
+ "loss": 0.4918,
42824
+ "step": 6081
42825
+ },
42826
+ {
42827
+ "epoch": 2.920878857005643,
42828
+ "grad_norm": 0.949880838394165,
42829
+ "learning_rate": 3.4331025897313964e-07,
42830
+ "loss": 0.4985,
42831
+ "step": 6082
42832
+ },
42833
+ {
42834
+ "epoch": 2.9213591067355025,
42835
+ "grad_norm": 1.0241985321044922,
42836
+ "learning_rate": 3.391386737149449e-07,
42837
+ "loss": 0.5004,
42838
+ "step": 6083
42839
+ },
42840
+ {
42841
+ "epoch": 2.921839356465362,
42842
+ "grad_norm": 0.9273295402526855,
42843
+ "learning_rate": 3.3499254517177503e-07,
42844
+ "loss": 0.5129,
42845
+ "step": 6084
42846
+ },
42847
+ {
42848
+ "epoch": 2.9223196061952215,
42849
+ "grad_norm": 1.095513105392456,
42850
+ "learning_rate": 3.3087187440268287e-07,
42851
+ "loss": 0.5352,
42852
+ "step": 6085
42853
+ },
42854
+ {
42855
+ "epoch": 2.922799855925081,
42856
+ "grad_norm": 0.8524777293205261,
42857
+ "learning_rate": 3.267766624602375e-07,
42858
+ "loss": 0.4843,
42859
+ "step": 6086
42860
+ },
42861
+ {
42862
+ "epoch": 2.9232801056549405,
42863
+ "grad_norm": 0.8703526258468628,
42864
+ "learning_rate": 3.2270691039048004e-07,
42865
+ "loss": 0.4754,
42866
+ "step": 6087
42867
+ },
42868
+ {
42869
+ "epoch": 2.9237603553848,
42870
+ "grad_norm": 1.0583552122116089,
42871
+ "learning_rate": 3.1866261923296783e-07,
42872
+ "loss": 0.5172,
42873
+ "step": 6088
42874
+ },
42875
+ {
42876
+ "epoch": 2.92424060511466,
42877
+ "grad_norm": 0.9731481671333313,
42878
+ "learning_rate": 3.146437900207411e-07,
42879
+ "loss": 0.5157,
42880
+ "step": 6089
42881
+ },
42882
+ {
42883
+ "epoch": 2.9247208548445194,
42884
+ "grad_norm": 0.992031455039978,
42885
+ "learning_rate": 3.1065042378034535e-07,
42886
+ "loss": 0.4894,
42887
+ "step": 6090
42888
+ },
42889
+ {
42890
+ "epoch": 2.925201104574379,
42891
+ "grad_norm": 0.9598735570907593,
42892
+ "learning_rate": 3.066825215318314e-07,
42893
+ "loss": 0.4762,
42894
+ "step": 6091
42895
+ },
42896
+ {
42897
+ "epoch": 2.9256813543042384,
42898
+ "grad_norm": 1.3992528915405273,
42899
+ "learning_rate": 3.027400842887218e-07,
42900
+ "loss": 0.5185,
42901
+ "step": 6092
42902
+ },
42903
+ {
42904
+ "epoch": 2.926161604034098,
42905
+ "grad_norm": 1.0443490743637085,
42906
+ "learning_rate": 2.988231130580554e-07,
42907
+ "loss": 0.5149,
42908
+ "step": 6093
42909
+ },
42910
+ {
42911
+ "epoch": 2.9266418537639574,
42912
+ "grad_norm": 1.0154131650924683,
42913
+ "learning_rate": 2.949316088403542e-07,
42914
+ "loss": 0.5188,
42915
+ "step": 6094
42916
+ },
42917
+ {
42918
+ "epoch": 2.927122103493817,
42919
+ "grad_norm": 0.9042947888374329,
42920
+ "learning_rate": 2.91065572629623e-07,
42921
+ "loss": 0.5095,
42922
+ "step": 6095
42923
+ },
42924
+ {
42925
+ "epoch": 2.9276023532236763,
42926
+ "grad_norm": 0.8214787244796753,
42927
+ "learning_rate": 2.872250054134051e-07,
42928
+ "loss": 0.4628,
42929
+ "step": 6096
42930
+ },
42931
+ {
42932
+ "epoch": 2.928082602953536,
42933
+ "grad_norm": 1.3600598573684692,
42934
+ "learning_rate": 2.8340990817269377e-07,
42935
+ "loss": 0.4978,
42936
+ "step": 6097
42937
+ },
42938
+ {
42939
+ "epoch": 2.9285628526833953,
42940
+ "grad_norm": 1.0531233549118042,
42941
+ "learning_rate": 2.7962028188198706e-07,
42942
+ "loss": 0.5192,
42943
+ "step": 6098
42944
+ },
42945
+ {
42946
+ "epoch": 2.929043102413255,
42947
+ "grad_norm": 2.1405086517333984,
42948
+ "learning_rate": 2.758561275092886e-07,
42949
+ "loss": 0.583,
42950
+ "step": 6099
42951
+ },
42952
+ {
42953
+ "epoch": 2.9295233521431143,
42954
+ "grad_norm": 1.4039134979248047,
42955
+ "learning_rate": 2.721174460160958e-07,
42956
+ "loss": 0.5415,
42957
+ "step": 6100
42958
+ },
42959
+ {
42960
+ "epoch": 2.930003601872974,
42961
+ "grad_norm": 0.9771497845649719,
42962
+ "learning_rate": 2.6840423835738926e-07,
42963
+ "loss": 0.4586,
42964
+ "step": 6101
42965
+ },
42966
+ {
42967
+ "epoch": 2.9304838516028333,
42968
+ "grad_norm": 1.1628845930099487,
42969
+ "learning_rate": 2.647165054816325e-07,
42970
+ "loss": 0.4963,
42971
+ "step": 6102
42972
+ },
42973
+ {
42974
+ "epoch": 2.930964101332693,
42975
+ "grad_norm": 1.2514691352844238,
42976
+ "learning_rate": 2.610542483308165e-07,
42977
+ "loss": 0.6303,
42978
+ "step": 6103
42979
+ },
42980
+ {
42981
+ "epoch": 2.9314443510625523,
42982
+ "grad_norm": 1.0992681980133057,
42983
+ "learning_rate": 2.574174678403818e-07,
42984
+ "loss": 0.5069,
42985
+ "step": 6104
42986
+ },
42987
+ {
42988
+ "epoch": 2.931924600792412,
42989
+ "grad_norm": 0.8845913410186768,
42990
+ "learning_rate": 2.5380616493930753e-07,
42991
+ "loss": 0.4808,
42992
+ "step": 6105
42993
+ },
42994
+ {
42995
+ "epoch": 2.9324048505222717,
42996
+ "grad_norm": 1.2922005653381348,
42997
+ "learning_rate": 2.5022034055003364e-07,
42998
+ "loss": 0.554,
42999
+ "step": 6106
43000
+ },
43001
+ {
43002
+ "epoch": 2.9328851002521312,
43003
+ "grad_norm": 0.9516582489013672,
43004
+ "learning_rate": 2.4665999558848296e-07,
43005
+ "loss": 0.4622,
43006
+ "step": 6107
43007
+ },
43008
+ {
43009
+ "epoch": 2.9333653499819907,
43010
+ "grad_norm": 0.8160479664802551,
43011
+ "learning_rate": 2.431251309641058e-07,
43012
+ "loss": 0.4416,
43013
+ "step": 6108
43014
+ },
43015
+ {
43016
+ "epoch": 2.93384559971185,
43017
+ "grad_norm": 1.0128464698791504,
43018
+ "learning_rate": 2.396157475798244e-07,
43019
+ "loss": 0.5294,
43020
+ "step": 6109
43021
+ },
43022
+ {
43023
+ "epoch": 2.9343258494417097,
43024
+ "grad_norm": 0.9867235422134399,
43025
+ "learning_rate": 2.361318463320439e-07,
43026
+ "loss": 0.5171,
43027
+ "step": 6110
43028
+ },
43029
+ {
43030
+ "epoch": 2.934806099171569,
43031
+ "grad_norm": 1.2857671976089478,
43032
+ "learning_rate": 2.326734281106746e-07,
43033
+ "loss": 0.4839,
43034
+ "step": 6111
43035
+ },
43036
+ {
43037
+ "epoch": 2.9352863489014287,
43038
+ "grad_norm": 1.1951898336410522,
43039
+ "learning_rate": 2.2924049379909884e-07,
43040
+ "loss": 0.5143,
43041
+ "step": 6112
43042
+ },
43043
+ {
43044
+ "epoch": 2.935766598631288,
43045
+ "grad_norm": 1.0616852045059204,
43046
+ "learning_rate": 2.2583304427421515e-07,
43047
+ "loss": 0.5287,
43048
+ "step": 6113
43049
+ },
43050
+ {
43051
+ "epoch": 2.9362468483611477,
43052
+ "grad_norm": 0.8602057099342346,
43053
+ "learning_rate": 2.2245108040640504e-07,
43054
+ "loss": 0.467,
43055
+ "step": 6114
43056
+ },
43057
+ {
43058
+ "epoch": 2.936727098091007,
43059
+ "grad_norm": 1.0095205307006836,
43060
+ "learning_rate": 2.190946030595331e-07,
43061
+ "loss": 0.5164,
43062
+ "step": 6115
43063
+ },
43064
+ {
43065
+ "epoch": 2.937207347820867,
43066
+ "grad_norm": 0.9440839886665344,
43067
+ "learning_rate": 2.1576361309093573e-07,
43068
+ "loss": 0.5245,
43069
+ "step": 6116
43070
+ },
43071
+ {
43072
+ "epoch": 2.9376875975507266,
43073
+ "grad_norm": 1.0187512636184692,
43074
+ "learning_rate": 2.1245811135148785e-07,
43075
+ "loss": 0.5355,
43076
+ "step": 6117
43077
+ },
43078
+ {
43079
+ "epoch": 2.938167847280586,
43080
+ "grad_norm": 1.1666048765182495,
43081
+ "learning_rate": 2.0917809868550298e-07,
43082
+ "loss": 0.5446,
43083
+ "step": 6118
43084
+ },
43085
+ {
43086
+ "epoch": 2.9386480970104456,
43087
+ "grad_norm": 1.482763409614563,
43088
+ "learning_rate": 2.05923575930822e-07,
43089
+ "loss": 0.5024,
43090
+ "step": 6119
43091
+ },
43092
+ {
43093
+ "epoch": 2.939128346740305,
43094
+ "grad_norm": 0.8116981387138367,
43095
+ "learning_rate": 2.0269454391874666e-07,
43096
+ "loss": 0.5036,
43097
+ "step": 6120
43098
+ },
43099
+ {
43100
+ "epoch": 2.9396085964701646,
43101
+ "grad_norm": 1.0943511724472046,
43102
+ "learning_rate": 1.9949100347409488e-07,
43103
+ "loss": 0.5927,
43104
+ "step": 6121
43105
+ },
43106
+ {
43107
+ "epoch": 2.940088846200024,
43108
+ "grad_norm": 1.011591911315918,
43109
+ "learning_rate": 1.9631295541513438e-07,
43110
+ "loss": 0.4953,
43111
+ "step": 6122
43112
+ },
43113
+ {
43114
+ "epoch": 2.9405690959298836,
43115
+ "grad_norm": 1.0537197589874268,
43116
+ "learning_rate": 1.9316040055366024e-07,
43117
+ "loss": 0.5189,
43118
+ "step": 6123
43119
+ },
43120
+ {
43121
+ "epoch": 2.941049345659743,
43122
+ "grad_norm": 0.989774227142334,
43123
+ "learning_rate": 1.9003333969493942e-07,
43124
+ "loss": 0.5258,
43125
+ "step": 6124
43126
+ },
43127
+ {
43128
+ "epoch": 2.9415295953896026,
43129
+ "grad_norm": 1.2443652153015137,
43130
+ "learning_rate": 1.8693177363773295e-07,
43131
+ "loss": 0.5496,
43132
+ "step": 6125
43133
+ },
43134
+ {
43135
+ "epoch": 2.942009845119462,
43136
+ "grad_norm": 1.1581547260284424,
43137
+ "learning_rate": 1.838557031742738e-07,
43138
+ "loss": 0.5221,
43139
+ "step": 6126
43140
+ },
43141
+ {
43142
+ "epoch": 2.9424900948493216,
43143
+ "grad_norm": 0.8082072734832764,
43144
+ "learning_rate": 1.8080512909028903e-07,
43145
+ "loss": 0.4449,
43146
+ "step": 6127
43147
+ },
43148
+ {
43149
+ "epoch": 2.942970344579181,
43150
+ "grad_norm": 0.9887663125991821,
43151
+ "learning_rate": 1.7778005216502192e-07,
43152
+ "loss": 0.5425,
43153
+ "step": 6128
43154
+ },
43155
+ {
43156
+ "epoch": 2.9434505943090405,
43157
+ "grad_norm": 1.3568744659423828,
43158
+ "learning_rate": 1.7478047317115442e-07,
43159
+ "loss": 0.5477,
43160
+ "step": 6129
43161
+ },
43162
+ {
43163
+ "epoch": 2.9439308440389,
43164
+ "grad_norm": 0.9268665909767151,
43165
+ "learning_rate": 1.7180639287488476e-07,
43166
+ "loss": 0.4908,
43167
+ "step": 6130
43168
+ },
43169
+ {
43170
+ "epoch": 2.9444110937687595,
43171
+ "grad_norm": 1.0100455284118652,
43172
+ "learning_rate": 1.6885781203589413e-07,
43173
+ "loss": 0.4889,
43174
+ "step": 6131
43175
+ },
43176
+ {
43177
+ "epoch": 2.944891343498619,
43178
+ "grad_norm": 1.098319172859192,
43179
+ "learning_rate": 1.6593473140734673e-07,
43180
+ "loss": 0.4875,
43181
+ "step": 6132
43182
+ },
43183
+ {
43184
+ "epoch": 2.945371593228479,
43185
+ "grad_norm": 1.1685726642608643,
43186
+ "learning_rate": 1.6303715173590085e-07,
43187
+ "loss": 0.519,
43188
+ "step": 6133
43189
+ },
43190
+ {
43191
+ "epoch": 2.9458518429583385,
43192
+ "grad_norm": 1.1108653545379639,
43193
+ "learning_rate": 1.6016507376169777e-07,
43194
+ "loss": 0.51,
43195
+ "step": 6134
43196
+ },
43197
+ {
43198
+ "epoch": 2.946332092688198,
43199
+ "grad_norm": 1.1678224802017212,
43200
+ "learning_rate": 1.5731849821833954e-07,
43201
+ "loss": 0.5359,
43202
+ "step": 6135
43203
+ },
43204
+ {
43205
+ "epoch": 2.9468123424180575,
43206
+ "grad_norm": 0.9436126947402954,
43207
+ "learning_rate": 1.544974258329668e-07,
43208
+ "loss": 0.4739,
43209
+ "step": 6136
43210
+ },
43211
+ {
43212
+ "epoch": 2.947292592147917,
43213
+ "grad_norm": 1.0964453220367432,
43214
+ "learning_rate": 1.5170185732615861e-07,
43215
+ "loss": 0.5157,
43216
+ "step": 6137
43217
+ },
43218
+ {
43219
+ "epoch": 2.9477728418777764,
43220
+ "grad_norm": 1.0992753505706787,
43221
+ "learning_rate": 1.4893179341199936e-07,
43222
+ "loss": 0.5308,
43223
+ "step": 6138
43224
+ },
43225
+ {
43226
+ "epoch": 2.948253091607636,
43227
+ "grad_norm": 1.032257080078125,
43228
+ "learning_rate": 1.461872347980564e-07,
43229
+ "loss": 0.5315,
43230
+ "step": 6139
43231
+ },
43232
+ {
43233
+ "epoch": 2.9487333413374954,
43234
+ "grad_norm": 1.2790552377700806,
43235
+ "learning_rate": 1.4346818218539116e-07,
43236
+ "loss": 0.5357,
43237
+ "step": 6140
43238
+ },
43239
+ {
43240
+ "epoch": 2.949213591067355,
43241
+ "grad_norm": 0.9251823425292969,
43242
+ "learning_rate": 1.4077463626852582e-07,
43243
+ "loss": 0.5084,
43244
+ "step": 6141
43245
+ },
43246
+ {
43247
+ "epoch": 2.9496938407972144,
43248
+ "grad_norm": 0.8136346936225891,
43249
+ "learning_rate": 1.3810659773547675e-07,
43250
+ "loss": 0.491,
43251
+ "step": 6142
43252
+ },
43253
+ {
43254
+ "epoch": 2.9501740905270744,
43255
+ "grad_norm": 1.1861847639083862,
43256
+ "learning_rate": 1.354640672677765e-07,
43257
+ "loss": 0.5209,
43258
+ "step": 6143
43259
+ },
43260
+ {
43261
+ "epoch": 2.950654340256934,
43262
+ "grad_norm": 1.9485915899276733,
43263
+ "learning_rate": 1.3284704554039628e-07,
43264
+ "loss": 0.5816,
43265
+ "step": 6144
43266
+ },
43267
+ {
43268
+ "epoch": 2.9511345899867933,
43269
+ "grad_norm": 1.0041749477386475,
43270
+ "learning_rate": 1.302555332218125e-07,
43271
+ "loss": 0.4833,
43272
+ "step": 6145
43273
+ },
43274
+ {
43275
+ "epoch": 2.951614839716653,
43276
+ "grad_norm": 0.9592583775520325,
43277
+ "learning_rate": 1.2768953097398451e-07,
43278
+ "loss": 0.4865,
43279
+ "step": 6146
43280
+ },
43281
+ {
43282
+ "epoch": 2.9520950894465123,
43283
+ "grad_norm": 0.9189772009849548,
43284
+ "learning_rate": 1.2514903945235468e-07,
43285
+ "loss": 0.4895,
43286
+ "step": 6147
43287
+ },
43288
+ {
43289
+ "epoch": 2.952575339176372,
43290
+ "grad_norm": 0.9006944894790649,
43291
+ "learning_rate": 1.2263405930585948e-07,
43292
+ "loss": 0.4491,
43293
+ "step": 6148
43294
+ },
43295
+ {
43296
+ "epoch": 2.9530555889062313,
43297
+ "grad_norm": 0.9726596474647522,
43298
+ "learning_rate": 1.2014459117689613e-07,
43299
+ "loss": 0.5209,
43300
+ "step": 6149
43301
+ },
43302
+ {
43303
+ "epoch": 2.953535838636091,
43304
+ "grad_norm": 1.1057935953140259,
43305
+ "learning_rate": 1.1768063570136711e-07,
43306
+ "loss": 0.561,
43307
+ "step": 6150
43308
+ },
43309
+ {
43310
+ "epoch": 2.9540160883659503,
43311
+ "grad_norm": 0.9758303761482239,
43312
+ "learning_rate": 1.1524219350863563e-07,
43313
+ "loss": 0.4964,
43314
+ "step": 6151
43315
+ },
43316
+ {
43317
+ "epoch": 2.95449633809581,
43318
+ "grad_norm": 0.9577860832214355,
43319
+ "learning_rate": 1.1282926522158122e-07,
43320
+ "loss": 0.4965,
43321
+ "step": 6152
43322
+ },
43323
+ {
43324
+ "epoch": 2.9549765878256693,
43325
+ "grad_norm": 1.118918776512146,
43326
+ "learning_rate": 1.1044185145653307e-07,
43327
+ "loss": 0.5035,
43328
+ "step": 6153
43329
+ },
43330
+ {
43331
+ "epoch": 2.955456837555529,
43332
+ "grad_norm": 1.3922710418701172,
43333
+ "learning_rate": 1.0807995282332562e-07,
43334
+ "loss": 0.543,
43335
+ "step": 6154
43336
+ },
43337
+ {
43338
+ "epoch": 2.9559370872853883,
43339
+ "grad_norm": 1.3283149003982544,
43340
+ "learning_rate": 1.0574356992525403e-07,
43341
+ "loss": 0.5835,
43342
+ "step": 6155
43343
+ },
43344
+ {
43345
+ "epoch": 2.956417337015248,
43346
+ "grad_norm": 0.9737280607223511,
43347
+ "learning_rate": 1.0343270335910759e-07,
43348
+ "loss": 0.5028,
43349
+ "step": 6156
43350
+ },
43351
+ {
43352
+ "epoch": 2.9568975867451073,
43353
+ "grad_norm": 0.8545934557914734,
43354
+ "learning_rate": 1.011473537151697e-07,
43355
+ "loss": 0.4275,
43356
+ "step": 6157
43357
+ },
43358
+ {
43359
+ "epoch": 2.9573778364749668,
43360
+ "grad_norm": 1.344193458557129,
43361
+ "learning_rate": 9.888752157719561e-08,
43362
+ "loss": 0.4945,
43363
+ "step": 6158
43364
+ },
43365
+ {
43366
+ "epoch": 2.9578580862048263,
43367
+ "grad_norm": 0.9803805351257324,
43368
+ "learning_rate": 9.665320752242357e-08,
43369
+ "loss": 0.5197,
43370
+ "step": 6159
43371
+ },
43372
+ {
43373
+ "epoch": 2.958338335934686,
43374
+ "grad_norm": 1.179153323173523,
43375
+ "learning_rate": 9.444441212155264e-08,
43376
+ "loss": 0.5044,
43377
+ "step": 6160
43378
+ },
43379
+ {
43380
+ "epoch": 2.9588185856645457,
43381
+ "grad_norm": 0.9996656179428101,
43382
+ "learning_rate": 9.226113593880925e-08,
43383
+ "loss": 0.5034,
43384
+ "step": 6161
43385
+ },
43386
+ {
43387
+ "epoch": 2.959298835394405,
43388
+ "grad_norm": 0.9858896732330322,
43389
+ "learning_rate": 9.010337953185843e-08,
43390
+ "loss": 0.4707,
43391
+ "step": 6162
43392
+ },
43393
+ {
43394
+ "epoch": 2.9597790851242647,
43395
+ "grad_norm": 1.4443416595458984,
43396
+ "learning_rate": 8.797114345185931e-08,
43397
+ "loss": 0.5373,
43398
+ "step": 6163
43399
+ },
43400
+ {
43401
+ "epoch": 2.960259334854124,
43402
+ "grad_norm": 1.2540090084075928,
43403
+ "learning_rate": 8.586442824347618e-08,
43404
+ "loss": 0.5084,
43405
+ "step": 6164
43406
+ },
43407
+ {
43408
+ "epoch": 2.9607395845839837,
43409
+ "grad_norm": 1.0520621538162231,
43410
+ "learning_rate": 8.378323444481196e-08,
43411
+ "loss": 0.5556,
43412
+ "step": 6165
43413
+ },
43414
+ {
43415
+ "epoch": 2.961219834313843,
43416
+ "grad_norm": 1.0082722902297974,
43417
+ "learning_rate": 8.172756258748581e-08,
43418
+ "loss": 0.5069,
43419
+ "step": 6166
43420
+ },
43421
+ {
43422
+ "epoch": 2.9617000840437027,
43423
+ "grad_norm": 1.133471965789795,
43424
+ "learning_rate": 7.969741319658886e-08,
43425
+ "loss": 0.5275,
43426
+ "step": 6167
43427
+ },
43428
+ {
43429
+ "epoch": 2.962180333773562,
43430
+ "grad_norm": 1.6830706596374512,
43431
+ "learning_rate": 7.769278679068404e-08,
43432
+ "loss": 0.4995,
43433
+ "step": 6168
43434
+ },
43435
+ {
43436
+ "epoch": 2.9626605835034217,
43437
+ "grad_norm": 0.9531680345535278,
43438
+ "learning_rate": 7.571368388181732e-08,
43439
+ "loss": 0.4945,
43440
+ "step": 6169
43441
+ },
43442
+ {
43443
+ "epoch": 2.963140833233281,
43444
+ "grad_norm": 1.3332419395446777,
43445
+ "learning_rate": 7.376010497551767e-08,
43446
+ "loss": 0.5482,
43447
+ "step": 6170
43448
+ },
43449
+ {
43450
+ "epoch": 2.963621082963141,
43451
+ "grad_norm": 1.0001851320266724,
43452
+ "learning_rate": 7.1832050570797e-08,
43453
+ "loss": 0.4879,
43454
+ "step": 6171
43455
+ },
43456
+ {
43457
+ "epoch": 2.9641013326930006,
43458
+ "grad_norm": 1.1255953311920166,
43459
+ "learning_rate": 6.992952116013918e-08,
43460
+ "loss": 0.5539,
43461
+ "step": 6172
43462
+ },
43463
+ {
43464
+ "epoch": 2.96458158242286,
43465
+ "grad_norm": 1.1258360147476196,
43466
+ "learning_rate": 6.805251722953321e-08,
43467
+ "loss": 0.4902,
43468
+ "step": 6173
43469
+ },
43470
+ {
43471
+ "epoch": 2.9650618321527196,
43472
+ "grad_norm": 1.3705956935882568,
43473
+ "learning_rate": 6.62010392584067e-08,
43474
+ "loss": 0.5516,
43475
+ "step": 6174
43476
+ },
43477
+ {
43478
+ "epoch": 2.965542081882579,
43479
+ "grad_norm": 0.8385779857635498,
43480
+ "learning_rate": 6.437508771969248e-08,
43481
+ "loss": 0.4961,
43482
+ "step": 6175
43483
+ },
43484
+ {
43485
+ "epoch": 2.9660223316124386,
43486
+ "grad_norm": 1.0862523317337036,
43487
+ "learning_rate": 6.25746630798063e-08,
43488
+ "loss": 0.5219,
43489
+ "step": 6176
43490
+ },
43491
+ {
43492
+ "epoch": 2.966502581342298,
43493
+ "grad_norm": 0.9276284575462341,
43494
+ "learning_rate": 6.079976579863589e-08,
43495
+ "loss": 0.4518,
43496
+ "step": 6177
43497
+ },
43498
+ {
43499
+ "epoch": 2.9669828310721575,
43500
+ "grad_norm": 1.0246821641921997,
43501
+ "learning_rate": 5.905039632954079e-08,
43502
+ "loss": 0.5438,
43503
+ "step": 6178
43504
+ },
43505
+ {
43506
+ "epoch": 2.967463080802017,
43507
+ "grad_norm": 0.8367186784744263,
43508
+ "learning_rate": 5.732655511938578e-08,
43509
+ "loss": 0.4939,
43510
+ "step": 6179
43511
+ },
43512
+ {
43513
+ "epoch": 2.9679433305318765,
43514
+ "grad_norm": 0.9959518313407898,
43515
+ "learning_rate": 5.562824260848531e-08,
43516
+ "loss": 0.4789,
43517
+ "step": 6180
43518
+ },
43519
+ {
43520
+ "epoch": 2.968423580261736,
43521
+ "grad_norm": 0.906194269657135,
43522
+ "learning_rate": 5.395545923063683e-08,
43523
+ "loss": 0.4436,
43524
+ "step": 6181
43525
+ },
43526
+ {
43527
+ "epoch": 2.9689038299915955,
43528
+ "grad_norm": 0.9833221435546875,
43529
+ "learning_rate": 5.2308205413142964e-08,
43530
+ "loss": 0.4892,
43531
+ "step": 6182
43532
+ },
43533
+ {
43534
+ "epoch": 2.969384079721455,
43535
+ "grad_norm": 0.8796568512916565,
43536
+ "learning_rate": 5.068648157675604e-08,
43537
+ "loss": 0.501,
43538
+ "step": 6183
43539
+ },
43540
+ {
43541
+ "epoch": 2.9698643294513145,
43542
+ "grad_norm": 1.5020475387573242,
43543
+ "learning_rate": 4.909028813573357e-08,
43544
+ "loss": 0.5203,
43545
+ "step": 6184
43546
+ },
43547
+ {
43548
+ "epoch": 2.970344579181174,
43549
+ "grad_norm": 0.9882216453552246,
43550
+ "learning_rate": 4.751962549777167e-08,
43551
+ "loss": 0.5011,
43552
+ "step": 6185
43553
+ },
43554
+ {
43555
+ "epoch": 2.9708248289110335,
43556
+ "grad_norm": 1.1814683675765991,
43557
+ "learning_rate": 4.597449406409382e-08,
43558
+ "loss": 0.5586,
43559
+ "step": 6186
43560
+ },
43561
+ {
43562
+ "epoch": 2.971305078640893,
43563
+ "grad_norm": 0.9532738924026489,
43564
+ "learning_rate": 4.445489422936211e-08,
43565
+ "loss": 0.5274,
43566
+ "step": 6187
43567
+ },
43568
+ {
43569
+ "epoch": 2.971785328370753,
43570
+ "grad_norm": 1.0383621454238892,
43571
+ "learning_rate": 4.296082638173271e-08,
43572
+ "loss": 0.4749,
43573
+ "step": 6188
43574
+ },
43575
+ {
43576
+ "epoch": 2.9722655781006124,
43577
+ "grad_norm": 0.7210098505020142,
43578
+ "learning_rate": 4.149229090285589e-08,
43579
+ "loss": 0.4322,
43580
+ "step": 6189
43581
+ },
43582
+ {
43583
+ "epoch": 2.972745827830472,
43584
+ "grad_norm": 1.1749037504196167,
43585
+ "learning_rate": 4.0049288167842705e-08,
43586
+ "loss": 0.5319,
43587
+ "step": 6190
43588
+ },
43589
+ {
43590
+ "epoch": 2.9732260775603314,
43591
+ "grad_norm": 1.0950450897216797,
43592
+ "learning_rate": 3.8631818545264986e-08,
43593
+ "loss": 0.4961,
43594
+ "step": 6191
43595
+ },
43596
+ {
43597
+ "epoch": 2.973706327290191,
43598
+ "grad_norm": 1.0302187204360962,
43599
+ "learning_rate": 3.7239882397210876e-08,
43600
+ "loss": 0.5125,
43601
+ "step": 6192
43602
+ },
43603
+ {
43604
+ "epoch": 2.9741865770200504,
43605
+ "grad_norm": 1.0376499891281128,
43606
+ "learning_rate": 3.58734800792071e-08,
43607
+ "loss": 0.5335,
43608
+ "step": 6193
43609
+ },
43610
+ {
43611
+ "epoch": 2.97466682674991,
43612
+ "grad_norm": 1.0481114387512207,
43613
+ "learning_rate": 3.4532611940307766e-08,
43614
+ "loss": 0.4871,
43615
+ "step": 6194
43616
+ },
43617
+ {
43618
+ "epoch": 2.9751470764797694,
43619
+ "grad_norm": 0.8409088253974915,
43620
+ "learning_rate": 3.321727832299448e-08,
43621
+ "loss": 0.4613,
43622
+ "step": 6195
43623
+ },
43624
+ {
43625
+ "epoch": 2.975627326209629,
43626
+ "grad_norm": 1.3083711862564087,
43627
+ "learning_rate": 3.1927479563254036e-08,
43628
+ "loss": 0.5265,
43629
+ "step": 6196
43630
+ },
43631
+ {
43632
+ "epoch": 2.9761075759394884,
43633
+ "grad_norm": 0.9847288727760315,
43634
+ "learning_rate": 3.0663215990534014e-08,
43635
+ "loss": 0.4811,
43636
+ "step": 6197
43637
+ },
43638
+ {
43639
+ "epoch": 2.9765878256693483,
43640
+ "grad_norm": 0.9412215352058411,
43641
+ "learning_rate": 2.942448792778718e-08,
43642
+ "loss": 0.5228,
43643
+ "step": 6198
43644
+ },
43645
+ {
43646
+ "epoch": 2.977068075399208,
43647
+ "grad_norm": 1.1179654598236084,
43648
+ "learning_rate": 2.8211295691416006e-08,
43649
+ "loss": 0.5169,
43650
+ "step": 6199
43651
+ },
43652
+ {
43653
+ "epoch": 2.9775483251290673,
43654
+ "grad_norm": 0.9434910416603088,
43655
+ "learning_rate": 2.702363959131704e-08,
43656
+ "loss": 0.47,
43657
+ "step": 6200
43658
+ },
43659
+ {
43660
+ "epoch": 2.9775483251290673,
43661
+ "eval_loss": 0.12413108348846436,
43662
+ "eval_runtime": 6.3651,
43663
+ "eval_samples_per_second": 157.892,
43664
+ "eval_steps_per_second": 6.598,
43665
+ "step": 6200
43666
  }
43667
  ],
43668
  "logging_steps": 1,
 
43677
  "early_stopping_threshold": 0.0
43678
  },
43679
  "attributes": {
43680
+ "early_stopping_patience_counter": 3
43681
  }
43682
  },
43683
  "TrainerControl": {
 
43686
  "should_evaluate": false,
43687
  "should_log": false,
43688
  "should_save": true,
43689
+ "should_training_stop": true
43690
  },
43691
  "attributes": {}
43692
  }
43693
  },
43694
+ "total_flos": 1.4918862613826765e+17,
43695
  "train_batch_size": 24,
43696
  "trial_name": null,
43697
  "trial_params": null