Upload 12 files

Browse files

Files changed (13) hide show

.gitattributes +1 -0
config.json +30 -0
generation_config.json +6 -0
model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
special_tokens_map.json +0 -0
tokenizer.json +3 -0
tokenizer_config.json +0 -0
trainer_state.json +1083 -0
training_args.bin +3 -0
vocab.json +0 -0

.gitattributes CHANGED Viewed

@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text

config.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "_name_or_path": "D:\\AI_APPS\\OuteTTS\\OuteTTS-0.3-500M",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 896,
+  "initializer_range": 0.02,
+  "intermediate_size": 4864,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 24,
+  "model_type": "qwen2",
+  "num_attention_heads": 14,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.3",
+  "use_cache": true,
+  "use_mrope": false,
+  "use_sliding_window": false,
+  "vocab_size": 157696
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "pad_token_id": 151645,
+  "transformers_version": "4.48.3"
+}

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:440718d6f157a98fd22fc60f869ae16440cb4fe7003c91d7ecea9497a6e2ae5c
+size 1996807320

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:603296525381dbef5032aba1f1295f5eb51f183d81a3b67d62c15d625b45a048
+size 3993792954

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cdf4d700cdd7ef81bf005d2842e2f669e1810471cfce670ab79872c6ee698af7
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a49e29b758d40cfe1c153cb8b95a52af827c195e13df13a4a897c5cb79216700
+size 1064

special_tokens_map.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9dd411277ccf5ecddf0cb0c7133a0f9be0c5a5d5e9f20874a1f10512429fbcd7
+size 12371875

tokenizer_config.json ADDED Viewed

The diff for this file is too large to render. See raw diff

trainer_state.json ADDED Viewed

	@@ -0,0 +1,1083 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.03161089100564781,
+  "eval_steps": 500,
+  "global_step": 1500,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.00021073927337098542,
+      "grad_norm": 4.289033889770508,
+      "learning_rate": 4.998946303633145e-05,
+      "loss": 3.8267,
+      "step": 10
+    },
+    {
+      "epoch": 0.00042147854674197085,
+      "grad_norm": 4.060084342956543,
+      "learning_rate": 4.997997976902976e-05,
+      "loss": 3.6144,
+      "step": 20
+    },
+    {
+      "epoch": 0.0006322178201129563,
+      "grad_norm": 4.173689842224121,
+      "learning_rate": 4.996944280536121e-05,
+      "loss": 3.4868,
+      "step": 30
+    },
+    {
+      "epoch": 0.0008429570934839417,
+      "grad_norm": 5.22714376449585,
+      "learning_rate": 4.995890584169266e-05,
+      "loss": 3.4488,
+      "step": 40
+    },
+    {
+      "epoch": 0.001053696366854927,
+      "grad_norm": 5.710427284240723,
+      "learning_rate": 4.994836887802411e-05,
+      "loss": 3.3072,
+      "step": 50
+    },
+    {
+      "epoch": 0.0012644356402259126,
+      "grad_norm": 5.226071357727051,
+      "learning_rate": 4.993783191435556e-05,
+      "loss": 3.3811,
+      "step": 60
+    },
+    {
+      "epoch": 0.0014751749135968979,
+      "grad_norm": 4.471126556396484,
+      "learning_rate": 4.9927294950687014e-05,
+      "loss": 3.3294,
+      "step": 70
+    },
+    {
+      "epoch": 0.0016859141869678834,
+      "grad_norm": 3.511232376098633,
+      "learning_rate": 4.991675798701846e-05,
+      "loss": 3.274,
+      "step": 80
+    },
+    {
+      "epoch": 0.0018966534603388687,
+      "grad_norm": 3.6000168323516846,
+      "learning_rate": 4.990622102334991e-05,
+      "loss": 3.2927,
+      "step": 90
+    },
+    {
+      "epoch": 0.002107392733709854,
+      "grad_norm": 3.6504461765289307,
+      "learning_rate": 4.9895684059681366e-05,
+      "loss": 3.1517,
+      "step": 100
+    },
+    {
+      "epoch": 0.0023181320070808395,
+      "grad_norm": 5.8762288093566895,
+      "learning_rate": 4.9885147096012814e-05,
+      "loss": 3.2165,
+      "step": 110
+    },
+    {
+      "epoch": 0.002528871280451825,
+      "grad_norm": 3.862133264541626,
+      "learning_rate": 4.987461013234427e-05,
+      "loss": 3.2496,
+      "step": 120
+    },
+    {
+      "epoch": 0.0027396105538228105,
+      "grad_norm": 3.1009256839752197,
+      "learning_rate": 4.986407316867572e-05,
+      "loss": 3.1269,
+      "step": 130
+    },
+    {
+      "epoch": 0.0029503498271937958,
+      "grad_norm": 4.951446056365967,
+      "learning_rate": 4.9853536205007165e-05,
+      "loss": 3.142,
+      "step": 140
+    },
+    {
+      "epoch": 0.003161089100564781,
+      "grad_norm": 4.305763244628906,
+      "learning_rate": 4.9842999241338614e-05,
+      "loss": 3.1518,
+      "step": 150
+    },
+    {
+      "epoch": 0.003371828373935767,
+      "grad_norm": 4.36134672164917,
+      "learning_rate": 4.983246227767007e-05,
+      "loss": 3.1285,
+      "step": 160
+    },
+    {
+      "epoch": 0.003582567647306752,
+      "grad_norm": 3.9053471088409424,
+      "learning_rate": 4.9821925314001524e-05,
+      "loss": 3.0738,
+      "step": 170
+    },
+    {
+      "epoch": 0.0037933069206777374,
+      "grad_norm": 13.242171287536621,
+      "learning_rate": 4.981138835033297e-05,
+      "loss": 3.0425,
+      "step": 180
+    },
+    {
+      "epoch": 0.004004046194048723,
+      "grad_norm": 3.8637614250183105,
+      "learning_rate": 4.980085138666442e-05,
+      "loss": 3.007,
+      "step": 190
+    },
+    {
+      "epoch": 0.004214785467419708,
+      "grad_norm": 4.488512992858887,
+      "learning_rate": 4.979031442299587e-05,
+      "loss": 3.0578,
+      "step": 200
+    },
+    {
+      "epoch": 0.004425524740790694,
+      "grad_norm": 5.628246307373047,
+      "learning_rate": 4.9779777459327324e-05,
+      "loss": 3.0215,
+      "step": 210
+    },
+    {
+      "epoch": 0.004636264014161679,
+      "grad_norm": 3.368596315383911,
+      "learning_rate": 4.976924049565878e-05,
+      "loss": 3.096,
+      "step": 220
+    },
+    {
+      "epoch": 0.004847003287532664,
+      "grad_norm": 7.204585075378418,
+      "learning_rate": 4.975870353199023e-05,
+      "loss": 2.9822,
+      "step": 230
+    },
+    {
+      "epoch": 0.00505774256090365,
+      "grad_norm": 4.260004997253418,
+      "learning_rate": 4.9748166568321675e-05,
+      "loss": 3.0716,
+      "step": 240
+    },
+    {
+      "epoch": 0.005268481834274636,
+      "grad_norm": 4.109992504119873,
+      "learning_rate": 4.973762960465312e-05,
+      "loss": 2.9789,
+      "step": 250
+    },
+    {
+      "epoch": 0.005479221107645621,
+      "grad_norm": 10.826703071594238,
+      "learning_rate": 4.972709264098457e-05,
+      "loss": 2.9928,
+      "step": 260
+    },
+    {
+      "epoch": 0.005689960381016606,
+      "grad_norm": 4.802614212036133,
+      "learning_rate": 4.9716555677316027e-05,
+      "loss": 3.0421,
+      "step": 270
+    },
+    {
+      "epoch": 0.0059006996543875916,
+      "grad_norm": 4.688140392303467,
+      "learning_rate": 4.970601871364748e-05,
+      "loss": 2.8983,
+      "step": 280
+    },
+    {
+      "epoch": 0.006111438927758577,
+      "grad_norm": 4.237213134765625,
+      "learning_rate": 4.969548174997893e-05,
+      "loss": 3.0775,
+      "step": 290
+    },
+    {
+      "epoch": 0.006322178201129562,
+      "grad_norm": 6.8719000816345215,
+      "learning_rate": 4.968494478631038e-05,
+      "loss": 2.9356,
+      "step": 300
+    },
+    {
+      "epoch": 0.006532917474500548,
+      "grad_norm": 6.765329360961914,
+      "learning_rate": 4.9674407822641826e-05,
+      "loss": 3.0783,
+      "step": 310
+    },
+    {
+      "epoch": 0.006743656747871534,
+      "grad_norm": 4.296706199645996,
+      "learning_rate": 4.9664924555340135e-05,
+      "loss": 2.831,
+      "step": 320
+    },
+    {
+      "epoch": 0.006954396021242519,
+      "grad_norm": 5.505979537963867,
+      "learning_rate": 4.965438759167158e-05,
+      "loss": 3.0016,
+      "step": 330
+    },
+    {
+      "epoch": 0.007165135294613504,
+      "grad_norm": 15.19791316986084,
+      "learning_rate": 4.964385062800304e-05,
+      "loss": 2.9707,
+      "step": 340
+    },
+    {
+      "epoch": 0.0073758745679844894,
+      "grad_norm": 3.5731112957000732,
+      "learning_rate": 4.9633313664334487e-05,
+      "loss": 3.0052,
+      "step": 350
+    },
+    {
+      "epoch": 0.007586613841355475,
+      "grad_norm": 4.196691989898682,
+      "learning_rate": 4.962277670066594e-05,
+      "loss": 3.0018,
+      "step": 360
+    },
+    {
+      "epoch": 0.00779735311472646,
+      "grad_norm": 3.7209060192108154,
+      "learning_rate": 4.961223973699739e-05,
+      "loss": 2.9956,
+      "step": 370
+    },
+    {
+      "epoch": 0.008008092388097446,
+      "grad_norm": 4.67873477935791,
+      "learning_rate": 4.960170277332884e-05,
+      "loss": 3.0256,
+      "step": 380
+    },
+    {
+      "epoch": 0.008218831661468431,
+      "grad_norm": 4.471567630767822,
+      "learning_rate": 4.9591165809660286e-05,
+      "loss": 2.9656,
+      "step": 390
+    },
+    {
+      "epoch": 0.008429570934839417,
+      "grad_norm": 4.6043195724487305,
+      "learning_rate": 4.958062884599174e-05,
+      "loss": 2.9317,
+      "step": 400
+    },
+    {
+      "epoch": 0.008640310208210402,
+      "grad_norm": 5.873717308044434,
+      "learning_rate": 4.9570091882323196e-05,
+      "loss": 3.0048,
+      "step": 410
+    },
+    {
+      "epoch": 0.008851049481581387,
+      "grad_norm": 4.8152923583984375,
+      "learning_rate": 4.9559554918654645e-05,
+      "loss": 2.933,
+      "step": 420
+    },
+    {
+      "epoch": 0.009061788754952373,
+      "grad_norm": 3.463914632797241,
+      "learning_rate": 4.954901795498609e-05,
+      "loss": 2.9188,
+      "step": 430
+    },
+    {
+      "epoch": 0.009272528028323358,
+      "grad_norm": 5.305572986602783,
+      "learning_rate": 4.953848099131754e-05,
+      "loss": 3.1017,
+      "step": 440
+    },
+    {
+      "epoch": 0.009483267301694343,
+      "grad_norm": 3.027397394180298,
+      "learning_rate": 4.952794402764899e-05,
+      "loss": 2.9651,
+      "step": 450
+    },
+    {
+      "epoch": 0.009694006575065328,
+      "grad_norm": 3.619413375854492,
+      "learning_rate": 4.9517407063980444e-05,
+      "loss": 3.0447,
+      "step": 460
+    },
+    {
+      "epoch": 0.009904745848436314,
+      "grad_norm": 3.6718661785125732,
+      "learning_rate": 4.95068701003119e-05,
+      "loss": 2.9183,
+      "step": 470
+    },
+    {
+      "epoch": 0.0101154851218073,
+      "grad_norm": 3.527235984802246,
+      "learning_rate": 4.949633313664335e-05,
+      "loss": 2.9889,
+      "step": 480
+    },
+    {
+      "epoch": 0.010326224395178286,
+      "grad_norm": 3.9344675540924072,
+      "learning_rate": 4.9485796172974796e-05,
+      "loss": 2.9665,
+      "step": 490
+    },
+    {
+      "epoch": 0.010536963668549271,
+      "grad_norm": 6.459798336029053,
+      "learning_rate": 4.9475259209306244e-05,
+      "loss": 2.9079,
+      "step": 500
+    },
+    {
+      "epoch": 0.010747702941920257,
+      "grad_norm": 3.535411834716797,
+      "learning_rate": 4.94647222456377e-05,
+      "loss": 3.0064,
+      "step": 510
+    },
+    {
+      "epoch": 0.010958442215291242,
+      "grad_norm": 3.441504716873169,
+      "learning_rate": 4.9454185281969154e-05,
+      "loss": 2.8912,
+      "step": 520
+    },
+    {
+      "epoch": 0.011169181488662227,
+      "grad_norm": 6.144808292388916,
+      "learning_rate": 4.94436483183006e-05,
+      "loss": 2.9239,
+      "step": 530
+    },
+    {
+      "epoch": 0.011379920762033213,
+      "grad_norm": 3.7004387378692627,
+      "learning_rate": 4.943311135463205e-05,
+      "loss": 2.9526,
+      "step": 540
+    },
+    {
+      "epoch": 0.011590660035404198,
+      "grad_norm": 6.244487762451172,
+      "learning_rate": 4.94225743909635e-05,
+      "loss": 2.8755,
+      "step": 550
+    },
+    {
+      "epoch": 0.011801399308775183,
+      "grad_norm": 3.984456777572632,
+      "learning_rate": 4.9412037427294954e-05,
+      "loss": 2.835,
+      "step": 560
+    },
+    {
+      "epoch": 0.012012138582146168,
+      "grad_norm": 4.6722941398620605,
+      "learning_rate": 4.94015004636264e-05,
+      "loss": 2.9199,
+      "step": 570
+    },
+    {
+      "epoch": 0.012222877855517154,
+      "grad_norm": 4.149166584014893,
+      "learning_rate": 4.939096349995786e-05,
+      "loss": 3.0706,
+      "step": 580
+    },
+    {
+      "epoch": 0.012433617128888139,
+      "grad_norm": 3.566357374191284,
+      "learning_rate": 4.9380426536289305e-05,
+      "loss": 2.9011,
+      "step": 590
+    },
+    {
+      "epoch": 0.012644356402259124,
+      "grad_norm": 3.4722342491149902,
+      "learning_rate": 4.9369889572620754e-05,
+      "loss": 2.9345,
+      "step": 600
+    },
+    {
+      "epoch": 0.01285509567563011,
+      "grad_norm": 3.4608707427978516,
+      "learning_rate": 4.935935260895221e-05,
+      "loss": 2.8419,
+      "step": 610
+    },
+    {
+      "epoch": 0.013065834949001097,
+      "grad_norm": 3.4312050342559814,
+      "learning_rate": 4.934881564528366e-05,
+      "loss": 2.9087,
+      "step": 620
+    },
+    {
+      "epoch": 0.013276574222372082,
+      "grad_norm": 6.524892330169678,
+      "learning_rate": 4.9338278681615105e-05,
+      "loss": 2.8818,
+      "step": 630
+    },
+    {
+      "epoch": 0.013487313495743067,
+      "grad_norm": 5.508037567138672,
+      "learning_rate": 4.932774171794656e-05,
+      "loss": 2.7335,
+      "step": 640
+    },
+    {
+      "epoch": 0.013698052769114052,
+      "grad_norm": 4.094667911529541,
+      "learning_rate": 4.931720475427801e-05,
+      "loss": 2.9197,
+      "step": 650
+    },
+    {
+      "epoch": 0.013908792042485038,
+      "grad_norm": 3.6645474433898926,
+      "learning_rate": 4.9306667790609463e-05,
+      "loss": 2.9647,
+      "step": 660
+    },
+    {
+      "epoch": 0.014119531315856023,
+      "grad_norm": 3.711179256439209,
+      "learning_rate": 4.929613082694091e-05,
+      "loss": 2.9863,
+      "step": 670
+    },
+    {
+      "epoch": 0.014330270589227008,
+      "grad_norm": 3.904421329498291,
+      "learning_rate": 4.928559386327236e-05,
+      "loss": 2.8947,
+      "step": 680
+    },
+    {
+      "epoch": 0.014541009862597994,
+      "grad_norm": 3.3760502338409424,
+      "learning_rate": 4.927505689960381e-05,
+      "loss": 2.7375,
+      "step": 690
+    },
+    {
+      "epoch": 0.014751749135968979,
+      "grad_norm": 4.068516254425049,
+      "learning_rate": 4.926451993593526e-05,
+      "loss": 2.8502,
+      "step": 700
+    },
+    {
+      "epoch": 0.014962488409339964,
+      "grad_norm": 4.1656999588012695,
+      "learning_rate": 4.925398297226672e-05,
+      "loss": 2.8688,
+      "step": 710
+    },
+    {
+      "epoch": 0.01517322768271095,
+      "grad_norm": 3.5596961975097656,
+      "learning_rate": 4.9243446008598166e-05,
+      "loss": 2.9094,
+      "step": 720
+    },
+    {
+      "epoch": 0.015383966956081935,
+      "grad_norm": 4.3002753257751465,
+      "learning_rate": 4.9232909044929615e-05,
+      "loss": 2.8964,
+      "step": 730
+    },
+    {
+      "epoch": 0.01559470622945292,
+      "grad_norm": 5.381309509277344,
+      "learning_rate": 4.922237208126106e-05,
+      "loss": 2.8543,
+      "step": 740
+    },
+    {
+      "epoch": 0.015805445502823905,
+      "grad_norm": 5.13466739654541,
+      "learning_rate": 4.921183511759252e-05,
+      "loss": 2.8754,
+      "step": 750
+    },
+    {
+      "epoch": 0.016016184776194892,
+      "grad_norm": 4.221550464630127,
+      "learning_rate": 4.9201298153923966e-05,
+      "loss": 2.9192,
+      "step": 760
+    },
+    {
+      "epoch": 0.016226924049565876,
+      "grad_norm": 4.988082408905029,
+      "learning_rate": 4.919076119025542e-05,
+      "loss": 2.8325,
+      "step": 770
+    },
+    {
+      "epoch": 0.016437663322936863,
+      "grad_norm": 6.187538146972656,
+      "learning_rate": 4.918022422658687e-05,
+      "loss": 2.8819,
+      "step": 780
+    },
+    {
+      "epoch": 0.016648402596307846,
+      "grad_norm": 3.920051336288452,
+      "learning_rate": 4.916968726291832e-05,
+      "loss": 2.8123,
+      "step": 790
+    },
+    {
+      "epoch": 0.016859141869678834,
+      "grad_norm": 3.4514334201812744,
+      "learning_rate": 4.915915029924977e-05,
+      "loss": 2.8523,
+      "step": 800
+    },
+    {
+      "epoch": 0.017069881143049817,
+      "grad_norm": 5.007158279418945,
+      "learning_rate": 4.914861333558122e-05,
+      "loss": 2.8607,
+      "step": 810
+    },
+    {
+      "epoch": 0.017280620416420804,
+      "grad_norm": 4.900107383728027,
+      "learning_rate": 4.913807637191267e-05,
+      "loss": 2.7612,
+      "step": 820
+    },
+    {
+      "epoch": 0.01749135968979179,
+      "grad_norm": 7.45019006729126,
+      "learning_rate": 4.9127539408244124e-05,
+      "loss": 2.913,
+      "step": 830
+    },
+    {
+      "epoch": 0.017702098963162775,
+      "grad_norm": 5.6856770515441895,
+      "learning_rate": 4.911700244457557e-05,
+      "loss": 2.8877,
+      "step": 840
+    },
+    {
+      "epoch": 0.01791283823653376,
+      "grad_norm": 4.228877067565918,
+      "learning_rate": 4.910646548090703e-05,
+      "loss": 2.7294,
+      "step": 850
+    },
+    {
+      "epoch": 0.018123577509904745,
+      "grad_norm": 3.372474193572998,
+      "learning_rate": 4.9095928517238476e-05,
+      "loss": 2.7951,
+      "step": 860
+    },
+    {
+      "epoch": 0.018334316783275732,
+      "grad_norm": 3.3396689891815186,
+      "learning_rate": 4.9085391553569924e-05,
+      "loss": 2.8488,
+      "step": 870
+    },
+    {
+      "epoch": 0.018545056056646716,
+      "grad_norm": 3.912947177886963,
+      "learning_rate": 4.907485458990138e-05,
+      "loss": 2.7816,
+      "step": 880
+    },
+    {
+      "epoch": 0.018755795330017703,
+      "grad_norm": 3.7946770191192627,
+      "learning_rate": 4.906431762623283e-05,
+      "loss": 2.8841,
+      "step": 890
+    },
+    {
+      "epoch": 0.018966534603388686,
+      "grad_norm": 4.324241638183594,
+      "learning_rate": 4.905378066256428e-05,
+      "loss": 2.8666,
+      "step": 900
+    },
+    {
+      "epoch": 0.019177273876759673,
+      "grad_norm": 3.172816038131714,
+      "learning_rate": 4.904324369889573e-05,
+      "loss": 2.7794,
+      "step": 910
+    },
+    {
+      "epoch": 0.019388013150130657,
+      "grad_norm": 3.8265621662139893,
+      "learning_rate": 4.903270673522718e-05,
+      "loss": 2.8162,
+      "step": 920
+    },
+    {
+      "epoch": 0.019598752423501644,
+      "grad_norm": 3.4845385551452637,
+      "learning_rate": 4.902216977155863e-05,
+      "loss": 2.7385,
+      "step": 930
+    },
+    {
+      "epoch": 0.019809491696872628,
+      "grad_norm": 4.434839248657227,
+      "learning_rate": 4.901163280789008e-05,
+      "loss": 2.6926,
+      "step": 940
+    },
+    {
+      "epoch": 0.020020230970243615,
+      "grad_norm": 6.55767822265625,
+      "learning_rate": 4.900109584422154e-05,
+      "loss": 2.6728,
+      "step": 950
+    },
+    {
+      "epoch": 0.0202309702436146,
+      "grad_norm": 3.1376519203186035,
+      "learning_rate": 4.8990558880552985e-05,
+      "loss": 2.8854,
+      "step": 960
+    },
+    {
+      "epoch": 0.020441709516985585,
+      "grad_norm": 3.569209337234497,
+      "learning_rate": 4.8980021916884433e-05,
+      "loss": 2.7862,
+      "step": 970
+    },
+    {
+      "epoch": 0.020652448790356572,
+      "grad_norm": 16.01164436340332,
+      "learning_rate": 4.896948495321588e-05,
+      "loss": 2.8844,
+      "step": 980
+    },
+    {
+      "epoch": 0.020863188063727556,
+      "grad_norm": 3.2311861515045166,
+      "learning_rate": 4.895894798954733e-05,
+      "loss": 2.8773,
+      "step": 990
+    },
+    {
+      "epoch": 0.021073927337098543,
+      "grad_norm": 4.047968864440918,
+      "learning_rate": 4.8948411025878785e-05,
+      "loss": 2.6955,
+      "step": 1000
+    },
+    {
+      "epoch": 0.021284666610469526,
+      "grad_norm": 3.3213765621185303,
+      "learning_rate": 4.893787406221024e-05,
+      "loss": 2.8308,
+      "step": 1010
+    },
+    {
+      "epoch": 0.021495405883840513,
+      "grad_norm": 3.658327102661133,
+      "learning_rate": 4.892733709854169e-05,
+      "loss": 2.8409,
+      "step": 1020
+    },
+    {
+      "epoch": 0.021706145157211497,
+      "grad_norm": 3.6058104038238525,
+      "learning_rate": 4.8916800134873136e-05,
+      "loss": 2.8073,
+      "step": 1030
+    },
+    {
+      "epoch": 0.021916884430582484,
+      "grad_norm": 3.622807741165161,
+      "learning_rate": 4.8906263171204585e-05,
+      "loss": 2.7389,
+      "step": 1040
+    },
+    {
+      "epoch": 0.022127623703953467,
+      "grad_norm": 3.6087045669555664,
+      "learning_rate": 4.889572620753604e-05,
+      "loss": 2.7506,
+      "step": 1050
+    },
+    {
+      "epoch": 0.022338362977324455,
+      "grad_norm": 3.1047306060791016,
+      "learning_rate": 4.888518924386749e-05,
+      "loss": 2.77,
+      "step": 1060
+    },
+    {
+      "epoch": 0.022549102250695438,
+      "grad_norm": 3.4957761764526367,
+      "learning_rate": 4.887465228019894e-05,
+      "loss": 2.8526,
+      "step": 1070
+    },
+    {
+      "epoch": 0.022759841524066425,
+      "grad_norm": 5.486833095550537,
+      "learning_rate": 4.886411531653039e-05,
+      "loss": 2.8144,
+      "step": 1080
+    },
+    {
+      "epoch": 0.022970580797437412,
+      "grad_norm": 6.019243240356445,
+      "learning_rate": 4.885357835286184e-05,
+      "loss": 2.8095,
+      "step": 1090
+    },
+    {
+      "epoch": 0.023181320070808396,
+      "grad_norm": 5.631433486938477,
+      "learning_rate": 4.8843041389193294e-05,
+      "loss": 2.6904,
+      "step": 1100
+    },
+    {
+      "epoch": 0.023392059344179383,
+      "grad_norm": 3.162370204925537,
+      "learning_rate": 4.883250442552474e-05,
+      "loss": 2.6805,
+      "step": 1110
+    },
+    {
+      "epoch": 0.023602798617550366,
+      "grad_norm": 3.7855355739593506,
+      "learning_rate": 4.882196746185619e-05,
+      "loss": 2.7785,
+      "step": 1120
+    },
+    {
+      "epoch": 0.023813537890921353,
+      "grad_norm": 3.0686984062194824,
+      "learning_rate": 4.8811430498187646e-05,
+      "loss": 2.7163,
+      "step": 1130
+    },
+    {
+      "epoch": 0.024024277164292337,
+      "grad_norm": 4.290556907653809,
+      "learning_rate": 4.8800893534519094e-05,
+      "loss": 2.8549,
+      "step": 1140
+    },
+    {
+      "epoch": 0.024235016437663324,
+      "grad_norm": 4.224363327026367,
+      "learning_rate": 4.879035657085055e-05,
+      "loss": 2.7858,
+      "step": 1150
+    },
+    {
+      "epoch": 0.024445755711034307,
+      "grad_norm": 4.114129066467285,
+      "learning_rate": 4.8779819607182e-05,
+      "loss": 2.7969,
+      "step": 1160
+    },
+    {
+      "epoch": 0.024656494984405294,
+      "grad_norm": 4.066317081451416,
+      "learning_rate": 4.8769282643513446e-05,
+      "loss": 2.701,
+      "step": 1170
+    },
+    {
+      "epoch": 0.024867234257776278,
+      "grad_norm": 3.0110621452331543,
+      "learning_rate": 4.8758745679844894e-05,
+      "loss": 2.9004,
+      "step": 1180
+    },
+    {
+      "epoch": 0.025077973531147265,
+      "grad_norm": 4.144210338592529,
+      "learning_rate": 4.874820871617635e-05,
+      "loss": 2.8651,
+      "step": 1190
+    },
+    {
+      "epoch": 0.02528871280451825,
+      "grad_norm": 3.226668119430542,
+      "learning_rate": 4.8737671752507804e-05,
+      "loss": 2.8332,
+      "step": 1200
+    },
+    {
+      "epoch": 0.025499452077889236,
+      "grad_norm": 3.7419745922088623,
+      "learning_rate": 4.872713478883925e-05,
+      "loss": 2.8502,
+      "step": 1210
+    },
+    {
+      "epoch": 0.02571019135126022,
+      "grad_norm": 6.740761756896973,
+      "learning_rate": 4.87165978251707e-05,
+      "loss": 2.7892,
+      "step": 1220
+    },
+    {
+      "epoch": 0.025920930624631206,
+      "grad_norm": 4.075252056121826,
+      "learning_rate": 4.870606086150215e-05,
+      "loss": 2.7037,
+      "step": 1230
+    },
+    {
+      "epoch": 0.026131669898002193,
+      "grad_norm": 3.7171854972839355,
+      "learning_rate": 4.8695523897833604e-05,
+      "loss": 2.7885,
+      "step": 1240
+    },
+    {
+      "epoch": 0.026342409171373177,
+      "grad_norm": 3.2372097969055176,
+      "learning_rate": 4.868498693416506e-05,
+      "loss": 2.7827,
+      "step": 1250
+    },
+    {
+      "epoch": 0.026553148444744164,
+      "grad_norm": 5.390050888061523,
+      "learning_rate": 4.867444997049651e-05,
+      "loss": 2.7929,
+      "step": 1260
+    },
+    {
+      "epoch": 0.026763887718115147,
+      "grad_norm": 6.445965766906738,
+      "learning_rate": 4.8663913006827955e-05,
+      "loss": 2.7683,
+      "step": 1270
+    },
+    {
+      "epoch": 0.026974626991486134,
+      "grad_norm": 4.038309574127197,
+      "learning_rate": 4.8653376043159403e-05,
+      "loss": 2.7644,
+      "step": 1280
+    },
+    {
+      "epoch": 0.027185366264857118,
+      "grad_norm": 3.422018527984619,
+      "learning_rate": 4.864283907949085e-05,
+      "loss": 2.759,
+      "step": 1290
+    },
+    {
+      "epoch": 0.027396105538228105,
+      "grad_norm": 3.4551455974578857,
+      "learning_rate": 4.863230211582231e-05,
+      "loss": 2.6919,
+      "step": 1300
+    },
+    {
+      "epoch": 0.02760684481159909,
+      "grad_norm": 4.99464225769043,
+      "learning_rate": 4.862176515215376e-05,
+      "loss": 2.8362,
+      "step": 1310
+    },
+    {
+      "epoch": 0.027817584084970075,
+      "grad_norm": 3.759676933288574,
+      "learning_rate": 4.861122818848521e-05,
+      "loss": 2.7712,
+      "step": 1320
+    },
+    {
+      "epoch": 0.02802832335834106,
+      "grad_norm": 4.082064151763916,
+      "learning_rate": 4.860069122481666e-05,
+      "loss": 2.8224,
+      "step": 1330
+    },
+    {
+      "epoch": 0.028239062631712046,
+      "grad_norm": 3.157449960708618,
+      "learning_rate": 4.8590154261148106e-05,
+      "loss": 2.7863,
+      "step": 1340
+    },
+    {
+      "epoch": 0.02844980190508303,
+      "grad_norm": 5.148242950439453,
+      "learning_rate": 4.857961729747956e-05,
+      "loss": 2.7383,
+      "step": 1350
+    },
+    {
+      "epoch": 0.028660541178454017,
+      "grad_norm": 3.794858694076538,
+      "learning_rate": 4.856908033381101e-05,
+      "loss": 2.7118,
+      "step": 1360
+    },
+    {
+      "epoch": 0.028871280451825004,
+      "grad_norm": 3.0035576820373535,
+      "learning_rate": 4.8558543370142465e-05,
+      "loss": 2.7787,
+      "step": 1370
+    },
+    {
+      "epoch": 0.029082019725195987,
+      "grad_norm": 4.196252822875977,
+      "learning_rate": 4.854800640647391e-05,
+      "loss": 2.7501,
+      "step": 1380
+    },
+    {
+      "epoch": 0.029292758998566974,
+      "grad_norm": 7.164214611053467,
+      "learning_rate": 4.853746944280536e-05,
+      "loss": 2.7885,
+      "step": 1390
+    },
+    {
+      "epoch": 0.029503498271937958,
+      "grad_norm": 3.252230167388916,
+      "learning_rate": 4.8526932479136816e-05,
+      "loss": 2.7875,
+      "step": 1400
+    },
+    {
+      "epoch": 0.029714237545308945,
+      "grad_norm": 7.037599563598633,
+      "learning_rate": 4.8516395515468265e-05,
+      "loss": 2.7787,
+      "step": 1410
+    },
+    {
+      "epoch": 0.02992497681867993,
+      "grad_norm": 3.5410661697387695,
+      "learning_rate": 4.850585855179971e-05,
+      "loss": 2.8644,
+      "step": 1420
+    },
+    {
+      "epoch": 0.030135716092050915,
+      "grad_norm": 3.7410356998443604,
+      "learning_rate": 4.849532158813117e-05,
+      "loss": 2.8146,
+      "step": 1430
+    },
+    {
+      "epoch": 0.0303464553654219,
+      "grad_norm": 7.049551963806152,
+      "learning_rate": 4.8484784624462616e-05,
+      "loss": 2.8427,
+      "step": 1440
+    },
+    {
+      "epoch": 0.030557194638792886,
+      "grad_norm": 3.5179500579833984,
+      "learning_rate": 4.847424766079407e-05,
+      "loss": 2.7396,
+      "step": 1450
+    },
+    {
+      "epoch": 0.03076793391216387,
+      "grad_norm": 3.117222785949707,
+      "learning_rate": 4.846371069712552e-05,
+      "loss": 2.6701,
+      "step": 1460
+    },
+    {
+      "epoch": 0.030978673185534857,
+      "grad_norm": 2.9434943199157715,
+      "learning_rate": 4.845317373345697e-05,
+      "loss": 2.7827,
+      "step": 1470
+    },
+    {
+      "epoch": 0.03118941245890584,
+      "grad_norm": 7.40554666519165,
+      "learning_rate": 4.8442636769788416e-05,
+      "loss": 2.6663,
+      "step": 1480
+    },
+    {
+      "epoch": 0.031400151732276824,
+      "grad_norm": 3.6940155029296875,
+      "learning_rate": 4.843209980611987e-05,
+      "loss": 2.7505,
+      "step": 1490
+    },
+    {
+      "epoch": 0.03161089100564781,
+      "grad_norm": 3.142557382583618,
+      "learning_rate": 4.8421562842451326e-05,
+      "loss": 2.7381,
+      "step": 1500
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 47452,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 100,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 3376102996231680.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed5bb1ef57b2ac804400da520cfcbeda430ae350f09924695fa4f0acd5855679
+size 5304

vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff