ChenWu98 committed 12 days ago

Commit

ba16a81

verified ·

1 Parent(s): e043aad

Upload folder using huggingface_hub

Browse files

Files changed (25) hide show

README.md +2 -2
checkpoint-128/model-00001-of-00004.safetensors +1 -1
checkpoint-128/model-00002-of-00004.safetensors +1 -1
checkpoint-128/model-00003-of-00004.safetensors +1 -1
checkpoint-128/model-00004-of-00004.safetensors +1 -1
checkpoint-128/trainer_state.json +37 -37
checkpoint-128/training_args.bin +1 -1
checkpoint-32/model-00001-of-00004.safetensors +1 -1
checkpoint-32/model-00002-of-00004.safetensors +1 -1
checkpoint-32/model-00003-of-00004.safetensors +1 -1
checkpoint-32/model-00004-of-00004.safetensors +1 -1
checkpoint-32/trainer_state.json +10 -10
checkpoint-32/training_args.bin +1 -1
checkpoint-64/model-00001-of-00004.safetensors +1 -1
checkpoint-64/model-00002-of-00004.safetensors +1 -1
checkpoint-64/model-00003-of-00004.safetensors +1 -1
checkpoint-64/model-00004-of-00004.safetensors +1 -1
checkpoint-64/trainer_state.json +19 -19
checkpoint-64/training_args.bin +1 -1
checkpoint-96/model-00001-of-00004.safetensors +1 -1
checkpoint-96/model-00002-of-00004.safetensors +1 -1
checkpoint-96/model-00003-of-00004.safetensors +1 -1
checkpoint-96/model-00004-of-00004.safetensors +1 -1
checkpoint-96/trainer_state.json +28 -28
checkpoint-96/training_args.bin +1 -1

README.md CHANGED Viewed

@@ -4,8 +4,8 @@ library_name: transformers
 model_name: d1_train_1024_no_reasoning_llama3_8B
 tags:
 - generated_from_trainer
-- trl
 - sft
 licence: license
 ---
@@ -27,7 +27,7 @@ print(output["generated_text"])
 ## Training procedure
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chenwu/huggingface/runs/bomjniwi)
 This model was trained with SFT.

 model_name: d1_train_1024_no_reasoning_llama3_8B
 tags:
 - generated_from_trainer
 - sft
+- trl
 licence: license
 ---
 ## Training procedure
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/chenwu/huggingface/runs/l01140ez)
 This model was trained with SFT.

checkpoint-128/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b8f0ef39e33ae7eb14ee7cc666dfa9b94278355748bf86d44ad981d087111580
 size 4976698672

 version https://git-lfs.github.com/spec/v1
+oid sha256:40fa941dc7eda6dd2d9d5ec937b0f9a23e8642f4622f7f1dd49a2fcd475960a5
 size 4976698672

checkpoint-128/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9acf7eb8792a82212fdd297a6fd0a954ac9f43ddf9e98022cf450f1f12c8a6ec
 size 4999802720

 version https://git-lfs.github.com/spec/v1
+oid sha256:13fe6aadd034f0361cb93df64a5a9ee6539c37ac3bf09a10984a1b9f27ec96c1
 size 4999802720

checkpoint-128/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:10a687f40a20e1c92dfb329ff45456bc4e50c130752ce770f39ed82311fbf8a8
 size 4915916176

 version https://git-lfs.github.com/spec/v1
+oid sha256:d5094186aa8995114e74046358974b9cced60317442e262f194885388756bd5a
 size 4915916176

checkpoint-128/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8e22ed6f07f17fa0258c60559e6844d93b48b58cfe068a4dc4c95f7b4849b42a
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:677385a5dc6b6fb78ebfef1f574dbb84568cfe5cc534db5377d92146be10f265
 size 1168138808

checkpoint-128/trainer_state.json CHANGED Viewed

@@ -11,7 +11,7 @@
   "log_history": [
     {
       "epoch": 0.0625,
-      "grad_norm": 5.229090624867237,
       "learning_rate": 1e-05,
       "loss": 0.553,
       "mean_token_accuracy": 0.8401249051094055,
@@ -20,109 +20,109 @@
     },
     {
       "epoch": 0.625,
-      "grad_norm": 1.8775022993488002,
       "learning_rate": 1e-05,
-      "loss": 0.443,
-      "mean_token_accuracy": 0.8661629954973856,
       "num_tokens": 401401.0,
       "step": 10
     },
     {
       "epoch": 1.25,
-      "grad_norm": 1.4054008068425676,
       "learning_rate": 1e-05,
-      "loss": 0.3346,
-      "mean_token_accuracy": 0.8969329178333283,
       "num_tokens": 799732.0,
       "step": 20
     },
     {
       "epoch": 1.875,
-      "grad_norm": 1.4215176512700638,
       "learning_rate": 1e-05,
-      "loss": 0.2071,
-      "mean_token_accuracy": 0.9320787608623504,
       "num_tokens": 1199159.0,
       "step": 30
     },
     {
       "epoch": 2.5,
-      "grad_norm": 2.0853381701848304,
       "learning_rate": 1e-05,
-      "loss": 0.1034,
-      "mean_token_accuracy": 0.9672571659088135,
       "num_tokens": 1592321.0,
       "step": 40
     },
     {
       "epoch": 3.125,
-      "grad_norm": 0.9298876909455216,
       "learning_rate": 1e-05,
-      "loss": 0.0614,
-      "mean_token_accuracy": 0.9805558681488037,
       "num_tokens": 1998955.0,
       "step": 50
     },
     {
       "epoch": 3.75,
-      "grad_norm": 1.184064034490825,
       "learning_rate": 1e-05,
-      "loss": 0.0298,
-      "mean_token_accuracy": 0.9916574656963348,
       "num_tokens": 2397408.0,
       "step": 60
     },
     {
       "epoch": 4.375,
-      "grad_norm": 0.8214791537068664,
       "learning_rate": 1e-05,
-      "loss": 0.019,
-      "mean_token_accuracy": 0.9946628630161285,
       "num_tokens": 2798080.0,
       "step": 70
     },
     {
       "epoch": 5.0,
-      "grad_norm": 1.0482853792795606,
       "learning_rate": 1e-05,
-      "loss": 0.0168,
-      "mean_token_accuracy": 0.9956359148025513,
       "num_tokens": 3199940.0,
       "step": 80
     },
     {
       "epoch": 5.625,
-      "grad_norm": 0.8533738215069255,
       "learning_rate": 1e-05,
-      "loss": 0.0088,
-      "mean_token_accuracy": 0.9977155506610871,
       "num_tokens": 3601545.0,
       "step": 90
     },
     {
       "epoch": 6.25,
-      "grad_norm": 0.6832402024714068,
       "learning_rate": 1e-05,
-      "loss": 0.0086,
-      "mean_token_accuracy": 0.9978549599647522,
       "num_tokens": 3998166.0,
       "step": 100
     },
     {
       "epoch": 6.875,
-      "grad_norm": 0.7633873323361593,
       "learning_rate": 1e-05,
-      "loss": 0.0079,
-      "mean_token_accuracy": 0.9980584681034088,
       "num_tokens": 4400384.0,
       "step": 110
     },
     {
       "epoch": 7.5,
-      "grad_norm": 0.7715988435304115,
       "learning_rate": 1e-05,
-      "loss": 0.0052,
-      "mean_token_accuracy": 0.9987416326999664,
       "num_tokens": 4799604.0,
       "step": 120
     }

   "log_history": [
     {
       "epoch": 0.0625,
+      "grad_norm": 5.229332748167166,
       "learning_rate": 1e-05,
       "loss": 0.553,
       "mean_token_accuracy": 0.8401249051094055,
     },
     {
       "epoch": 0.625,
+      "grad_norm": 1.878449792522089,
       "learning_rate": 1e-05,
+      "loss": 0.4425,
+      "mean_token_accuracy": 0.8662860658433702,
       "num_tokens": 401401.0,
       "step": 10
     },
     {
       "epoch": 1.25,
+      "grad_norm": 1.4215514241135978,
       "learning_rate": 1e-05,
+      "loss": 0.3341,
+      "mean_token_accuracy": 0.8969074487686157,
       "num_tokens": 799732.0,
       "step": 20
     },
     {
       "epoch": 1.875,
+      "grad_norm": 1.4575405337602632,
       "learning_rate": 1e-05,
+      "loss": 0.2066,
+      "mean_token_accuracy": 0.9323108971118927,
       "num_tokens": 1199159.0,
       "step": 30
     },
     {
       "epoch": 2.5,
+      "grad_norm": 2.103711519590209,
       "learning_rate": 1e-05,
+      "loss": 0.1032,
+      "mean_token_accuracy": 0.9672375440597534,
       "num_tokens": 1592321.0,
       "step": 40
     },
     {
       "epoch": 3.125,
+      "grad_norm": 0.8849927277663293,
       "learning_rate": 1e-05,
+      "loss": 0.0611,
+      "mean_token_accuracy": 0.9806811392307282,
       "num_tokens": 1998955.0,
       "step": 50
     },
     {
       "epoch": 3.75,
+      "grad_norm": 1.0988924501749624,
       "learning_rate": 1e-05,
+      "loss": 0.0292,
+      "mean_token_accuracy": 0.9918049573898315,
       "num_tokens": 2397408.0,
       "step": 60
     },
     {
       "epoch": 4.375,
+      "grad_norm": 0.9209838605387056,
       "learning_rate": 1e-05,
+      "loss": 0.0197,
+      "mean_token_accuracy": 0.9946328461170196,
       "num_tokens": 2798080.0,
       "step": 70
     },
     {
       "epoch": 5.0,
+      "grad_norm": 1.145881672413454,
       "learning_rate": 1e-05,
+      "loss": 0.0184,
+      "mean_token_accuracy": 0.9954161286354065,
       "num_tokens": 3199940.0,
       "step": 80
     },
     {
       "epoch": 5.625,
+      "grad_norm": 0.8760509498406688,
       "learning_rate": 1e-05,
+      "loss": 0.0094,
+      "mean_token_accuracy": 0.9976460933685303,
       "num_tokens": 3601545.0,
       "step": 90
     },
     {
       "epoch": 6.25,
+      "grad_norm": 0.6303656980191445,
       "learning_rate": 1e-05,
+      "loss": 0.0089,
+      "mean_token_accuracy": 0.9977393686771393,
       "num_tokens": 3998166.0,
       "step": 100
     },
     {
       "epoch": 6.875,
+      "grad_norm": 0.737583281857238,
       "learning_rate": 1e-05,
+      "loss": 0.008,
+      "mean_token_accuracy": 0.9981118857860565,
       "num_tokens": 4400384.0,
       "step": 110
     },
     {
       "epoch": 7.5,
+      "grad_norm": 0.5676177953651935,
       "learning_rate": 1e-05,
+      "loss": 0.0053,
+      "mean_token_accuracy": 0.9985986590385437,
       "num_tokens": 4799604.0,
       "step": 120
     }

checkpoint-128/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf9d951d05098026f9d17656169173d7d8769eb9865665888a43420e263ac25c
 size 8081

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c29f8fe80083b5b7c0f17189143d52e9e222441f308cf2fc22d25021e0096e3
 size 8081

checkpoint-32/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:60d0dbb4a13be74ad990aed12e1389840238e4a5050917310a1fe798fc9069b9
 size 4976698672

 version https://git-lfs.github.com/spec/v1
+oid sha256:a28a9e5797fde7e49088949ab39301b942d08c68a323e48d48f880a2dc2f31df
 size 4976698672

checkpoint-32/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4ede8bc15400697d2489a46a82f6103cf15d1ec4a968dab8e8633e89da4ffcec
 size 4999802720

 version https://git-lfs.github.com/spec/v1
+oid sha256:3c0d8c607046cd885ce539e1e07e480893946a0e3b8013882c90b4b84135c1d5
 size 4999802720

checkpoint-32/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:efb727f76ceef896d0eb3e9121ee41c77b14c07e68558e959d9cf14e01caaf4a
 size 4915916176

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6a91279de971d2da10439673c5f4c919c9a1c2084b7d56cddd19f1833e3026d
 size 4915916176

checkpoint-32/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc5ae0f36b0762d53c699f698f9ad86c981e406cc1e623141ba101e8491dae65
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:d090159b203d889219d031869d0b7100b700e438446783e79a2fa4001ffe5635
 size 1168138808

checkpoint-32/trainer_state.json CHANGED Viewed

@@ -11,7 +11,7 @@
   "log_history": [
     {
       "epoch": 0.0625,
-      "grad_norm": 5.229090624867237,
       "learning_rate": 1e-05,
       "loss": 0.553,
       "mean_token_accuracy": 0.8401249051094055,
@@ -20,28 +20,28 @@
     },
     {
       "epoch": 0.625,
-      "grad_norm": 1.8775022993488002,
       "learning_rate": 1e-05,
-      "loss": 0.443,
-      "mean_token_accuracy": 0.8661629954973856,
       "num_tokens": 401401.0,
       "step": 10
     },
     {
       "epoch": 1.25,
-      "grad_norm": 1.4054008068425676,
       "learning_rate": 1e-05,
-      "loss": 0.3346,
-      "mean_token_accuracy": 0.8969329178333283,
       "num_tokens": 799732.0,
       "step": 20
     },
     {
       "epoch": 1.875,
-      "grad_norm": 1.4215176512700638,
       "learning_rate": 1e-05,
-      "loss": 0.2071,
-      "mean_token_accuracy": 0.9320787608623504,
       "num_tokens": 1199159.0,
       "step": 30
     }

   "log_history": [
     {
       "epoch": 0.0625,
+      "grad_norm": 5.229332748167166,
       "learning_rate": 1e-05,
       "loss": 0.553,
       "mean_token_accuracy": 0.8401249051094055,
     },
     {
       "epoch": 0.625,
+      "grad_norm": 1.878449792522089,
       "learning_rate": 1e-05,
+      "loss": 0.4425,
+      "mean_token_accuracy": 0.8662860658433702,
       "num_tokens": 401401.0,
       "step": 10
     },
     {
       "epoch": 1.25,
+      "grad_norm": 1.4215514241135978,
       "learning_rate": 1e-05,
+      "loss": 0.3341,
+      "mean_token_accuracy": 0.8969074487686157,
       "num_tokens": 799732.0,
       "step": 20
     },
     {
       "epoch": 1.875,
+      "grad_norm": 1.4575405337602632,
       "learning_rate": 1e-05,
+      "loss": 0.2066,
+      "mean_token_accuracy": 0.9323108971118927,
       "num_tokens": 1199159.0,
       "step": 30
     }

checkpoint-32/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf9d951d05098026f9d17656169173d7d8769eb9865665888a43420e263ac25c
 size 8081

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c29f8fe80083b5b7c0f17189143d52e9e222441f308cf2fc22d25021e0096e3
 size 8081

checkpoint-64/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:db3c8ded99a1c7f831a6abde4f05102c98e68e4862834c2445f4b11ec23ef0fc
 size 4976698672

 version https://git-lfs.github.com/spec/v1
+oid sha256:00fbe653cb7ebbf34086241b234f117c83033ad1a430ee2a8293c94345910a52
 size 4976698672

checkpoint-64/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c6a8406f4665a8362c2c29ba57f3e7f67776f8009b7451cec5dbf27d5d2ad8b7
 size 4999802720

 version https://git-lfs.github.com/spec/v1
+oid sha256:e8c8c6db9f5a0f64b053d5ed412d770f46e1b21fb22921ad3cbccc9eefc9c483
 size 4999802720

checkpoint-64/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef15c198f1a91994edb8360f3067e52d6ab09a588208105729b7ae42551a5767
 size 4915916176

 version https://git-lfs.github.com/spec/v1
+oid sha256:3e632cf3a6e8508a9d0a2ebb86f758925169f16f34c0e1a9a9f1a0b01fd45a79
 size 4915916176

checkpoint-64/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ef2307b648f0f62172249f08b465e0de1e3ead3b3bd37538e2238759039134c3
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:29a6f46c1fa988045aac05f3c9c3c04c706b1314a062ea50f31f7a662a19c98a
 size 1168138808

checkpoint-64/trainer_state.json CHANGED Viewed

@@ -11,7 +11,7 @@
   "log_history": [
     {
       "epoch": 0.0625,
-      "grad_norm": 5.229090624867237,
       "learning_rate": 1e-05,
       "loss": 0.553,
       "mean_token_accuracy": 0.8401249051094055,
@@ -20,55 +20,55 @@
     },
     {
       "epoch": 0.625,
-      "grad_norm": 1.8775022993488002,
       "learning_rate": 1e-05,
-      "loss": 0.443,
-      "mean_token_accuracy": 0.8661629954973856,
       "num_tokens": 401401.0,
       "step": 10
     },
     {
       "epoch": 1.25,
-      "grad_norm": 1.4054008068425676,
       "learning_rate": 1e-05,
-      "loss": 0.3346,
-      "mean_token_accuracy": 0.8969329178333283,
       "num_tokens": 799732.0,
       "step": 20
     },
     {
       "epoch": 1.875,
-      "grad_norm": 1.4215176512700638,
       "learning_rate": 1e-05,
-      "loss": 0.2071,
-      "mean_token_accuracy": 0.9320787608623504,
       "num_tokens": 1199159.0,
       "step": 30
     },
     {
       "epoch": 2.5,
-      "grad_norm": 2.0853381701848304,
       "learning_rate": 1e-05,
-      "loss": 0.1034,
-      "mean_token_accuracy": 0.9672571659088135,
       "num_tokens": 1592321.0,
       "step": 40
     },
     {
       "epoch": 3.125,
-      "grad_norm": 0.9298876909455216,
       "learning_rate": 1e-05,
-      "loss": 0.0614,
-      "mean_token_accuracy": 0.9805558681488037,
       "num_tokens": 1998955.0,
       "step": 50
     },
     {
       "epoch": 3.75,
-      "grad_norm": 1.184064034490825,
       "learning_rate": 1e-05,
-      "loss": 0.0298,
-      "mean_token_accuracy": 0.9916574656963348,
       "num_tokens": 2397408.0,
       "step": 60
     }

   "log_history": [
     {
       "epoch": 0.0625,
+      "grad_norm": 5.229332748167166,
       "learning_rate": 1e-05,
       "loss": 0.553,
       "mean_token_accuracy": 0.8401249051094055,
     },
     {
       "epoch": 0.625,
+      "grad_norm": 1.878449792522089,
       "learning_rate": 1e-05,
+      "loss": 0.4425,
+      "mean_token_accuracy": 0.8662860658433702,
       "num_tokens": 401401.0,
       "step": 10
     },
     {
       "epoch": 1.25,
+      "grad_norm": 1.4215514241135978,
       "learning_rate": 1e-05,
+      "loss": 0.3341,
+      "mean_token_accuracy": 0.8969074487686157,
       "num_tokens": 799732.0,
       "step": 20
     },
     {
       "epoch": 1.875,
+      "grad_norm": 1.4575405337602632,
       "learning_rate": 1e-05,
+      "loss": 0.2066,
+      "mean_token_accuracy": 0.9323108971118927,
       "num_tokens": 1199159.0,
       "step": 30
     },
     {
       "epoch": 2.5,
+      "grad_norm": 2.103711519590209,
       "learning_rate": 1e-05,
+      "loss": 0.1032,
+      "mean_token_accuracy": 0.9672375440597534,
       "num_tokens": 1592321.0,
       "step": 40
     },
     {
       "epoch": 3.125,
+      "grad_norm": 0.8849927277663293,
       "learning_rate": 1e-05,
+      "loss": 0.0611,
+      "mean_token_accuracy": 0.9806811392307282,
       "num_tokens": 1998955.0,
       "step": 50
     },
     {
       "epoch": 3.75,
+      "grad_norm": 1.0988924501749624,
       "learning_rate": 1e-05,
+      "loss": 0.0292,
+      "mean_token_accuracy": 0.9918049573898315,
       "num_tokens": 2397408.0,
       "step": 60
     }

checkpoint-64/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf9d951d05098026f9d17656169173d7d8769eb9865665888a43420e263ac25c
 size 8081

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c29f8fe80083b5b7c0f17189143d52e9e222441f308cf2fc22d25021e0096e3
 size 8081

checkpoint-96/model-00001-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7a0492a2f49410326e0d75a2d1989a6f07f77dad77dfa79b670dbbe3602e41df
 size 4976698672

 version https://git-lfs.github.com/spec/v1
+oid sha256:6914a1438136e282d6bcf40bef64b766ed70117fe040d52f0ec393d760378169
 size 4976698672

checkpoint-96/model-00002-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cc758699380eb76d476da0a770a22d75f171ae64b0e9555b21dedf207b666c91
 size 4999802720

 version https://git-lfs.github.com/spec/v1
+oid sha256:e58cf5422d1aa176b52eb65b5c6b33963574f79372067c6312a83c597116796d
 size 4999802720

checkpoint-96/model-00003-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3b414eacb245cb1340a124e23ff980846bf4c7ea14ec96cadb6647809944bc18
 size 4915916176

 version https://git-lfs.github.com/spec/v1
+oid sha256:0a74864c894aeca1872c8398d5a779c786f09ff6a99d00ed1b05366e63dd943b
 size 4915916176

checkpoint-96/model-00004-of-00004.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:76bbea69cf63298ec2e6cd765ac41f6a1fbd54d699676a9b0638abe4b2c71eac
 size 1168138808

 version https://git-lfs.github.com/spec/v1
+oid sha256:4b5469fe62b9764063be846645c926a92b9fb00883c6b5ec1f63c697e62e5ca6
 size 1168138808

checkpoint-96/trainer_state.json CHANGED Viewed

@@ -11,7 +11,7 @@
   "log_history": [
     {
       "epoch": 0.0625,
-      "grad_norm": 5.229090624867237,
       "learning_rate": 1e-05,
       "loss": 0.553,
       "mean_token_accuracy": 0.8401249051094055,
@@ -20,82 +20,82 @@
     },
     {
       "epoch": 0.625,
-      "grad_norm": 1.8775022993488002,
       "learning_rate": 1e-05,
-      "loss": 0.443,
-      "mean_token_accuracy": 0.8661629954973856,
       "num_tokens": 401401.0,
       "step": 10
     },
     {
       "epoch": 1.25,
-      "grad_norm": 1.4054008068425676,
       "learning_rate": 1e-05,
-      "loss": 0.3346,
-      "mean_token_accuracy": 0.8969329178333283,
       "num_tokens": 799732.0,
       "step": 20
     },
     {
       "epoch": 1.875,
-      "grad_norm": 1.4215176512700638,
       "learning_rate": 1e-05,
-      "loss": 0.2071,
-      "mean_token_accuracy": 0.9320787608623504,
       "num_tokens": 1199159.0,
       "step": 30
     },
     {
       "epoch": 2.5,
-      "grad_norm": 2.0853381701848304,
       "learning_rate": 1e-05,
-      "loss": 0.1034,
-      "mean_token_accuracy": 0.9672571659088135,
       "num_tokens": 1592321.0,
       "step": 40
     },
     {
       "epoch": 3.125,
-      "grad_norm": 0.9298876909455216,
       "learning_rate": 1e-05,
-      "loss": 0.0614,
-      "mean_token_accuracy": 0.9805558681488037,
       "num_tokens": 1998955.0,
       "step": 50
     },
     {
       "epoch": 3.75,
-      "grad_norm": 1.184064034490825,
       "learning_rate": 1e-05,
-      "loss": 0.0298,
-      "mean_token_accuracy": 0.9916574656963348,
       "num_tokens": 2397408.0,
       "step": 60
     },
     {
       "epoch": 4.375,
-      "grad_norm": 0.8214791537068664,
       "learning_rate": 1e-05,
-      "loss": 0.019,
-      "mean_token_accuracy": 0.9946628630161285,
       "num_tokens": 2798080.0,
       "step": 70
     },
     {
       "epoch": 5.0,
-      "grad_norm": 1.0482853792795606,
       "learning_rate": 1e-05,
-      "loss": 0.0168,
-      "mean_token_accuracy": 0.9956359148025513,
       "num_tokens": 3199940.0,
       "step": 80
     },
     {
       "epoch": 5.625,
-      "grad_norm": 0.8533738215069255,
       "learning_rate": 1e-05,
-      "loss": 0.0088,
-      "mean_token_accuracy": 0.9977155506610871,
       "num_tokens": 3601545.0,
       "step": 90
     }

   "log_history": [
     {
       "epoch": 0.0625,
+      "grad_norm": 5.229332748167166,
       "learning_rate": 1e-05,
       "loss": 0.553,
       "mean_token_accuracy": 0.8401249051094055,
     },
     {
       "epoch": 0.625,
+      "grad_norm": 1.878449792522089,
       "learning_rate": 1e-05,
+      "loss": 0.4425,
+      "mean_token_accuracy": 0.8662860658433702,
       "num_tokens": 401401.0,
       "step": 10
     },
     {
       "epoch": 1.25,
+      "grad_norm": 1.4215514241135978,
       "learning_rate": 1e-05,
+      "loss": 0.3341,
+      "mean_token_accuracy": 0.8969074487686157,
       "num_tokens": 799732.0,
       "step": 20
     },
     {
       "epoch": 1.875,
+      "grad_norm": 1.4575405337602632,
       "learning_rate": 1e-05,
+      "loss": 0.2066,
+      "mean_token_accuracy": 0.9323108971118927,
       "num_tokens": 1199159.0,
       "step": 30
     },
     {
       "epoch": 2.5,
+      "grad_norm": 2.103711519590209,
       "learning_rate": 1e-05,
+      "loss": 0.1032,
+      "mean_token_accuracy": 0.9672375440597534,
       "num_tokens": 1592321.0,
       "step": 40
     },
     {
       "epoch": 3.125,
+      "grad_norm": 0.8849927277663293,
       "learning_rate": 1e-05,
+      "loss": 0.0611,
+      "mean_token_accuracy": 0.9806811392307282,
       "num_tokens": 1998955.0,
       "step": 50
     },
     {
       "epoch": 3.75,
+      "grad_norm": 1.0988924501749624,
       "learning_rate": 1e-05,
+      "loss": 0.0292,
+      "mean_token_accuracy": 0.9918049573898315,
       "num_tokens": 2397408.0,
       "step": 60
     },
     {
       "epoch": 4.375,
+      "grad_norm": 0.9209838605387056,
       "learning_rate": 1e-05,
+      "loss": 0.0197,
+      "mean_token_accuracy": 0.9946328461170196,
       "num_tokens": 2798080.0,
       "step": 70
     },
     {
       "epoch": 5.0,
+      "grad_norm": 1.145881672413454,
       "learning_rate": 1e-05,
+      "loss": 0.0184,
+      "mean_token_accuracy": 0.9954161286354065,
       "num_tokens": 3199940.0,
       "step": 80
     },
     {
       "epoch": 5.625,
+      "grad_norm": 0.8760509498406688,
       "learning_rate": 1e-05,
+      "loss": 0.0094,
+      "mean_token_accuracy": 0.9976460933685303,
       "num_tokens": 3601545.0,
       "step": 90
     }

checkpoint-96/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:bf9d951d05098026f9d17656169173d7d8769eb9865665888a43420e263ac25c
 size 8081

 version https://git-lfs.github.com/spec/v1
+oid sha256:1c29f8fe80083b5b7c0f17189143d52e9e222441f308cf2fc22d25021e0096e3
 size 8081