Commit 0976699 (verified) by YuchenLi01 · Parent(s): 1c4e4fe

Model save
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
 
 ## Training procedure
 
-[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yuchenl4/lmpref/runs/ultrafeedbackSkyworkAgree_alignmentZephyr7BSftFull_sdpo_score_ebs32_lr1e-06_1try1ZHsE4tnJxz0EKaVTK0TO302Ac4wIMwlOZee9lIQLEm242A)
+[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yuchenl4/lmpref/runs/ultrafeedbackSkyworkAgree_alignmentZephyr7BSftFull_sdpo_score_ebs32_lr1e-06_1try1ACWwU9wCOAj5dnWp5hi3dDji94EhGf9cOJhtH5MkAxuwSm)
 
 This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
 
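The README above states the model was trained with DPO, and the W&B run name hints at a Zephyr-7B SFT base, an UltraFeedback-derived preference set, an effective batch size of 32, and a learning rate of 1e-06. Below is a minimal, hypothetical sketch of such a run using TRL's `DPOTrainer`, not the repository's actual training script: the model name, dataset name, and per-device/accumulation split are assumptions inferred from the run name, and exact argument names vary across `trl` versions.

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

# Assumed base checkpoint and preference data, inferred from the run name
# ("alignmentZephyr7BSftFull", "ultrafeedbackSkyworkAgree"); the actual
# sources used for this repository are not spelled out in the diff.
base_model = "alignment-handbook/zephyr-7b-sft-full"  # assumption
model = AutoModelForCausalLM.from_pretrained(base_model)
tokenizer = AutoTokenizer.from_pretrained(base_model)

# Placeholder preference dataset with "prompt"/"chosen"/"rejected" columns.
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")

# Hyperparameters echo the run name (lr 1e-06, effective batch size 32);
# the per-device/accumulation split below is an assumption.
args = DPOConfig(
    output_dir="zephyr-7b-dpo",
    num_train_epochs=1,
    learning_rate=1e-6,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=8,  # 4 * 8 = effective batch size 32 on one device
    report_to="wandb",              # logs a W&B run like the one linked in the README
)

# With no ref_model passed, DPOTrainer keeps a frozen copy of `model`
# as the reference policy for the DPO loss.
trainer = DPOTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    processing_class=tokenizer,
)
trainer.train()
```

After `trainer.train()`, saving the model would rewrite the sharded safetensors files of the kind updated later in this commit.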
all_results.json CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
     "total_flos": 0.0,
-    "train_loss": 0.4004642491922472,
-    "train_runtime": 32844.8806,
+    "train_loss": 0.40748132328498915,
+    "train_runtime": 33297.3388,
     "train_samples": 45608,
-    "train_samples_per_second": 1.389,
+    "train_samples_per_second": 1.37,
     "train_steps_per_second": 0.043
 }
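As a quick check on the updated metrics above, the new throughput figure follows directly from the unchanged sample count and the new runtime:

```python
# Sanity check: train_samples_per_second ≈ train_samples / train_runtime.
train_samples = 45608
train_runtime = 33297.3388  # seconds
print(round(train_samples / train_runtime, 2))  # 1.37, matching the updated value
```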
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:29959acbfed77235989e1aef658e532cffb880ee969f9e159bb8c3df305b5b65
+oid sha256:c5b54cc7b0a052ca95ad5a141d3b64e73a77fa5988939965071186feebe991e1
 size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:20f0a3d18ef2558a7ccebba38f5e5c5c38b714dd8b0423ea163b681d77476915
+oid sha256:72ad533d94c729abe1f8b16f0649bb03617efa3a75c431404f5948f7a5d93ae3
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:57cde4d07887621fc8e6c5db81cca59a21654b923704d92a0171060af2ef7bf3
+oid sha256:c118c90d91d39815dc9af806532e28cd75b19015b6e72328c331d7cea16da14e
 size 4540516344
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
     "epoch": 1.0,
     "total_flos": 0.0,
-    "train_loss": 0.4004642491922472,
-    "train_runtime": 32844.8806,
+    "train_loss": 0.40748132328498915,
+    "train_runtime": 33297.3388,
     "train_samples": 45608,
-    "train_samples_per_second": 1.389,
+    "train_samples_per_second": 1.37,
     "train_steps_per_second": 0.043
 }
trainer_state.json CHANGED
The diff for this file is too large to render.