Model save
Browse files- README.md +1 -1
- all_results.json +4 -4
- model-00001-of-00003.safetensors +1 -1
- model-00002-of-00003.safetensors +1 -1
- model-00003-of-00003.safetensors +1 -1
- train_results.json +4 -4
- trainer_state.json +0 -0
README.md
CHANGED
|
@@ -27,7 +27,7 @@ print(output["generated_text"])
|
|
| 27 |
|
| 28 |
## Training procedure
|
| 29 |
|
| 30 |
-
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yuchenl4/lmpref/runs/ultrafeedbackSkyworkAgree_alignmentZephyr7BSftFull_sdpo_score_ebs64_lr1e-
|
| 31 |
|
| 32 |
This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
|
| 33 |
|
|
|
|
| 27 |
|
| 28 |
## Training procedure
|
| 29 |
|
| 30 |
+
[<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/yuchenl4/lmpref/runs/ultrafeedbackSkyworkAgree_alignmentZephyr7BSftFull_sdpo_score_ebs64_lr1e-07_4try1pCwYialQsfnLoJ6piXazmqyhPlDLaw9xoT2IXIGYvg4NxT)
|
| 31 |
|
| 32 |
This model was trained with DPO, a method introduced in [Direct Preference Optimization: Your Language Model is Secretly a Reward Model](https://huggingface.co/papers/2305.18290).
|
| 33 |
|
all_results.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
"total_flos": 0.0,
|
| 4 |
-
"train_loss": 0.
|
| 5 |
-
"train_runtime":
|
| 6 |
"train_samples": 45608,
|
| 7 |
-
"train_samples_per_second": 1.
|
| 8 |
-
"train_steps_per_second": 0.
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
"total_flos": 0.0,
|
| 4 |
+
"train_loss": 0.5639231549406118,
|
| 5 |
+
"train_runtime": 32150.7936,
|
| 6 |
"train_samples": 45608,
|
| 7 |
+
"train_samples_per_second": 1.419,
|
| 8 |
+
"train_steps_per_second": 0.022
|
| 9 |
}
|
model-00001-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4943162336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:11ef37c0ef09654331a9509f92be7f1d23e39d3516d4a25640138dc07456a79b
|
| 3 |
size 4943162336
|
model-00002-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4999819336
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b40d36216db71164cd440e7aa60cc51fd07477cb02fe8dbbc97680a985c1fa1f
|
| 3 |
size 4999819336
|
model-00003-of-00003.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 4540516344
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e2cfdf590025470c817824f065815f85bdf0876b0a3089f50eddce7dbd49272
|
| 3 |
size 4540516344
|
train_results.json
CHANGED
|
@@ -1,9 +1,9 @@
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
"total_flos": 0.0,
|
| 4 |
-
"train_loss": 0.
|
| 5 |
-
"train_runtime":
|
| 6 |
"train_samples": 45608,
|
| 7 |
-
"train_samples_per_second": 1.
|
| 8 |
-
"train_steps_per_second": 0.
|
| 9 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"epoch": 1.0,
|
| 3 |
"total_flos": 0.0,
|
| 4 |
+
"train_loss": 0.5639231549406118,
|
| 5 |
+
"train_runtime": 32150.7936,
|
| 6 |
"train_samples": 45608,
|
| 7 |
+
"train_samples_per_second": 1.419,
|
| 8 |
+
"train_steps_per_second": 0.022
|
| 9 |
}
|
trainer_state.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|