Training in progress, step 1200, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:58e84e33b10cbbb24ba1662f22f7320ae2ca58b43ff8e8ae917cf116fc02f200
 size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:ccf8419f1ca2bff65f49e54850fabd0198bb85d3259b300b99d840aacfc85037
 size 162934501
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:
+oid sha256:1e36620b0d3c2a332d42c0e242e0afe41f2692002d485895a0199c7f0b34e433
 size 1465
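All three binaries above are stored through Git LFS, so the repository itself only carries small pointer files with the version / oid sha256: / size lines shown in these diffs; this commit swaps in the object hashes for the step-1200 adapter weights, optimizer state, and scheduler state. A minimal sketch of reading such a pointer (illustrative only; the helper name is not part of this repository, and it assumes the checkout was made without git-lfs smudging, so the pointer text is still on disk):

# Sketch: parse a Git LFS pointer file of the form shown above
# ("version ...", "oid sha256:<hash>", "size <bytes>").
from pathlib import Path

def read_lfs_pointer(path: str) -> dict:
    """Return the key/value pairs of a Git LFS pointer file."""
    pointer = {}
    for line in Path(path).read_text().splitlines():
        key, _, value = line.partition(" ")
        pointer[key] = value
    return pointer

info = read_lfs_pointer("last-checkpoint/adapter_model.safetensors")
print(info["oid"])   # e.g. sha256:58e84e33...
print(info["size"])  # e.g. 319876032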
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.
+  "epoch": 0.07537214999057848,
   "eval_steps": 500,
-  "global_step":
+  "global_step": 1200,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1748,6 +1748,81 @@
       "rewards/margins": -21.650144577026367,
       "rewards/rejected": -189.57769775390625,
       "step": 1150
+    },
+    {
+      "epoch": 0.07285974499089254,
+      "grad_norm": 53.36018371582031,
+      "learning_rate": 0.00019859878751505834,
+      "logits/chosen": -2.3769264221191406,
+      "logits/rejected": -2.3773722648620605,
+      "logps/chosen": -2328.4267578125,
+      "logps/rejected": -2146.54541015625,
+      "loss": 35.5288,
+      "rewards/accuracies": 0.45625001192092896,
+      "rewards/chosen": -208.66061401367188,
+      "rewards/margins": -17.51542854309082,
+      "rewards/rejected": -191.1452178955078,
+      "step": 1160
+    },
+    {
+      "epoch": 0.07348784624081402,
+      "grad_norm": 23.861251831054688,
+      "learning_rate": 0.00019856948302398827,
+      "logits/chosen": -2.2911629676818848,
+      "logits/rejected": -2.291214942932129,
+      "logps/chosen": -2545.89599609375,
+      "logps/rejected": -2199.31396484375,
+      "loss": 46.9012,
+      "rewards/accuracies": 0.3687500059604645,
+      "rewards/chosen": -227.5490264892578,
+      "rewards/margins": -32.629981994628906,
+      "rewards/rejected": -194.91903686523438,
+      "step": 1170
+    },
+    {
+      "epoch": 0.07411594749073551,
+      "grad_norm": 111.14179229736328,
+      "learning_rate": 0.00019853987747088186,
+      "logits/chosen": -2.3458666801452637,
+      "logits/rejected": -2.3459057807922363,
+      "logps/chosen": -2504.62841796875,
+      "logps/rejected": -2294.13037109375,
+      "loss": 40.86,
+      "rewards/accuracies": 0.3812499940395355,
+      "rewards/chosen": -225.32571411132812,
+      "rewards/margins": -19.68381118774414,
+      "rewards/rejected": -205.64187622070312,
+      "step": 1180
+    },
+    {
+      "epoch": 0.07474404874065699,
+      "grad_norm": 44.11448669433594,
+      "learning_rate": 0.00019850997094616375,
+      "logits/chosen": -2.415238857269287,
+      "logits/rejected": -2.415276288986206,
+      "logps/chosen": -2614.903564453125,
+      "logps/rejected": -2364.593017578125,
+      "loss": 39.6064,
+      "rewards/accuracies": 0.3812499940395355,
+      "rewards/chosen": -234.325439453125,
+      "rewards/margins": -23.00688934326172,
+      "rewards/rejected": -211.3185272216797,
+      "step": 1190
+    },
+    {
+      "epoch": 0.07537214999057848,
+      "grad_norm": 12.600385665893555,
+      "learning_rate": 0.00019847976354117782,
+      "logits/chosen": -2.3000645637512207,
+      "logits/rejected": -2.300097942352295,
+      "logps/chosen": -2576.286865234375,
+      "logps/rejected": -2298.492919921875,
+      "loss": 44.5961,
+      "rewards/accuracies": 0.4124999940395355,
+      "rewards/chosen": -230.34500122070312,
+      "rewards/margins": -25.56293296813965,
+      "rewards/rejected": -204.7820587158203,
+      "step": 1200
     }
   ],
   "logging_steps": 10,
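The trainer_state.json changes show the checkpoint advancing from the step-1150 entry to global_step 1200 (epoch ≈ 0.0754), appending five log entries for steps 1160-1200 with DPO-style metrics: loss, grad_norm, learning_rate, and the rewards/*, logps/* and logits/* statistics for chosen versus rejected completions. A minimal sketch for inspecting the newest entry, assuming the appended list is the standard log_history field of a Hugging Face Transformers trainer state:

import json

# Sketch: load the checkpoint's trainer state and print the latest logged step.
# Assumes the usual Transformers layout: "global_step", "epoch", "log_history".
with open("last-checkpoint/trainer_state.json") as f:
    state = json.load(f)

print(state["global_step"], state["epoch"])       # 1200 0.07537214999057848
latest = state["log_history"][-1]                 # the step-1200 entry
print(latest["loss"], latest["rewards/margins"])  # 44.5961 -25.56293296813965

Training would typically be resumed from this directory with Trainer.train(resume_from_checkpoint="last-checkpoint"), which is what consumes optimizer.pt, scheduler.pt, and this state file.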