Training in progress, step 1250, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 319876032
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5cca72daa84dd619b2d0f5827f7875f6225bf21230ec28901571741fe05a484
|
| 3 |
size 319876032
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 162934501
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c69a2000a2d781aab5e4c2150091fa45f58d2247fcba867430eea8fe19576ed3
|
| 3 |
size 162934501
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1465
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b02196a412b7b1bbb8e243002c719eb4ea8c8114bf5d1aa4efe847cee36bb5d6
|
| 3 |
size 1465
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -2,9 +2,9 @@
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
-
"epoch": 0.
|
| 6 |
"eval_steps": 500,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -1823,6 +1823,81 @@
|
|
| 1823 |
"rewards/margins": 1.0934114456176758,
|
| 1824 |
"rewards/rejected": -171.5682373046875,
|
| 1825 |
"step": 1200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1826 |
}
|
| 1827 |
],
|
| 1828 |
"logging_steps": 10,
|
|
|
|
| 2 |
"best_global_step": null,
|
| 3 |
"best_metric": null,
|
| 4 |
"best_model_checkpoint": null,
|
| 5 |
+
"epoch": 0.06207787048073103,
|
| 6 |
"eval_steps": 500,
|
| 7 |
+
"global_step": 1250,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 1823 |
"rewards/margins": 1.0934114456176758,
|
| 1824 |
"rewards/rejected": -171.5682373046875,
|
| 1825 |
"step": 1200
|
| 1826 |
+
},
|
| 1827 |
+
{
|
| 1828 |
+
"epoch": 0.060091378625347636,
|
| 1829 |
+
"grad_norm": 18.099149703979492,
|
| 1830 |
+
"learning_rate": 0.00019892271922674633,
|
| 1831 |
+
"logits/chosen": -2.4598028659820557,
|
| 1832 |
+
"logits/rejected": -2.528168201446533,
|
| 1833 |
+
"logps/chosen": -1820.162109375,
|
| 1834 |
+
"logps/rejected": -1815.075927734375,
|
| 1835 |
+
"loss": 9.8981,
|
| 1836 |
+
"rewards/accuracies": 0.4000000059604645,
|
| 1837 |
+
"rewards/chosen": -164.43930053710938,
|
| 1838 |
+
"rewards/margins": -1.1209890842437744,
|
| 1839 |
+
"rewards/rejected": -163.31829833984375,
|
| 1840 |
+
"step": 1210
|
| 1841 |
+
},
|
| 1842 |
+
{
|
| 1843 |
+
"epoch": 0.06058800158919348,
|
| 1844 |
+
"grad_norm": 38.524173736572266,
|
| 1845 |
+
"learning_rate": 0.0001989012993586112,
|
| 1846 |
+
"logits/chosen": -2.442213535308838,
|
| 1847 |
+
"logits/rejected": -2.491849422454834,
|
| 1848 |
+
"logps/chosen": -1803.8734130859375,
|
| 1849 |
+
"logps/rejected": -1723.594970703125,
|
| 1850 |
+
"loss": 15.0076,
|
| 1851 |
+
"rewards/accuracies": 0.33125001192092896,
|
| 1852 |
+
"rewards/chosen": -163.45181274414062,
|
| 1853 |
+
"rewards/margins": -8.174293518066406,
|
| 1854 |
+
"rewards/rejected": -155.27752685546875,
|
| 1855 |
+
"step": 1220
|
| 1856 |
+
},
|
| 1857 |
+
{
|
| 1858 |
+
"epoch": 0.061084624553039334,
|
| 1859 |
+
"grad_norm": 15.730033874511719,
|
| 1860 |
+
"learning_rate": 0.00019887966980912841,
|
| 1861 |
+
"logits/chosen": -2.4136605262756348,
|
| 1862 |
+
"logits/rejected": -2.4654335975646973,
|
| 1863 |
+
"logps/chosen": -1924.3226318359375,
|
| 1864 |
+
"logps/rejected": -1878.5234375,
|
| 1865 |
+
"loss": 12.3718,
|
| 1866 |
+
"rewards/accuracies": 0.41874998807907104,
|
| 1867 |
+
"rewards/chosen": -168.52288818359375,
|
| 1868 |
+
"rewards/margins": -3.0252227783203125,
|
| 1869 |
+
"rewards/rejected": -165.49766540527344,
|
| 1870 |
+
"step": 1230
|
| 1871 |
+
},
|
| 1872 |
+
{
|
| 1873 |
+
"epoch": 0.06158124751688518,
|
| 1874 |
+
"grad_norm": 18.012292861938477,
|
| 1875 |
+
"learning_rate": 0.00019885783062415503,
|
| 1876 |
+
"logits/chosen": -2.262502431869507,
|
| 1877 |
+
"logits/rejected": -2.30684757232666,
|
| 1878 |
+
"logps/chosen": -1802.6370849609375,
|
| 1879 |
+
"logps/rejected": -1747.916748046875,
|
| 1880 |
+
"loss": 15.6914,
|
| 1881 |
+
"rewards/accuracies": 0.3375000059604645,
|
| 1882 |
+
"rewards/chosen": -159.5081024169922,
|
| 1883 |
+
"rewards/margins": -6.31906795501709,
|
| 1884 |
+
"rewards/rejected": -153.18905639648438,
|
| 1885 |
+
"step": 1240
|
| 1886 |
+
},
|
| 1887 |
+
{
|
| 1888 |
+
"epoch": 0.06207787048073103,
|
| 1889 |
+
"grad_norm": 31.46440315246582,
|
| 1890 |
+
"learning_rate": 0.00019883578184999237,
|
| 1891 |
+
"logits/chosen": -2.357743978500366,
|
| 1892 |
+
"logits/rejected": -2.4041171073913574,
|
| 1893 |
+
"logps/chosen": -1812.1968994140625,
|
| 1894 |
+
"logps/rejected": -1822.2720947265625,
|
| 1895 |
+
"loss": 11.4807,
|
| 1896 |
+
"rewards/accuracies": 0.35624998807907104,
|
| 1897 |
+
"rewards/chosen": -160.29063415527344,
|
| 1898 |
+
"rewards/margins": -0.3494691848754883,
|
| 1899 |
+
"rewards/rejected": -159.94117736816406,
|
| 1900 |
+
"step": 1250
|
| 1901 |
}
|
| 1902 |
],
|
| 1903 |
"logging_steps": 10,
|