apriasmoro committed
Commit a8fa959 · verified · 1 Parent(s): a593a63

Training in progress, step 1250, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fd9927983a51bdbbbe3f6a896496fe7f34e36c8366b36cedd986c06700a1248
+oid sha256:a5cca72daa84dd619b2d0f5827f7875f6225bf21230ec28901571741fe05a484
 size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:674ddc9dff63c1585f454a1014265dde25478368a5b7c4b9ac6e1d5ad8c4bf20
+oid sha256:c69a2000a2d781aab5e4c2150091fa45f58d2247fcba867430eea8fe19576ed3
 size 162934501
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:13672cd0f406fcf673996bd62cd145a1b4a99e9392245660dc5eedeaace38826
+oid sha256:b02196a412b7b1bbb8e243002c719eb4ea8c8114bf5d1aa4efe847cee36bb5d6
 size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
   "best_global_step": null,
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.05959475566150179,
+  "epoch": 0.06207787048073103,
   "eval_steps": 500,
-  "global_step": 1200,
+  "global_step": 1250,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1823,6 +1823,81 @@
       "rewards/margins": 1.0934114456176758,
       "rewards/rejected": -171.5682373046875,
       "step": 1200
+    },
+    {
+      "epoch": 0.060091378625347636,
+      "grad_norm": 18.099149703979492,
+      "learning_rate": 0.00019892271922674633,
+      "logits/chosen": -2.4598028659820557,
+      "logits/rejected": -2.528168201446533,
+      "logps/chosen": -1820.162109375,
+      "logps/rejected": -1815.075927734375,
+      "loss": 9.8981,
+      "rewards/accuracies": 0.4000000059604645,
+      "rewards/chosen": -164.43930053710938,
+      "rewards/margins": -1.1209890842437744,
+      "rewards/rejected": -163.31829833984375,
+      "step": 1210
+    },
+    {
+      "epoch": 0.06058800158919348,
+      "grad_norm": 38.524173736572266,
+      "learning_rate": 0.0001989012993586112,
+      "logits/chosen": -2.442213535308838,
+      "logits/rejected": -2.491849422454834,
+      "logps/chosen": -1803.8734130859375,
+      "logps/rejected": -1723.594970703125,
+      "loss": 15.0076,
+      "rewards/accuracies": 0.33125001192092896,
+      "rewards/chosen": -163.45181274414062,
+      "rewards/margins": -8.174293518066406,
+      "rewards/rejected": -155.27752685546875,
+      "step": 1220
+    },
+    {
+      "epoch": 0.061084624553039334,
+      "grad_norm": 15.730033874511719,
+      "learning_rate": 0.00019887966980912841,
+      "logits/chosen": -2.4136605262756348,
+      "logits/rejected": -2.4654335975646973,
+      "logps/chosen": -1924.3226318359375,
+      "logps/rejected": -1878.5234375,
+      "loss": 12.3718,
+      "rewards/accuracies": 0.41874998807907104,
+      "rewards/chosen": -168.52288818359375,
+      "rewards/margins": -3.0252227783203125,
+      "rewards/rejected": -165.49766540527344,
+      "step": 1230
+    },
+    {
+      "epoch": 0.06158124751688518,
+      "grad_norm": 18.012292861938477,
+      "learning_rate": 0.00019885783062415503,
+      "logits/chosen": -2.262502431869507,
+      "logits/rejected": -2.30684757232666,
+      "logps/chosen": -1802.6370849609375,
+      "logps/rejected": -1747.916748046875,
+      "loss": 15.6914,
+      "rewards/accuracies": 0.3375000059604645,
+      "rewards/chosen": -159.5081024169922,
+      "rewards/margins": -6.31906795501709,
+      "rewards/rejected": -153.18905639648438,
+      "step": 1240
+    },
+    {
+      "epoch": 0.06207787048073103,
+      "grad_norm": 31.46440315246582,
+      "learning_rate": 0.00019883578184999237,
+      "logits/chosen": -2.357743978500366,
+      "logits/rejected": -2.4041171073913574,
+      "logps/chosen": -1812.1968994140625,
+      "logps/rejected": -1822.2720947265625,
+      "loss": 11.4807,
+      "rewards/accuracies": 0.35624998807907104,
+      "rewards/chosen": -160.29063415527344,
+      "rewards/margins": -0.3494691848754883,
+      "rewards/rejected": -159.94117736816406,
+      "step": 1250
     }
   ],
   "logging_steps": 10,