apriasmoro commited on
Commit
651f5c3
·
verified ·
1 Parent(s): 4824868

Training in progress, step 1200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a929d3820cc1c1b87aaa3a19c3151498709177752334961b9baea187ddadb3d2
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58e84e33b10cbbb24ba1662f22f7320ae2ca58b43ff8e8ae917cf116fc02f200
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:599b6cf815824832480a957835de20f2fb6f9fe2a51fece8ad90ddab79816071
3
  size 162934501
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccf8419f1ca2bff65f49e54850fabd0198bb85d3259b300b99d840aacfc85037
3
  size 162934501
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37e03f7c5107969cc1329932dd9cfeb378051a709bd5fa18e9c0569938b2a7c7
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e36620b0d3c2a332d42c0e242e0afe41f2692002d485895a0199c7f0b34e433
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.07223164374097105,
6
  "eval_steps": 500,
7
- "global_step": 1150,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -1748,6 +1748,81 @@
1748
  "rewards/margins": -21.650144577026367,
1749
  "rewards/rejected": -189.57769775390625,
1750
  "step": 1150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1751
  }
1752
  ],
1753
  "logging_steps": 10,
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.07537214999057848,
6
  "eval_steps": 500,
7
+ "global_step": 1200,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
1748
  "rewards/margins": -21.650144577026367,
1749
  "rewards/rejected": -189.57769775390625,
1750
  "step": 1150
1751
+ },
1752
+ {
1753
+ "epoch": 0.07285974499089254,
1754
+ "grad_norm": 53.36018371582031,
1755
+ "learning_rate": 0.00019859878751505834,
1756
+ "logits/chosen": -2.3769264221191406,
1757
+ "logits/rejected": -2.3773722648620605,
1758
+ "logps/chosen": -2328.4267578125,
1759
+ "logps/rejected": -2146.54541015625,
1760
+ "loss": 35.5288,
1761
+ "rewards/accuracies": 0.45625001192092896,
1762
+ "rewards/chosen": -208.66061401367188,
1763
+ "rewards/margins": -17.51542854309082,
1764
+ "rewards/rejected": -191.1452178955078,
1765
+ "step": 1160
1766
+ },
1767
+ {
1768
+ "epoch": 0.07348784624081402,
1769
+ "grad_norm": 23.861251831054688,
1770
+ "learning_rate": 0.00019856948302398827,
1771
+ "logits/chosen": -2.2911629676818848,
1772
+ "logits/rejected": -2.291214942932129,
1773
+ "logps/chosen": -2545.89599609375,
1774
+ "logps/rejected": -2199.31396484375,
1775
+ "loss": 46.9012,
1776
+ "rewards/accuracies": 0.3687500059604645,
1777
+ "rewards/chosen": -227.5490264892578,
1778
+ "rewards/margins": -32.629981994628906,
1779
+ "rewards/rejected": -194.91903686523438,
1780
+ "step": 1170
1781
+ },
1782
+ {
1783
+ "epoch": 0.07411594749073551,
1784
+ "grad_norm": 111.14179229736328,
1785
+ "learning_rate": 0.00019853987747088186,
1786
+ "logits/chosen": -2.3458666801452637,
1787
+ "logits/rejected": -2.3459057807922363,
1788
+ "logps/chosen": -2504.62841796875,
1789
+ "logps/rejected": -2294.13037109375,
1790
+ "loss": 40.86,
1791
+ "rewards/accuracies": 0.3812499940395355,
1792
+ "rewards/chosen": -225.32571411132812,
1793
+ "rewards/margins": -19.68381118774414,
1794
+ "rewards/rejected": -205.64187622070312,
1795
+ "step": 1180
1796
+ },
1797
+ {
1798
+ "epoch": 0.07474404874065699,
1799
+ "grad_norm": 44.11448669433594,
1800
+ "learning_rate": 0.00019850997094616375,
1801
+ "logits/chosen": -2.415238857269287,
1802
+ "logits/rejected": -2.415276288986206,
1803
+ "logps/chosen": -2614.903564453125,
1804
+ "logps/rejected": -2364.593017578125,
1805
+ "loss": 39.6064,
1806
+ "rewards/accuracies": 0.3812499940395355,
1807
+ "rewards/chosen": -234.325439453125,
1808
+ "rewards/margins": -23.00688934326172,
1809
+ "rewards/rejected": -211.3185272216797,
1810
+ "step": 1190
1811
+ },
1812
+ {
1813
+ "epoch": 0.07537214999057848,
1814
+ "grad_norm": 12.600385665893555,
1815
+ "learning_rate": 0.00019847976354117782,
1816
+ "logits/chosen": -2.3000645637512207,
1817
+ "logits/rejected": -2.300097942352295,
1818
+ "logps/chosen": -2576.286865234375,
1819
+ "logps/rejected": -2298.492919921875,
1820
+ "loss": 44.5961,
1821
+ "rewards/accuracies": 0.4124999940395355,
1822
+ "rewards/chosen": -230.34500122070312,
1823
+ "rewards/margins": -25.56293296813965,
1824
+ "rewards/rejected": -204.7820587158203,
1825
+ "step": 1200
1826
  }
1827
  ],
1828
  "logging_steps": 10,