mehuldamani committed on
Commit
4fa500b
·
verified ·
1 Parent(s): 0c6456d

Model save

Browse files
README.md CHANGED
@@ -27,7 +27,7 @@ print(output["generated_text"])
27
 
28
  ## Training procedure
29
 
30
- [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/mehuldamani/grpo/runs/05z5vzuu)
31
 
32
 
33
  This model was trained with SFT.
 
27
 
28
  ## Training procedure
29
 
30
+ [<img src="https://raw.githubusercontent.com/wandb/assets/main/wandb-github-badge-28.svg" alt="Visualize in Weights & Biases" width="150" height="24"/>](https://wandb.ai/mehuldamani/grpo/runs/6zd8lzhq)
31
 
32
 
33
  This model was trained with SFT.
all_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 3.4831312270239744e+16,
3
- "train_loss": 0.5093159944780411,
4
- "train_runtime": 190.1381,
5
  "train_samples": 500,
6
- "train_samples_per_second": 2.63,
7
- "train_steps_per_second": 0.163
8
  }
 
1
  {
2
+ "total_flos": 3.478922794514227e+16,
3
+ "train_loss": 0.5009717364465037,
4
+ "train_runtime": 188.6962,
5
  "train_samples": 500,
6
+ "train_samples_per_second": 2.65,
7
+ "train_steps_per_second": 0.164
8
  }
model-00001-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05df9af5bbcc1fc4485237adf9b59fafee3503ca6472022d290d0877364bd118
3
  size 4877660776
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3a937840bb41d58c6831fd1ab4cc2730322968e3acba561a8f0a43213d1514a
3
  size 4877660776
model-00002-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:735ea284946ed2bbe6c1d7630edd5f35288d8fd9f32ed1f197bf4b981387e8cd
3
  size 4932751008
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:824041223d77709dde94551d419b903ec85bce195d9f471d2982d64fc49f87e1
3
  size 4932751008
model-00003-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:586f47df3b7eefbb6a1ba768e09bb2acc46ac93eafbfee29c3229f45c0918944
3
  size 4330865200
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:257ff2712398fba61ff1234263fb3cf5770c9a8a908b9e3085aa22931e1b6f87
3
  size 4330865200
model-00004-of-00004.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47744af40e9a9dba93296005eab6fa0fc73f49acec96f6f0e57fd524bce9530c
3
  size 1089994880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7b39b048f665379c405a5678c6d82cb40c21ced641dbf38c7ea1d70348a2ad8
3
  size 1089994880
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "total_flos": 3.4831312270239744e+16,
3
- "train_loss": 0.5093159944780411,
4
- "train_runtime": 190.1381,
5
  "train_samples": 500,
6
- "train_samples_per_second": 2.63,
7
- "train_steps_per_second": 0.163
8
  }
 
1
  {
2
+ "total_flos": 3.478922794514227e+16,
3
+ "train_loss": 0.5009717364465037,
4
+ "train_runtime": 188.6962,
5
  "train_samples": 500,
6
+ "train_samples_per_second": 2.65,
7
+ "train_steps_per_second": 0.164
8
  }
trainer_state.json CHANGED
@@ -10,88 +10,88 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.16,
13
- "grad_norm": 80.5,
14
  "learning_rate": 1e-05,
15
- "loss": 0.8016,
16
- "mean_token_accuracy": 0.7859388060867787,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.32,
21
- "grad_norm": 5.90625,
22
  "learning_rate": 1e-05,
23
- "loss": 0.4746,
24
- "mean_token_accuracy": 0.85956239849329,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 0.32,
29
- "eval_loss": 0.5104641914367676,
30
- "eval_mean_token_accuracy": 0.8601989203160352,
31
- "eval_runtime": 8.1194,
32
- "eval_samples_per_second": 12.439,
33
- "eval_steps_per_second": 12.439,
34
  "step": 10
35
  },
36
  {
37
  "epoch": 0.48,
38
- "grad_norm": 47.0,
39
  "learning_rate": 1e-05,
40
- "loss": 0.4803,
41
- "mean_token_accuracy": 0.855245703458786,
42
  "step": 15
43
  },
44
  {
45
  "epoch": 0.64,
46
- "grad_norm": 1.1875,
47
  "learning_rate": 1e-05,
48
- "loss": 0.3854,
49
- "mean_token_accuracy": 0.8733080595731735,
50
  "step": 20
51
  },
52
  {
53
  "epoch": 0.64,
54
- "eval_loss": 0.44995084404945374,
55
- "eval_mean_token_accuracy": 0.8694882983028298,
56
- "eval_runtime": 8.144,
57
- "eval_samples_per_second": 12.402,
58
- "eval_steps_per_second": 12.402,
59
  "step": 20
60
  },
61
  {
62
  "epoch": 0.8,
63
- "grad_norm": 1.4609375,
64
  "learning_rate": 1e-05,
65
- "loss": 0.4753,
66
- "mean_token_accuracy": 0.8553871914744378,
67
  "step": 25
68
  },
69
  {
70
  "epoch": 0.96,
71
- "grad_norm": 1.578125,
72
  "learning_rate": 1e-05,
73
- "loss": 0.4362,
74
- "mean_token_accuracy": 0.863992878049612,
75
  "step": 30
76
  },
77
  {
78
  "epoch": 0.96,
79
- "eval_loss": 0.4244144558906555,
80
- "eval_mean_token_accuracy": 0.8749614068777254,
81
- "eval_runtime": 8.1573,
82
- "eval_samples_per_second": 12.382,
83
- "eval_steps_per_second": 12.382,
84
  "step": 30
85
  },
86
  {
87
  "epoch": 0.992,
88
- "mean_token_accuracy": 0.8452935963869095,
89
  "step": 31,
90
- "total_flos": 3.4831312270239744e+16,
91
- "train_loss": 0.5093159944780411,
92
- "train_runtime": 190.1381,
93
- "train_samples_per_second": 2.63,
94
- "train_steps_per_second": 0.163
95
  }
96
  ],
97
  "logging_steps": 5,
@@ -111,7 +111,7 @@
111
  "attributes": {}
112
  }
113
  },
114
- "total_flos": 3.4831312270239744e+16,
115
  "train_batch_size": 1,
116
  "trial_name": null,
117
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.16,
13
+ "grad_norm": 72.5,
14
  "learning_rate": 1e-05,
15
+ "loss": 0.791,
16
+ "mean_token_accuracy": 0.7902325183153153,
17
  "step": 5
18
  },
19
  {
20
  "epoch": 0.32,
21
+ "grad_norm": 2.78125,
22
  "learning_rate": 1e-05,
23
+ "loss": 0.458,
24
+ "mean_token_accuracy": 0.8656408250331878,
25
  "step": 10
26
  },
27
  {
28
  "epoch": 0.32,
29
+ "eval_loss": 0.5019531846046448,
30
+ "eval_mean_token_accuracy": 0.8634858332057991,
31
+ "eval_runtime": 8.0528,
32
+ "eval_samples_per_second": 12.542,
33
+ "eval_steps_per_second": 12.542,
34
  "step": 10
35
  },
36
  {
37
  "epoch": 0.48,
38
+ "grad_norm": 2.390625,
39
  "learning_rate": 1e-05,
40
+ "loss": 0.4723,
41
+ "mean_token_accuracy": 0.8590857356786727,
42
  "step": 15
43
  },
44
  {
45
  "epoch": 0.64,
46
+ "grad_norm": 1.1640625,
47
  "learning_rate": 1e-05,
48
+ "loss": 0.3782,
49
+ "mean_token_accuracy": 0.8752073399722576,
50
  "step": 20
51
  },
52
  {
53
  "epoch": 0.64,
54
+ "eval_loss": 0.44335949420928955,
55
+ "eval_mean_token_accuracy": 0.8705554610431785,
56
+ "eval_runtime": 8.1133,
57
+ "eval_samples_per_second": 12.449,
58
+ "eval_steps_per_second": 12.449,
59
  "step": 20
60
  },
61
  {
62
  "epoch": 0.8,
63
+ "grad_norm": 1.5234375,
64
  "learning_rate": 1e-05,
65
+ "loss": 0.4699,
66
+ "mean_token_accuracy": 0.8568822145462036,
67
  "step": 25
68
  },
69
  {
70
  "epoch": 0.96,
71
+ "grad_norm": 1.5390625,
72
  "learning_rate": 1e-05,
73
+ "loss": 0.4329,
74
+ "mean_token_accuracy": 0.8653567247092724,
75
  "step": 30
76
  },
77
  {
78
  "epoch": 0.96,
79
+ "eval_loss": 0.42237037420272827,
80
+ "eval_mean_token_accuracy": 0.8750163653109333,
81
+ "eval_runtime": 8.1202,
82
+ "eval_samples_per_second": 12.438,
83
+ "eval_steps_per_second": 12.438,
84
  "step": 30
85
  },
86
  {
87
  "epoch": 0.992,
88
+ "mean_token_accuracy": 0.8463566526770592,
89
  "step": 31,
90
+ "total_flos": 3.478922794514227e+16,
91
+ "train_loss": 0.5009717364465037,
92
+ "train_runtime": 188.6962,
93
+ "train_samples_per_second": 2.65,
94
+ "train_steps_per_second": 0.164
95
  }
96
  ],
97
  "logging_steps": 5,
 
111
  "attributes": {}
112
  }
113
  },
114
+ "total_flos": 3.478922794514227e+16,
115
  "train_batch_size": 1,
116
  "trial_name": null,
117
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9559d7192e2181d349e0aec63d9aaf3c7573ff5a67df719b40ad5ed946eb229
3
  size 5880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0727bb194d0f1b17b7923152ee54dbb78426c0763fbfa564c944ae46a0e3a52b
3
  size 5880