chelizi committed on
Commit
6a226f8
·
verified ·
1 Parent(s): e75deab

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -25,10 +25,10 @@
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
- "q_proj",
29
  "k_proj",
30
- "o_proj",
31
- "v_proj"
32
  ],
33
  "target_parameters": null,
34
  "task_type": "CAUSAL_LM",
 
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
+ "v_proj",
29
  "k_proj",
30
+ "q_proj",
31
+ "o_proj"
32
  ],
33
  "target_parameters": null,
34
  "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60da9d58360530663741e4ca22bae2db608962d358f38f3359596bc8edd8cfdc
3
  size 33588528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:737226b4a4649a3965c71c19abb2efbe5b0b31ab5554485bdbaf8db81faa945b
3
  size 33588528
checkpoint-125/adapter_config.json CHANGED
@@ -25,10 +25,10 @@
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
- "q_proj",
29
  "k_proj",
30
- "o_proj",
31
- "v_proj"
32
  ],
33
  "target_parameters": null,
34
  "task_type": "CAUSAL_LM",
 
25
  "rank_pattern": {},
26
  "revision": null,
27
  "target_modules": [
28
+ "v_proj",
29
  "k_proj",
30
+ "q_proj",
31
+ "o_proj"
32
  ],
33
  "target_parameters": null,
34
  "task_type": "CAUSAL_LM",
checkpoint-125/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60da9d58360530663741e4ca22bae2db608962d358f38f3359596bc8edd8cfdc
3
  size 33588528
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:737226b4a4649a3965c71c19abb2efbe5b0b31ab5554485bdbaf8db81faa945b
3
  size 33588528
checkpoint-125/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:897397825ce06bb33563ff8cf2867213c66af5fb3e0103d48d97cf3069112e5f
3
  size 67327691
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:565d0efc3c4dd91b6fce73d62da0c22a098b7cc863d7887d34b5269f8b75e6b2
3
  size 67327691
checkpoint-125/trainer_state.json CHANGED
@@ -11,86 +11,86 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.08,
14
- "grad_norm": 0.814979076385498,
15
  "learning_rate": 0.0001856,
16
- "loss": 1.5935,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.16,
21
- "grad_norm": 0.6319419741630554,
22
  "learning_rate": 0.0001696,
23
- "loss": 1.358,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.24,
28
- "grad_norm": 1.6961193084716797,
29
  "learning_rate": 0.00015360000000000002,
30
- "loss": 1.2815,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.32,
35
- "grad_norm": 0.353381484746933,
36
  "learning_rate": 0.00013759999999999998,
37
- "loss": 1.1922,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.4,
42
- "grad_norm": 0.49867403507232666,
43
  "learning_rate": 0.0001216,
44
- "loss": 1.3126,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.48,
49
- "grad_norm": 0.39462074637413025,
50
  "learning_rate": 0.0001056,
51
- "loss": 1.3094,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.56,
56
- "grad_norm": 0.38629117608070374,
57
  "learning_rate": 8.960000000000001e-05,
58
  "loss": 1.2157,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.64,
63
- "grad_norm": 0.41698044538497925,
64
  "learning_rate": 7.36e-05,
65
- "loss": 1.3107,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.72,
70
- "grad_norm": 0.37216514348983765,
71
  "learning_rate": 5.76e-05,
72
- "loss": 1.2042,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.8,
77
- "grad_norm": 0.5222511291503906,
78
  "learning_rate": 4.16e-05,
79
- "loss": 1.2705,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.88,
84
- "grad_norm": 0.5186126232147217,
85
  "learning_rate": 2.5600000000000002e-05,
86
- "loss": 1.3127,
87
  "step": 110
88
  },
89
  {
90
  "epoch": 0.96,
91
- "grad_norm": 0.3743302524089813,
92
  "learning_rate": 9.600000000000001e-06,
93
- "loss": 1.2898,
94
  "step": 120
95
  }
96
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.08,
14
+ "grad_norm": 0.6921696066856384,
15
  "learning_rate": 0.0001856,
16
+ "loss": 1.593,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.16,
21
+ "grad_norm": 0.6577253341674805,
22
  "learning_rate": 0.0001696,
23
+ "loss": 1.3572,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.24,
28
+ "grad_norm": 0.4804594814777374,
29
  "learning_rate": 0.00015360000000000002,
30
+ "loss": 1.28,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.32,
35
+ "grad_norm": 0.3516864478588104,
36
  "learning_rate": 0.00013759999999999998,
37
+ "loss": 1.191,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.4,
42
+ "grad_norm": 0.5060459971427917,
43
  "learning_rate": 0.0001216,
44
+ "loss": 1.3117,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.48,
49
+ "grad_norm": 0.3955352008342743,
50
  "learning_rate": 0.0001056,
51
+ "loss": 1.3089,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.56,
56
+ "grad_norm": 0.3805406391620636,
57
  "learning_rate": 8.960000000000001e-05,
58
  "loss": 1.2157,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.64,
63
+ "grad_norm": 0.405851811170578,
64
  "learning_rate": 7.36e-05,
65
+ "loss": 1.3118,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.72,
70
+ "grad_norm": 0.3721686899662018,
71
  "learning_rate": 5.76e-05,
72
+ "loss": 1.2033,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.8,
77
+ "grad_norm": 0.5105984807014465,
78
  "learning_rate": 4.16e-05,
79
+ "loss": 1.2716,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.88,
84
+ "grad_norm": 0.5118499398231506,
85
  "learning_rate": 2.5600000000000002e-05,
86
+ "loss": 1.3137,
87
  "step": 110
88
  },
89
  {
90
  "epoch": 0.96,
91
+ "grad_norm": 0.36947008967399597,
92
  "learning_rate": 9.600000000000001e-06,
93
+ "loss": 1.2903,
94
  "step": 120
95
  }
96
  ],