Chenghao-Qiu commited on
Commit
7c02b84
·
verified ·
1 Parent(s): 36f732b

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "pGenomeOcean/GenomeOcean-100M",
3
+ "architectures": [
4
+ "MistralForSequenceClassification"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoConfig": "pGenomeOcean/GenomeOcean-100M--configuration_mistral.MistralConfig",
9
+ "AutoModel": "pGenomeOcean/GenomeOcean-100M--modeling_mistral.MistralModel",
10
+ "AutoModelForCausalLM": "pGenomeOcean/GenomeOcean-100M--modeling_mistral.MistralForCausalLM",
11
+ "AutoModelForMaskedLM": "pGenomeOcean/GenomeOcean-100M--modeling_mistral.MistralForMaskedLM",
12
+ "AutoModelForSequenceClassification": "pGenomeOcean/GenomeOcean-100M--modeling_mistral.MistralForSequenceClassification"
13
+ },
14
+ "bos_token_id": 1,
15
+ "classifier_dropout": 0.1,
16
+ "eos_token_id": 2,
17
+ "hidden_act": "silu",
18
+ "hidden_size": 768,
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "is_causal": true,
22
+ "max_position_embeddings": 32768,
23
+ "model_type": "mistral",
24
+ "num_attention_heads": 8,
25
+ "num_hidden_layers": 12,
26
+ "num_key_value_heads": 8,
27
+ "output_router_logits": false,
28
+ "pad_token_id": 3,
29
+ "problem_type": "single_label_classification",
30
+ "rms_norm_eps": 1e-05,
31
+ "rope_theta": 1000000.0,
32
+ "router_aux_loss_coef": 0.02,
33
+ "sliding_window": null,
34
+ "tie_word_embeddings": false,
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.45.2",
37
+ "use_cache": true,
38
+ "vocab_size": 4096
39
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04ac521cfeddce7fb6a5a9b6400a33f0577daa74ca7da7865618f7b3bf612a73
3
+ size 465662960
optimizer_state_dict.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41102730b44f0129dfe37b8363c0946c841266a962359d3aaee8fc650fa51673
3
+ size 931398901
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": {
3
+ "content": "[CLS]",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "mask_token": {
10
+ "content": "[MASK]",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "[PAD]",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "sep_token": {
24
+ "content": "[SEP]",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "unk_token": {
31
+ "content": "[UNK]",
32
+ "lstrip": false,
33
+ "normalized": false,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ }
37
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "[UNK]",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "[CLS]",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "[SEP]",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "[PAD]",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "4": {
36
+ "content": "[MASK]",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ }
43
+ },
44
+ "clean_up_tokenization_spaces": true,
45
+ "cls_token": "[CLS]",
46
+ "mask_token": "[MASK]",
47
+ "model_max_length": 256,
48
+ "pad_token": "[PAD]",
49
+ "padding_side": "right",
50
+ "sep_token": "[SEP]",
51
+ "tokenizer_class": "PreTrainedTokenizerFast",
52
+ "unk_token": "[UNK]"
53
+ }
trainer_state.json ADDED
@@ -0,0 +1,481 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.2197728157043457,
3
+ "best_model_checkpoint": "output_pipe/1/origin/checkpoint-1600",
4
+ "epoch": 4.0,
5
+ "eval_steps": 200,
6
+ "global_step": 3372,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.11862396204033215,
13
+ "grad_norm": 9.243636131286621,
14
+ "learning_rate": 2.957555689343769e-05,
15
+ "loss": 0.4501,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.2372479240806643,
20
+ "grad_norm": 6.825350761413574,
21
+ "learning_rate": 2.8672486453943407e-05,
22
+ "loss": 0.2984,
23
+ "step": 200
24
+ },
25
+ {
26
+ "epoch": 0.2372479240806643,
27
+ "eval_accuracy": 0.8756115641215715,
28
+ "eval_f1": 0.8753583742291109,
29
+ "eval_loss": 0.2889566719532013,
30
+ "eval_matthews_correlation": 0.7552199511937604,
31
+ "eval_precision": 0.8793427094656048,
32
+ "eval_recall": 0.8758851563831229,
33
+ "eval_runtime": 1.416,
34
+ "eval_samples_per_second": 4763.557,
35
+ "eval_steps_per_second": 74.861,
36
+ "step": 200
37
+ },
38
+ {
39
+ "epoch": 0.35587188612099646,
40
+ "grad_norm": 9.723295211791992,
41
+ "learning_rate": 2.7769416014449128e-05,
42
+ "loss": 0.2867,
43
+ "step": 300
44
+ },
45
+ {
46
+ "epoch": 0.4744958481613286,
47
+ "grad_norm": 4.6286139488220215,
48
+ "learning_rate": 2.686634557495485e-05,
49
+ "loss": 0.2692,
50
+ "step": 400
51
+ },
52
+ {
53
+ "epoch": 0.4744958481613286,
54
+ "eval_accuracy": 0.8923647146034099,
55
+ "eval_f1": 0.8923344963363296,
56
+ "eval_loss": 0.2463846057653427,
57
+ "eval_matthews_correlation": 0.7855525209874886,
58
+ "eval_precision": 0.8930692687442148,
59
+ "eval_recall": 0.892483470662371,
60
+ "eval_runtime": 1.4056,
61
+ "eval_samples_per_second": 4798.74,
62
+ "eval_steps_per_second": 75.414,
63
+ "step": 400
64
+ },
65
+ {
66
+ "epoch": 0.5931198102016607,
67
+ "grad_norm": 6.366510391235352,
68
+ "learning_rate": 2.5963275135460566e-05,
69
+ "loss": 0.2648,
70
+ "step": 500
71
+ },
72
+ {
73
+ "epoch": 0.7117437722419929,
74
+ "grad_norm": 6.894649505615234,
75
+ "learning_rate": 2.5060204695966287e-05,
76
+ "loss": 0.2594,
77
+ "step": 600
78
+ },
79
+ {
80
+ "epoch": 0.7117437722419929,
81
+ "eval_accuracy": 0.9012601927353595,
82
+ "eval_f1": 0.901260173202269,
83
+ "eval_loss": 0.22498522698879242,
84
+ "eval_matthews_correlation": 0.8025661484949196,
85
+ "eval_precision": 0.9012812748493957,
86
+ "eval_recall": 0.9012848736535926,
87
+ "eval_runtime": 1.4052,
88
+ "eval_samples_per_second": 4800.055,
89
+ "eval_steps_per_second": 75.435,
90
+ "step": 600
91
+ },
92
+ {
93
+ "epoch": 0.830367734282325,
94
+ "grad_norm": 16.520292282104492,
95
+ "learning_rate": 2.4157134256472004e-05,
96
+ "loss": 0.2517,
97
+ "step": 700
98
+ },
99
+ {
100
+ "epoch": 0.9489916963226572,
101
+ "grad_norm": 3.2996578216552734,
102
+ "learning_rate": 2.3254063816977725e-05,
103
+ "loss": 0.2432,
104
+ "step": 800
105
+ },
106
+ {
107
+ "epoch": 0.9489916963226572,
108
+ "eval_accuracy": 0.8917716827279466,
109
+ "eval_f1": 0.8917528361445037,
110
+ "eval_loss": 0.24542774260044098,
111
+ "eval_matthews_correlation": 0.7836117160608459,
112
+ "eval_precision": 0.8918855979873312,
113
+ "eval_recall": 0.891726134298813,
114
+ "eval_runtime": 1.4043,
115
+ "eval_samples_per_second": 4803.154,
116
+ "eval_steps_per_second": 75.483,
117
+ "step": 800
118
+ },
119
+ {
120
+ "epoch": 1.0676156583629894,
121
+ "grad_norm": 6.839262962341309,
122
+ "learning_rate": 2.2350993377483446e-05,
123
+ "loss": 0.2125,
124
+ "step": 900
125
+ },
126
+ {
127
+ "epoch": 1.1862396204033214,
128
+ "grad_norm": 10.704083442687988,
129
+ "learning_rate": 2.1447922937989163e-05,
130
+ "loss": 0.1561,
131
+ "step": 1000
132
+ },
133
+ {
134
+ "epoch": 1.1862396204033214,
135
+ "eval_accuracy": 0.8956263899184581,
136
+ "eval_f1": 0.8951017745713171,
137
+ "eval_loss": 0.2612854242324829,
138
+ "eval_matthews_correlation": 0.8007606389596266,
139
+ "eval_precision": 0.904766892075258,
140
+ "eval_recall": 0.8960412854047166,
141
+ "eval_runtime": 1.4053,
142
+ "eval_samples_per_second": 4799.797,
143
+ "eval_steps_per_second": 75.43,
144
+ "step": 1000
145
+ },
146
+ {
147
+ "epoch": 1.3048635824436536,
148
+ "grad_norm": 4.566791534423828,
149
+ "learning_rate": 2.0544852498494884e-05,
150
+ "loss": 0.1685,
151
+ "step": 1100
152
+ },
153
+ {
154
+ "epoch": 1.4234875444839858,
155
+ "grad_norm": 3.4650704860687256,
156
+ "learning_rate": 1.96417820590006e-05,
157
+ "loss": 0.1607,
158
+ "step": 1200
159
+ },
160
+ {
161
+ "epoch": 1.4234875444839858,
162
+ "eval_accuracy": 0.9071905114899926,
163
+ "eval_f1": 0.9071251020390567,
164
+ "eval_loss": 0.22077599167823792,
165
+ "eval_matthews_correlation": 0.8161151585361717,
166
+ "eval_precision": 0.9087529499988971,
167
+ "eval_recall": 0.9073633915023229,
168
+ "eval_runtime": 1.4041,
169
+ "eval_samples_per_second": 4803.697,
170
+ "eval_steps_per_second": 75.492,
171
+ "step": 1200
172
+ },
173
+ {
174
+ "epoch": 1.5421115065243178,
175
+ "grad_norm": 9.395886421203613,
176
+ "learning_rate": 1.8738711619506322e-05,
177
+ "loss": 0.1751,
178
+ "step": 1300
179
+ },
180
+ {
181
+ "epoch": 1.66073546856465,
182
+ "grad_norm": 5.750766277313232,
183
+ "learning_rate": 1.7835641180012043e-05,
184
+ "loss": 0.1617,
185
+ "step": 1400
186
+ },
187
+ {
188
+ "epoch": 1.66073546856465,
189
+ "eval_accuracy": 0.9033358042994811,
190
+ "eval_f1": 0.9032721716896848,
191
+ "eval_loss": 0.23882386088371277,
192
+ "eval_matthews_correlation": 0.8072800092044834,
193
+ "eval_precision": 0.9040581552977713,
194
+ "eval_recall": 0.9032222866414605,
195
+ "eval_runtime": 1.4054,
196
+ "eval_samples_per_second": 4799.439,
197
+ "eval_steps_per_second": 75.425,
198
+ "step": 1400
199
+ },
200
+ {
201
+ "epoch": 1.7793594306049823,
202
+ "grad_norm": 10.305908203125,
203
+ "learning_rate": 1.693257074051776e-05,
204
+ "loss": 0.1615,
205
+ "step": 1500
206
+ },
207
+ {
208
+ "epoch": 1.8979833926453145,
209
+ "grad_norm": 6.007554531097412,
210
+ "learning_rate": 1.602950030102348e-05,
211
+ "loss": 0.1669,
212
+ "step": 1600
213
+ },
214
+ {
215
+ "epoch": 1.8979833926453145,
216
+ "eval_accuracy": 0.9058561897702001,
217
+ "eval_f1": 0.9057890959881881,
218
+ "eval_loss": 0.2197728157043457,
219
+ "eval_matthews_correlation": 0.8134577263257711,
220
+ "eval_precision": 0.9074290868275333,
221
+ "eval_recall": 0.9060298429301878,
222
+ "eval_runtime": 1.4038,
223
+ "eval_samples_per_second": 4804.94,
224
+ "eval_steps_per_second": 75.511,
225
+ "step": 1600
226
+ },
227
+ {
228
+ "epoch": 2.0166073546856467,
229
+ "grad_norm": 17.52599334716797,
230
+ "learning_rate": 1.5126429861529199e-05,
231
+ "loss": 0.1482,
232
+ "step": 1700
233
+ },
234
+ {
235
+ "epoch": 2.135231316725979,
236
+ "grad_norm": 13.2647066116333,
237
+ "learning_rate": 1.422335942203492e-05,
238
+ "loss": 0.0629,
239
+ "step": 1800
240
+ },
241
+ {
242
+ "epoch": 2.135231316725979,
243
+ "eval_accuracy": 0.900815418828762,
244
+ "eval_f1": 0.9008021532090624,
245
+ "eval_loss": 0.37351372838020325,
246
+ "eval_matthews_correlation": 0.8016708933554149,
247
+ "eval_precision": 0.9008918124334665,
248
+ "eval_recall": 0.9007790888470252,
249
+ "eval_runtime": 1.4037,
250
+ "eval_samples_per_second": 4805.322,
251
+ "eval_steps_per_second": 75.517,
252
+ "step": 1800
253
+ },
254
+ {
255
+ "epoch": 2.2538552787663106,
256
+ "grad_norm": 5.28846549987793,
257
+ "learning_rate": 1.3320288982540638e-05,
258
+ "loss": 0.0622,
259
+ "step": 1900
260
+ },
261
+ {
262
+ "epoch": 2.372479240806643,
263
+ "grad_norm": 11.060345649719238,
264
+ "learning_rate": 1.2417218543046358e-05,
265
+ "loss": 0.0748,
266
+ "step": 2000
267
+ },
268
+ {
269
+ "epoch": 2.372479240806643,
270
+ "eval_accuracy": 0.9082283172720533,
271
+ "eval_f1": 0.9082064942582599,
272
+ "eval_loss": 0.342942476272583,
273
+ "eval_matthews_correlation": 0.8172074539472458,
274
+ "eval_precision": 0.908867392978981,
275
+ "eval_recall": 0.9083402309983616,
276
+ "eval_runtime": 1.425,
277
+ "eval_samples_per_second": 4733.445,
278
+ "eval_steps_per_second": 74.388,
279
+ "step": 2000
280
+ },
281
+ {
282
+ "epoch": 2.491103202846975,
283
+ "grad_norm": 16.14167594909668,
284
+ "learning_rate": 1.1514148103552077e-05,
285
+ "loss": 0.0678,
286
+ "step": 2100
287
+ },
288
+ {
289
+ "epoch": 2.6097271648873073,
290
+ "grad_norm": 14.60816478729248,
291
+ "learning_rate": 1.0611077664057798e-05,
292
+ "loss": 0.0651,
293
+ "step": 2200
294
+ },
295
+ {
296
+ "epoch": 2.6097271648873073,
297
+ "eval_accuracy": 0.9015567086730912,
298
+ "eval_f1": 0.9015537463069576,
299
+ "eval_loss": 0.3432765305042267,
300
+ "eval_matthews_correlation": 0.8031074926139152,
301
+ "eval_precision": 0.9015537463069576,
302
+ "eval_recall": 0.9015537463069576,
303
+ "eval_runtime": 1.413,
304
+ "eval_samples_per_second": 4773.585,
305
+ "eval_steps_per_second": 75.019,
306
+ "step": 2200
307
+ },
308
+ {
309
+ "epoch": 2.7283511269276395,
310
+ "grad_norm": 19.60405731201172,
311
+ "learning_rate": 9.708007224563517e-06,
312
+ "loss": 0.0573,
313
+ "step": 2300
314
+ },
315
+ {
316
+ "epoch": 2.8469750889679717,
317
+ "grad_norm": 12.747634887695312,
318
+ "learning_rate": 8.804936785069236e-06,
319
+ "loss": 0.0587,
320
+ "step": 2400
321
+ },
322
+ {
323
+ "epoch": 2.8469750889679717,
324
+ "eval_accuracy": 0.9012601927353595,
325
+ "eval_f1": 0.9012397677542721,
326
+ "eval_loss": 0.3955250680446625,
327
+ "eval_matthews_correlation": 0.8026222319497477,
328
+ "eval_precision": 0.9014138348009262,
329
+ "eval_recall": 0.901208423433808,
330
+ "eval_runtime": 1.4171,
331
+ "eval_samples_per_second": 4759.731,
332
+ "eval_steps_per_second": 74.801,
333
+ "step": 2400
334
+ },
335
+ {
336
+ "epoch": 2.9655990510083035,
337
+ "grad_norm": 13.178675651550293,
338
+ "learning_rate": 7.901866345574955e-06,
339
+ "loss": 0.0569,
340
+ "step": 2500
341
+ },
342
+ {
343
+ "epoch": 3.0842230130486357,
344
+ "grad_norm": 0.011187891475856304,
345
+ "learning_rate": 6.998795906080675e-06,
346
+ "loss": 0.0263,
347
+ "step": 2600
348
+ },
349
+ {
350
+ "epoch": 3.0842230130486357,
351
+ "eval_accuracy": 0.9057079318013344,
352
+ "eval_f1": 0.9057078799868117,
353
+ "eval_loss": 0.525695264339447,
354
+ "eval_matthews_correlation": 0.8114565321991912,
355
+ "eval_precision": 0.9057254122582752,
356
+ "eval_recall": 0.9057311199609897,
357
+ "eval_runtime": 1.4153,
358
+ "eval_samples_per_second": 4765.722,
359
+ "eval_steps_per_second": 74.895,
360
+ "step": 2600
361
+ },
362
+ {
363
+ "epoch": 3.202846975088968,
364
+ "grad_norm": 0.2136336714029312,
365
+ "learning_rate": 6.104756170981336e-06,
366
+ "loss": 0.011,
367
+ "step": 2700
368
+ },
369
+ {
370
+ "epoch": 3.3214709371293,
371
+ "grad_norm": 33.0368766784668,
372
+ "learning_rate": 5.201685731487056e-06,
373
+ "loss": 0.018,
374
+ "step": 2800
375
+ },
376
+ {
377
+ "epoch": 3.3214709371293,
378
+ "eval_accuracy": 0.9057079318013344,
379
+ "eval_f1": 0.9056997050145418,
380
+ "eval_loss": 0.6456906199455261,
381
+ "eval_matthews_correlation": 0.8117977167650796,
382
+ "eval_precision": 0.9060113235426336,
383
+ "eval_recall": 0.905786424375302,
384
+ "eval_runtime": 1.4245,
385
+ "eval_samples_per_second": 4734.973,
386
+ "eval_steps_per_second": 74.412,
387
+ "step": 2800
388
+ },
389
+ {
390
+ "epoch": 3.4400948991696323,
391
+ "grad_norm": 0.03287828713655472,
392
+ "learning_rate": 4.2986152919927755e-06,
393
+ "loss": 0.0134,
394
+ "step": 2900
395
+ },
396
+ {
397
+ "epoch": 3.5587188612099645,
398
+ "grad_norm": 0.05251970514655113,
399
+ "learning_rate": 3.395544852498495e-06,
400
+ "loss": 0.0197,
401
+ "step": 3000
402
+ },
403
+ {
404
+ "epoch": 3.5587188612099645,
405
+ "eval_accuracy": 0.9027427724240178,
406
+ "eval_f1": 0.9027319948057138,
407
+ "eval_loss": 0.5632529258728027,
408
+ "eval_matthews_correlation": 0.805927089741698,
409
+ "eval_precision": 0.9030994533926766,
410
+ "eval_recall": 0.9028276821717736,
411
+ "eval_runtime": 1.4184,
412
+ "eval_samples_per_second": 4755.316,
413
+ "eval_steps_per_second": 74.731,
414
+ "step": 3000
415
+ },
416
+ {
417
+ "epoch": 3.6773428232502967,
418
+ "grad_norm": 0.18964755535125732,
419
+ "learning_rate": 2.4924744130042145e-06,
420
+ "loss": 0.0103,
421
+ "step": 3100
422
+ },
423
+ {
424
+ "epoch": 3.795966785290629,
425
+ "grad_norm": 2.074934244155884,
426
+ "learning_rate": 1.5894039735099338e-06,
427
+ "loss": 0.0067,
428
+ "step": 3200
429
+ },
430
+ {
431
+ "epoch": 3.795966785290629,
432
+ "eval_accuracy": 0.9052631578947369,
433
+ "eval_f1": 0.9052587514243154,
434
+ "eval_loss": 0.6363104581832886,
435
+ "eval_matthews_correlation": 0.8107917228123425,
436
+ "eval_precision": 0.905463923256376,
437
+ "eval_recall": 0.9053278109809421,
438
+ "eval_runtime": 1.4184,
439
+ "eval_samples_per_second": 4755.362,
440
+ "eval_steps_per_second": 74.732,
441
+ "step": 3200
442
+ },
443
+ {
444
+ "epoch": 3.914590747330961,
445
+ "grad_norm": 0.007920457050204277,
446
+ "learning_rate": 6.863335340156533e-07,
447
+ "loss": 0.0084,
448
+ "step": 3300
449
+ },
450
+ {
451
+ "epoch": 4.0,
452
+ "step": 3372,
453
+ "total_flos": 5866539036180480.0,
454
+ "train_loss": 0.1323915239189302,
455
+ "train_runtime": 204.5834,
456
+ "train_samples_per_second": 1054.865,
457
+ "train_steps_per_second": 16.482
458
+ }
459
+ ],
460
+ "logging_steps": 100,
461
+ "max_steps": 3372,
462
+ "num_input_tokens_seen": 0,
463
+ "num_train_epochs": 4,
464
+ "save_steps": 200,
465
+ "stateful_callbacks": {
466
+ "TrainerControl": {
467
+ "args": {
468
+ "should_epoch_stop": false,
469
+ "should_evaluate": false,
470
+ "should_log": false,
471
+ "should_save": true,
472
+ "should_training_stop": true
473
+ },
474
+ "attributes": {}
475
+ }
476
+ },
477
+ "total_flos": 5866539036180480.0,
478
+ "train_batch_size": 64,
479
+ "trial_name": null,
480
+ "trial_params": null
481
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3437ac5717655ab79710df94f304f8369fe5dc9bb162c16af1fb1dd819b7b6e
3
+ size 5368