File size: 12,208 Bytes
284cd41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
{
  "best_metric": 0.18491357564926147,
  "best_model_checkpoint": "output_pipe/prom_300_all/origin/checkpoint-1400",
  "epoch": 4.0,
  "eval_steps": 200,
  "global_step": 2960,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.13513513513513514,
      "grad_norm": 10.229140281677246,
      "learning_rate": 2.951546391752577e-05,
      "loss": 0.3957,
      "step": 100
    },
    {
      "epoch": 0.2702702702702703,
      "grad_norm": 4.366018772125244,
      "learning_rate": 2.8484536082474226e-05,
      "loss": 0.3029,
      "step": 200
    },
    {
      "epoch": 0.2702702702702703,
      "eval_accuracy": 0.8190878378378378,
      "eval_f1": 0.8137850769429716,
      "eval_loss": 0.37689200043678284,
      "eval_matthews_correlation": 0.675083279534266,
      "eval_precision": 0.8579374628996808,
      "eval_recall": 0.8183080017768842,
      "eval_runtime": 1.6963,
      "eval_samples_per_second": 3490.009,
      "eval_steps_per_second": 54.826,
      "step": 200
    },
    {
      "epoch": 0.40540540540540543,
      "grad_norm": 12.47264575958252,
      "learning_rate": 2.745360824742268e-05,
      "loss": 0.2656,
      "step": 300
    },
    {
      "epoch": 0.5405405405405406,
      "grad_norm": 8.677423477172852,
      "learning_rate": 2.6422680412371135e-05,
      "loss": 0.2217,
      "step": 400
    },
    {
      "epoch": 0.5405405405405406,
      "eval_accuracy": 0.9070945945945946,
      "eval_f1": 0.9068358241206858,
      "eval_loss": 0.23628243803977966,
      "eval_matthews_correlation": 0.8197057914004126,
      "eval_precision": 0.9123568337823056,
      "eval_recall": 0.9073641621822256,
      "eval_runtime": 1.6952,
      "eval_samples_per_second": 3492.293,
      "eval_steps_per_second": 54.862,
      "step": 400
    },
    {
      "epoch": 0.6756756756756757,
      "grad_norm": 10.468666076660156,
      "learning_rate": 2.5391752577319586e-05,
      "loss": 0.2133,
      "step": 500
    },
    {
      "epoch": 0.8108108108108109,
      "grad_norm": 6.7936506271362305,
      "learning_rate": 2.436082474226804e-05,
      "loss": 0.2092,
      "step": 600
    },
    {
      "epoch": 0.8108108108108109,
      "eval_accuracy": 0.9152027027027027,
      "eval_f1": 0.9149447781009592,
      "eval_loss": 0.2045987993478775,
      "eval_matthews_correlation": 0.8364946316412692,
      "eval_precision": 0.921029377746935,
      "eval_recall": 0.9154836370974333,
      "eval_runtime": 1.7024,
      "eval_samples_per_second": 3477.472,
      "eval_steps_per_second": 54.629,
      "step": 600
    },
    {
      "epoch": 0.9459459459459459,
      "grad_norm": 7.481766700744629,
      "learning_rate": 2.3329896907216496e-05,
      "loss": 0.2038,
      "step": 700
    },
    {
      "epoch": 1.0810810810810811,
      "grad_norm": 6.448531627655029,
      "learning_rate": 2.229896907216495e-05,
      "loss": 0.1432,
      "step": 800
    },
    {
      "epoch": 1.0810810810810811,
      "eval_accuracy": 0.93125,
      "eval_f1": 0.9312120773759067,
      "eval_loss": 0.20802178978919983,
      "eval_matthews_correlation": 0.8630798400949948,
      "eval_precision": 0.9319203976686419,
      "eval_recall": 0.9311597775881583,
      "eval_runtime": 1.7038,
      "eval_samples_per_second": 3474.605,
      "eval_steps_per_second": 54.584,
      "step": 800
    },
    {
      "epoch": 1.2162162162162162,
      "grad_norm": 5.360595703125,
      "learning_rate": 2.1268041237113405e-05,
      "loss": 0.1225,
      "step": 900
    },
    {
      "epoch": 1.3513513513513513,
      "grad_norm": 12.399473190307617,
      "learning_rate": 2.0237113402061856e-05,
      "loss": 0.119,
      "step": 1000
    },
    {
      "epoch": 1.3513513513513513,
      "eval_accuracy": 0.9273648648648649,
      "eval_f1": 0.9273253739163184,
      "eval_loss": 0.1945222169160843,
      "eval_matthews_correlation": 0.8561116827116911,
      "eval_precision": 0.9286165491178344,
      "eval_recall": 0.9274958671007523,
      "eval_runtime": 1.7116,
      "eval_samples_per_second": 3458.664,
      "eval_steps_per_second": 54.334,
      "step": 1000
    },
    {
      "epoch": 1.4864864864864864,
      "grad_norm": 4.339570045471191,
      "learning_rate": 1.9206185567010307e-05,
      "loss": 0.1273,
      "step": 1100
    },
    {
      "epoch": 1.6216216216216215,
      "grad_norm": 14.022846221923828,
      "learning_rate": 1.8175257731958762e-05,
      "loss": 0.1153,
      "step": 1200
    },
    {
      "epoch": 1.6216216216216215,
      "eval_accuracy": 0.9302364864864865,
      "eval_f1": 0.9302364685710435,
      "eval_loss": 0.18970273435115814,
      "eval_matthews_correlation": 0.8605061164956768,
      "eval_precision": 0.9302512167180906,
      "eval_recall": 0.9302548997854683,
      "eval_runtime": 1.7115,
      "eval_samples_per_second": 3458.921,
      "eval_steps_per_second": 54.338,
      "step": 1200
    },
    {
      "epoch": 1.7567567567567568,
      "grad_norm": 4.394283771514893,
      "learning_rate": 1.7144329896907217e-05,
      "loss": 0.1036,
      "step": 1300
    },
    {
      "epoch": 1.8918918918918919,
      "grad_norm": 5.471324920654297,
      "learning_rate": 1.611340206185567e-05,
      "loss": 0.1189,
      "step": 1400
    },
    {
      "epoch": 1.8918918918918919,
      "eval_accuracy": 0.9305743243243243,
      "eval_f1": 0.9305326495433668,
      "eval_loss": 0.18491357564926147,
      "eval_matthews_correlation": 0.8617933500441142,
      "eval_precision": 0.9313144616824476,
      "eval_recall": 0.9304792930448134,
      "eval_runtime": 1.7174,
      "eval_samples_per_second": 3447.102,
      "eval_steps_per_second": 54.152,
      "step": 1400
    },
    {
      "epoch": 2.027027027027027,
      "grad_norm": 5.195973873138428,
      "learning_rate": 1.5082474226804124e-05,
      "loss": 0.0993,
      "step": 1500
    },
    {
      "epoch": 2.1621621621621623,
      "grad_norm": 3.4343478679656982,
      "learning_rate": 1.4051546391752577e-05,
      "loss": 0.0416,
      "step": 1600
    },
    {
      "epoch": 2.1621621621621623,
      "eval_accuracy": 0.9320945945945946,
      "eval_f1": 0.9320657433152479,
      "eval_loss": 0.2865821123123169,
      "eval_matthews_correlation": 0.8646019805851967,
      "eval_precision": 0.9325841926158089,
      "eval_recall": 0.9320179733750436,
      "eval_runtime": 1.7142,
      "eval_samples_per_second": 3453.6,
      "eval_steps_per_second": 54.254,
      "step": 1600
    },
    {
      "epoch": 2.2972972972972974,
      "grad_norm": 1.7518272399902344,
      "learning_rate": 1.3020618556701032e-05,
      "loss": 0.0321,
      "step": 1700
    },
    {
      "epoch": 2.4324324324324325,
      "grad_norm": 0.545198380947113,
      "learning_rate": 1.1989690721649485e-05,
      "loss": 0.0472,
      "step": 1800
    },
    {
      "epoch": 2.4324324324324325,
      "eval_accuracy": 0.9346283783783784,
      "eval_f1": 0.9346277050025539,
      "eval_loss": 0.2627970576286316,
      "eval_matthews_correlation": 0.8693732402984062,
      "eval_precision": 0.9347087744082508,
      "eval_recall": 0.9346644670192129,
      "eval_runtime": 1.7138,
      "eval_samples_per_second": 3454.269,
      "eval_steps_per_second": 54.265,
      "step": 1800
    },
    {
      "epoch": 2.5675675675675675,
      "grad_norm": 8.60261344909668,
      "learning_rate": 1.0958762886597938e-05,
      "loss": 0.0463,
      "step": 1900
    },
    {
      "epoch": 2.7027027027027026,
      "grad_norm": 5.734014511108398,
      "learning_rate": 9.927835051546392e-06,
      "loss": 0.0426,
      "step": 2000
    },
    {
      "epoch": 2.7027027027027026,
      "eval_accuracy": 0.9390202702702702,
      "eval_f1": 0.9390032120412144,
      "eval_loss": 0.24185040593147278,
      "eval_matthews_correlation": 0.8782767377163032,
      "eval_precision": 0.9393146284000857,
      "eval_recall": 0.9389621800341589,
      "eval_runtime": 1.7176,
      "eval_samples_per_second": 3446.664,
      "eval_steps_per_second": 54.145,
      "step": 2000
    },
    {
      "epoch": 2.8378378378378377,
      "grad_norm": 16.915462493896484,
      "learning_rate": 8.896907216494845e-06,
      "loss": 0.0404,
      "step": 2100
    },
    {
      "epoch": 2.972972972972973,
      "grad_norm": 11.942590713500977,
      "learning_rate": 7.8659793814433e-06,
      "loss": 0.0449,
      "step": 2200
    },
    {
      "epoch": 2.972972972972973,
      "eval_accuracy": 0.9346283783783784,
      "eval_f1": 0.9346085835481779,
      "eval_loss": 0.26332417130470276,
      "eval_matthews_correlation": 0.8695174028730603,
      "eval_precision": 0.9349504933868377,
      "eval_recall": 0.9345669940571169,
      "eval_runtime": 1.7174,
      "eval_samples_per_second": 3447.038,
      "eval_steps_per_second": 54.151,
      "step": 2200
    },
    {
      "epoch": 3.108108108108108,
      "grad_norm": 0.1922147125005722,
      "learning_rate": 6.835051546391753e-06,
      "loss": 0.0183,
      "step": 2300
    },
    {
      "epoch": 3.2432432432432434,
      "grad_norm": 0.016464663669466972,
      "learning_rate": 5.804123711340207e-06,
      "loss": 0.0151,
      "step": 2400
    },
    {
      "epoch": 3.2432432432432434,
      "eval_accuracy": 0.935304054054054,
      "eval_f1": 0.9352742660769024,
      "eval_loss": 0.3918153643608093,
      "eval_matthews_correlation": 0.8710722575664533,
      "eval_precision": 0.9358489722798395,
      "eval_recall": 0.9352235098392906,
      "eval_runtime": 1.7154,
      "eval_samples_per_second": 3451.033,
      "eval_steps_per_second": 54.214,
      "step": 2400
    },
    {
      "epoch": 3.3783783783783785,
      "grad_norm": 0.8072592616081238,
      "learning_rate": 4.77319587628866e-06,
      "loss": 0.0086,
      "step": 2500
    },
    {
      "epoch": 3.5135135135135136,
      "grad_norm": 11.552366256713867,
      "learning_rate": 3.7422680412371135e-06,
      "loss": 0.013,
      "step": 2600
    },
    {
      "epoch": 3.5135135135135136,
      "eval_accuracy": 0.9363175675675676,
      "eval_f1": 0.936317042424479,
      "eval_loss": 0.35771170258522034,
      "eval_matthews_correlation": 0.872640550366574,
      "eval_precision": 0.9363156189326801,
      "eval_recall": 0.9363249314835842,
      "eval_runtime": 1.7187,
      "eval_samples_per_second": 3444.564,
      "eval_steps_per_second": 54.112,
      "step": 2600
    },
    {
      "epoch": 3.6486486486486487,
      "grad_norm": 0.023688938468694687,
      "learning_rate": 2.711340206185567e-06,
      "loss": 0.0097,
      "step": 2700
    },
    {
      "epoch": 3.7837837837837838,
      "grad_norm": 0.010523764416575432,
      "learning_rate": 1.6804123711340206e-06,
      "loss": 0.011,
      "step": 2800
    },
    {
      "epoch": 3.7837837837837838,
      "eval_accuracy": 0.9363175675675676,
      "eval_f1": 0.9363174785300759,
      "eval_loss": 0.3880373537540436,
      "eval_matthews_correlation": 0.8727004149732407,
      "eval_precision": 0.9363563085660243,
      "eval_recall": 0.9363441064925211,
      "eval_runtime": 1.7189,
      "eval_samples_per_second": 3444.029,
      "eval_steps_per_second": 54.104,
      "step": 2800
    },
    {
      "epoch": 3.918918918918919,
      "grad_norm": 0.0027509788051247597,
      "learning_rate": 6.494845360824742e-07,
      "loss": 0.0095,
      "step": 2900
    },
    {
      "epoch": 4.0,
      "step": 2960,
      "total_flos": 9268766323310592.0,
      "train_loss": 0.10634732993470655,
      "train_runtime": 216.3397,
      "train_samples_per_second": 875.586,
      "train_steps_per_second": 13.682
    }
  ],
  "logging_steps": 100,
  "max_steps": 2960,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 200,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9268766323310592.0,
  "train_batch_size": 64,
  "trial_name": null,
  "trial_params": null
}