atsuki-yamaguchi commited on
Commit
35d17fe
·
verified ·
1 Parent(s): 28c4807

Remove unnecessary files to save space

Browse files
Files changed (1) hide show
  1. checkpoint-3052/trainer_state.json +0 -719
checkpoint-3052/trainer_state.json DELETED
@@ -1,719 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.10143578835416113,
5
- "eval_steps": 500,
6
- "global_step": 3052,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.001030311087476735,
13
- "grad_norm": 60.25547409057617,
14
- "learning_rate": 1.0157273918741808e-06,
15
- "loss": 8.8455,
16
- "step": 31
17
- },
18
- {
19
- "epoch": 0.00206062217495347,
20
- "grad_norm": 15.669363975524902,
21
- "learning_rate": 2.0314547837483616e-06,
22
- "loss": 7.1553,
23
- "step": 62
24
- },
25
- {
26
- "epoch": 0.003090933262430205,
27
- "grad_norm": 15.366345405578613,
28
- "learning_rate": 3.0471821756225426e-06,
29
- "loss": 5.8784,
30
- "step": 93
31
- },
32
- {
33
- "epoch": 0.00412124434990694,
34
- "grad_norm": 36.30561828613281,
35
- "learning_rate": 4.062909567496723e-06,
36
- "loss": 4.7708,
37
- "step": 124
38
- },
39
- {
40
- "epoch": 0.005151555437383675,
41
- "grad_norm": 27.202678680419922,
42
- "learning_rate": 5.078636959370905e-06,
43
- "loss": 4.1629,
44
- "step": 155
45
- },
46
- {
47
- "epoch": 0.00618186652486041,
48
- "grad_norm": 24.30484962463379,
49
- "learning_rate": 6.094364351245085e-06,
50
- "loss": 3.867,
51
- "step": 186
52
- },
53
- {
54
- "epoch": 0.007212177612337145,
55
- "grad_norm": 19.916366577148438,
56
- "learning_rate": 7.110091743119267e-06,
57
- "loss": 3.6131,
58
- "step": 217
59
- },
60
- {
61
- "epoch": 0.00824248869981388,
62
- "grad_norm": 17.577274322509766,
63
- "learning_rate": 8.125819134993446e-06,
64
- "loss": 3.4772,
65
- "step": 248
66
- },
67
- {
68
- "epoch": 0.009272799787290615,
69
- "grad_norm": 12.133190155029297,
70
- "learning_rate": 9.141546526867629e-06,
71
- "loss": 3.3218,
72
- "step": 279
73
- },
74
- {
75
- "epoch": 0.01030311087476735,
76
- "grad_norm": 19.79263687133789,
77
- "learning_rate": 1.015727391874181e-05,
78
- "loss": 3.2055,
79
- "step": 310
80
- },
81
- {
82
- "epoch": 0.011333421962244085,
83
- "grad_norm": 16.38133430480957,
84
- "learning_rate": 1.117300131061599e-05,
85
- "loss": 3.1062,
86
- "step": 341
87
- },
88
- {
89
- "epoch": 0.01236373304972082,
90
- "grad_norm": 12.638299942016602,
91
- "learning_rate": 1.218872870249017e-05,
92
- "loss": 3.0106,
93
- "step": 372
94
- },
95
- {
96
- "epoch": 0.013394044137197554,
97
- "grad_norm": 9.46596908569336,
98
- "learning_rate": 1.3204456094364351e-05,
99
- "loss": 2.924,
100
- "step": 403
101
- },
102
- {
103
- "epoch": 0.01442435522467429,
104
- "grad_norm": 10.945392608642578,
105
- "learning_rate": 1.4220183486238533e-05,
106
- "loss": 2.844,
107
- "step": 434
108
- },
109
- {
110
- "epoch": 0.015454666312151024,
111
- "grad_norm": 8.474015235900879,
112
- "learning_rate": 1.5235910878112714e-05,
113
- "loss": 2.7892,
114
- "step": 465
115
- },
116
- {
117
- "epoch": 0.01648497739962776,
118
- "grad_norm": 9.370804786682129,
119
- "learning_rate": 1.6251638269986893e-05,
120
- "loss": 2.7509,
121
- "step": 496
122
- },
123
- {
124
- "epoch": 0.017515288487104493,
125
- "grad_norm": 11.63398551940918,
126
- "learning_rate": 1.7267365661861077e-05,
127
- "loss": 2.6999,
128
- "step": 527
129
- },
130
- {
131
- "epoch": 0.01854559957458123,
132
- "grad_norm": 9.17713451385498,
133
- "learning_rate": 1.8283093053735257e-05,
134
- "loss": 2.6459,
135
- "step": 558
136
- },
137
- {
138
- "epoch": 0.019575910662057962,
139
- "grad_norm": 7.119054794311523,
140
- "learning_rate": 1.9298820445609438e-05,
141
- "loss": 2.603,
142
- "step": 589
143
- },
144
- {
145
- "epoch": 0.0206062217495347,
146
- "grad_norm": 6.653646945953369,
147
- "learning_rate": 2.031454783748362e-05,
148
- "loss": 2.5588,
149
- "step": 620
150
- },
151
- {
152
- "epoch": 0.021636532837011432,
153
- "grad_norm": 8.332653045654297,
154
- "learning_rate": 2.13302752293578e-05,
155
- "loss": 2.5357,
156
- "step": 651
157
- },
158
- {
159
- "epoch": 0.02266684392448817,
160
- "grad_norm": 6.4949116706848145,
161
- "learning_rate": 2.234600262123198e-05,
162
- "loss": 2.4967,
163
- "step": 682
164
- },
165
- {
166
- "epoch": 0.023697155011964902,
167
- "grad_norm": 9.41009521484375,
168
- "learning_rate": 2.336173001310616e-05,
169
- "loss": 2.4563,
170
- "step": 713
171
- },
172
- {
173
- "epoch": 0.02472746609944164,
174
- "grad_norm": 7.840345859527588,
175
- "learning_rate": 2.437745740498034e-05,
176
- "loss": 2.4383,
177
- "step": 744
178
- },
179
- {
180
- "epoch": 0.025757777186918372,
181
- "grad_norm": 6.116458415985107,
182
- "learning_rate": 2.5393184796854525e-05,
183
- "loss": 2.3817,
184
- "step": 775
185
- },
186
- {
187
- "epoch": 0.02678808827439511,
188
- "grad_norm": 5.938300609588623,
189
- "learning_rate": 2.6408912188728702e-05,
190
- "loss": 2.3508,
191
- "step": 806
192
- },
193
- {
194
- "epoch": 0.027818399361871842,
195
- "grad_norm": 5.4408345222473145,
196
- "learning_rate": 2.7424639580602886e-05,
197
- "loss": 2.3325,
198
- "step": 837
199
- },
200
- {
201
- "epoch": 0.02884871044934858,
202
- "grad_norm": 5.375136375427246,
203
- "learning_rate": 2.8440366972477066e-05,
204
- "loss": 2.3101,
205
- "step": 868
206
- },
207
- {
208
- "epoch": 0.029879021536825312,
209
- "grad_norm": 5.149726867675781,
210
- "learning_rate": 2.9456094364351244e-05,
211
- "loss": 2.282,
212
- "step": 899
213
- },
214
- {
215
- "epoch": 0.03090933262430205,
216
- "grad_norm": 4.591221332550049,
217
- "learning_rate": 3.0471821756225428e-05,
218
- "loss": 2.2427,
219
- "step": 930
220
- },
221
- {
222
- "epoch": 0.031939643711778785,
223
- "grad_norm": 4.977034091949463,
224
- "learning_rate": 3.148754914809961e-05,
225
- "loss": 2.2218,
226
- "step": 961
227
- },
228
- {
229
- "epoch": 0.03296995479925552,
230
- "grad_norm": 5.038781642913818,
231
- "learning_rate": 3.2503276539973785e-05,
232
- "loss": 2.2044,
233
- "step": 992
234
- },
235
- {
236
- "epoch": 0.03400026588673225,
237
- "grad_norm": 4.872281551361084,
238
- "learning_rate": 3.351900393184797e-05,
239
- "loss": 2.1657,
240
- "step": 1023
241
- },
242
- {
243
- "epoch": 0.035030576974208985,
244
- "grad_norm": 4.370841979980469,
245
- "learning_rate": 3.453473132372215e-05,
246
- "loss": 2.1365,
247
- "step": 1054
248
- },
249
- {
250
- "epoch": 0.036060888061685725,
251
- "grad_norm": 4.087072849273682,
252
- "learning_rate": 3.555045871559633e-05,
253
- "loss": 2.1253,
254
- "step": 1085
255
- },
256
- {
257
- "epoch": 0.03709119914916246,
258
- "grad_norm": 4.113957405090332,
259
- "learning_rate": 3.6566186107470514e-05,
260
- "loss": 2.0973,
261
- "step": 1116
262
- },
263
- {
264
- "epoch": 0.03812151023663919,
265
- "grad_norm": 4.0119733810424805,
266
- "learning_rate": 3.7581913499344695e-05,
267
- "loss": 2.1024,
268
- "step": 1147
269
- },
270
- {
271
- "epoch": 0.039151821324115925,
272
- "grad_norm": 4.247573375701904,
273
- "learning_rate": 3.8597640891218876e-05,
274
- "loss": 2.0722,
275
- "step": 1178
276
- },
277
- {
278
- "epoch": 0.04018213241159266,
279
- "grad_norm": 3.5575129985809326,
280
- "learning_rate": 3.9613368283093056e-05,
281
- "loss": 2.056,
282
- "step": 1209
283
- },
284
- {
285
- "epoch": 0.0412124434990694,
286
- "grad_norm": 3.8885862827301025,
287
- "learning_rate": 4.062909567496724e-05,
288
- "loss": 2.0389,
289
- "step": 1240
290
- },
291
- {
292
- "epoch": 0.04224275458654613,
293
- "grad_norm": 3.680628538131714,
294
- "learning_rate": 4.164482306684142e-05,
295
- "loss": 2.0385,
296
- "step": 1271
297
- },
298
- {
299
- "epoch": 0.043273065674022865,
300
- "grad_norm": 3.780876874923706,
301
- "learning_rate": 4.26605504587156e-05,
302
- "loss": 2.0097,
303
- "step": 1302
304
- },
305
- {
306
- "epoch": 0.0443033767614996,
307
- "grad_norm": 4.235328674316406,
308
- "learning_rate": 4.367627785058978e-05,
309
- "loss": 2.0024,
310
- "step": 1333
311
- },
312
- {
313
- "epoch": 0.04533368784897634,
314
- "grad_norm": 3.326941967010498,
315
- "learning_rate": 4.469200524246396e-05,
316
- "loss": 1.9953,
317
- "step": 1364
318
- },
319
- {
320
- "epoch": 0.04636399893645307,
321
- "grad_norm": 3.28456449508667,
322
- "learning_rate": 4.570773263433814e-05,
323
- "loss": 1.9579,
324
- "step": 1395
325
- },
326
- {
327
- "epoch": 0.047394310023929805,
328
- "grad_norm": 16.107433319091797,
329
- "learning_rate": 4.672346002621232e-05,
330
- "loss": 1.9701,
331
- "step": 1426
332
- },
333
- {
334
- "epoch": 0.04842462111140654,
335
- "grad_norm": 3.5708224773406982,
336
- "learning_rate": 4.77391874180865e-05,
337
- "loss": 1.9621,
338
- "step": 1457
339
- },
340
- {
341
- "epoch": 0.04945493219888328,
342
- "grad_norm": 2.9053499698638916,
343
- "learning_rate": 4.875491480996068e-05,
344
- "loss": 1.9458,
345
- "step": 1488
346
- },
347
- {
348
- "epoch": 0.05048524328636001,
349
- "grad_norm": 3.0863258838653564,
350
- "learning_rate": 4.977064220183487e-05,
351
- "loss": 1.9483,
352
- "step": 1519
353
- },
354
- {
355
- "epoch": 0.051515554373836744,
356
- "grad_norm": 2.9012269973754883,
357
- "learning_rate": 4.9999915451558777e-05,
358
- "loss": 1.928,
359
- "step": 1550
360
- },
361
- {
362
- "epoch": 0.05254586546131348,
363
- "grad_norm": 3.0949041843414307,
364
- "learning_rate": 4.999955597496219e-05,
365
- "loss": 1.9229,
366
- "step": 1581
367
- },
368
- {
369
- "epoch": 0.05357617654879022,
370
- "grad_norm": 2.8687901496887207,
371
- "learning_rate": 4.9998914381774255e-05,
372
- "loss": 1.915,
373
- "step": 1612
374
- },
375
- {
376
- "epoch": 0.05460648763626695,
377
- "grad_norm": 3.2136878967285156,
378
- "learning_rate": 4.999799067923527e-05,
379
- "loss": 1.9197,
380
- "step": 1643
381
- },
382
- {
383
- "epoch": 0.055636798723743684,
384
- "grad_norm": 2.590843677520752,
385
- "learning_rate": 4.999678487776908e-05,
386
- "loss": 1.8756,
387
- "step": 1674
388
- },
389
- {
390
- "epoch": 0.05666710981122042,
391
- "grad_norm": 2.64634108543396,
392
- "learning_rate": 4.9995296990983006e-05,
393
- "loss": 1.9033,
394
- "step": 1705
395
- },
396
- {
397
- "epoch": 0.05769742089869716,
398
- "grad_norm": 3.0151331424713135,
399
- "learning_rate": 4.999352703566763e-05,
400
- "loss": 1.8883,
401
- "step": 1736
402
- },
403
- {
404
- "epoch": 0.05872773198617389,
405
- "grad_norm": 2.526806354522705,
406
- "learning_rate": 4.999147503179668e-05,
407
- "loss": 1.8666,
408
- "step": 1767
409
- },
410
- {
411
- "epoch": 0.059758043073650624,
412
- "grad_norm": 2.510300397872925,
413
- "learning_rate": 4.998914100252672e-05,
414
- "loss": 1.854,
415
- "step": 1798
416
- },
417
- {
418
- "epoch": 0.06078835416112736,
419
- "grad_norm": 2.4867682456970215,
420
- "learning_rate": 4.998652497419696e-05,
421
- "loss": 1.8548,
422
- "step": 1829
423
- },
424
- {
425
- "epoch": 0.0618186652486041,
426
- "grad_norm": 2.3920586109161377,
427
- "learning_rate": 4.9983626976328927e-05,
428
- "loss": 1.8495,
429
- "step": 1860
430
- },
431
- {
432
- "epoch": 0.06284897633608083,
433
- "grad_norm": 2.714177370071411,
434
- "learning_rate": 4.998044704162613e-05,
435
- "loss": 1.8433,
436
- "step": 1891
437
- },
438
- {
439
- "epoch": 0.06387928742355757,
440
- "grad_norm": 2.3094465732574463,
441
- "learning_rate": 4.9976985205973705e-05,
442
- "loss": 1.8382,
443
- "step": 1922
444
- },
445
- {
446
- "epoch": 0.0649095985110343,
447
- "grad_norm": 2.47184419631958,
448
- "learning_rate": 4.997324150843799e-05,
449
- "loss": 1.8464,
450
- "step": 1953
451
- },
452
- {
453
- "epoch": 0.06593990959851104,
454
- "grad_norm": 2.391841411590576,
455
- "learning_rate": 4.99692159912661e-05,
456
- "loss": 1.8179,
457
- "step": 1984
458
- },
459
- {
460
- "epoch": 0.06697022068598776,
461
- "grad_norm": 2.2471864223480225,
462
- "learning_rate": 4.996490869988546e-05,
463
- "loss": 1.8149,
464
- "step": 2015
465
- },
466
- {
467
- "epoch": 0.0680005317734645,
468
- "grad_norm": 2.5497376918792725,
469
- "learning_rate": 4.996031968290326e-05,
470
- "loss": 1.8099,
471
- "step": 2046
472
- },
473
- {
474
- "epoch": 0.06903084286094124,
475
- "grad_norm": 2.330463409423828,
476
- "learning_rate": 4.995544899210594e-05,
477
- "loss": 1.8267,
478
- "step": 2077
479
- },
480
- {
481
- "epoch": 0.07006115394841797,
482
- "grad_norm": 2.3259341716766357,
483
- "learning_rate": 4.9950296682458583e-05,
484
- "loss": 1.7801,
485
- "step": 2108
486
- },
487
- {
488
- "epoch": 0.07109146503589471,
489
- "grad_norm": 2.1711952686309814,
490
- "learning_rate": 4.994486281210429e-05,
491
- "loss": 1.7961,
492
- "step": 2139
493
- },
494
- {
495
- "epoch": 0.07212177612337145,
496
- "grad_norm": 2.1808884143829346,
497
- "learning_rate": 4.9939147442363566e-05,
498
- "loss": 1.8109,
499
- "step": 2170
500
- },
501
- {
502
- "epoch": 0.07315208721084818,
503
- "grad_norm": 2.089256525039673,
504
- "learning_rate": 4.9933150637733574e-05,
505
- "loss": 1.8026,
506
- "step": 2201
507
- },
508
- {
509
- "epoch": 0.07418239829832492,
510
- "grad_norm": 2.0864951610565186,
511
- "learning_rate": 4.992687246588743e-05,
512
- "loss": 1.7753,
513
- "step": 2232
514
- },
515
- {
516
- "epoch": 0.07521270938580164,
517
- "grad_norm": 2.36157488822937,
518
- "learning_rate": 4.992031299767347e-05,
519
- "loss": 1.7746,
520
- "step": 2263
521
- },
522
- {
523
- "epoch": 0.07624302047327838,
524
- "grad_norm": 2.5334439277648926,
525
- "learning_rate": 4.9913472307114386e-05,
526
- "loss": 1.7927,
527
- "step": 2294
528
- },
529
- {
530
- "epoch": 0.07727333156075512,
531
- "grad_norm": 2.2565715312957764,
532
- "learning_rate": 4.9906350471406446e-05,
533
- "loss": 1.7668,
534
- "step": 2325
535
- },
536
- {
537
- "epoch": 0.07830364264823185,
538
- "grad_norm": 2.1043128967285156,
539
- "learning_rate": 4.989894757091861e-05,
540
- "loss": 1.7771,
541
- "step": 2356
542
- },
543
- {
544
- "epoch": 0.07933395373570859,
545
- "grad_norm": 1.9659819602966309,
546
- "learning_rate": 4.989126368919158e-05,
547
- "loss": 1.7666,
548
- "step": 2387
549
- },
550
- {
551
- "epoch": 0.08036426482318532,
552
- "grad_norm": 2.0778403282165527,
553
- "learning_rate": 4.988329891293693e-05,
554
- "loss": 1.7405,
555
- "step": 2418
556
- },
557
- {
558
- "epoch": 0.08139457591066206,
559
- "grad_norm": 2.1767923831939697,
560
- "learning_rate": 4.987505333203608e-05,
561
- "loss": 1.7495,
562
- "step": 2449
563
- },
564
- {
565
- "epoch": 0.0824248869981388,
566
- "grad_norm": 2.260143280029297,
567
- "learning_rate": 4.9866527039539276e-05,
568
- "loss": 1.7504,
569
- "step": 2480
570
- },
571
- {
572
- "epoch": 0.08345519808561552,
573
- "grad_norm": 2.18271803855896,
574
- "learning_rate": 4.9857720131664594e-05,
575
- "loss": 1.7456,
576
- "step": 2511
577
- },
578
- {
579
- "epoch": 0.08448550917309226,
580
- "grad_norm": 2.209594964981079,
581
- "learning_rate": 4.9848632707796773e-05,
582
- "loss": 1.7528,
583
- "step": 2542
584
- },
585
- {
586
- "epoch": 0.085515820260569,
587
- "grad_norm": 2.0666229724884033,
588
- "learning_rate": 4.9839264870486155e-05,
589
- "loss": 1.7517,
590
- "step": 2573
591
- },
592
- {
593
- "epoch": 0.08654613134804573,
594
- "grad_norm": 2.1070454120635986,
595
- "learning_rate": 4.9829616725447526e-05,
596
- "loss": 1.7474,
597
- "step": 2604
598
- },
599
- {
600
- "epoch": 0.08757644243552247,
601
- "grad_norm": 1.9430303573608398,
602
- "learning_rate": 4.981968838155888e-05,
603
- "loss": 1.7348,
604
- "step": 2635
605
- },
606
- {
607
- "epoch": 0.0886067535229992,
608
- "grad_norm": 1.9638925790786743,
609
- "learning_rate": 4.980947995086024e-05,
610
- "loss": 1.7202,
611
- "step": 2666
612
- },
613
- {
614
- "epoch": 0.08963706461047594,
615
- "grad_norm": 1.8845652341842651,
616
- "learning_rate": 4.979899154855234e-05,
617
- "loss": 1.7375,
618
- "step": 2697
619
- },
620
- {
621
- "epoch": 0.09066737569795268,
622
- "grad_norm": 5.712058067321777,
623
- "learning_rate": 4.9788223292995386e-05,
624
- "loss": 1.7379,
625
- "step": 2728
626
- },
627
- {
628
- "epoch": 0.0916976867854294,
629
- "grad_norm": 1.9520670175552368,
630
- "learning_rate": 4.977717530570768e-05,
631
- "loss": 1.7302,
632
- "step": 2759
633
- },
634
- {
635
- "epoch": 0.09272799787290614,
636
- "grad_norm": 1.8802224397659302,
637
- "learning_rate": 4.976584771136425e-05,
638
- "loss": 1.74,
639
- "step": 2790
640
- },
641
- {
642
- "epoch": 0.09375830896038288,
643
- "grad_norm": 2.1098153591156006,
644
- "learning_rate": 4.975424063779547e-05,
645
- "loss": 1.7024,
646
- "step": 2821
647
- },
648
- {
649
- "epoch": 0.09478862004785961,
650
- "grad_norm": 2.1568291187286377,
651
- "learning_rate": 4.974235421598557e-05,
652
- "loss": 1.7131,
653
- "step": 2852
654
- },
655
- {
656
- "epoch": 0.09581893113533635,
657
- "grad_norm": 1.8769980669021606,
658
- "learning_rate": 4.973018858007122e-05,
659
- "loss": 1.7008,
660
- "step": 2883
661
- },
662
- {
663
- "epoch": 0.09684924222281308,
664
- "grad_norm": 1.8325533866882324,
665
- "learning_rate": 4.9717743867339963e-05,
666
- "loss": 1.7058,
667
- "step": 2914
668
- },
669
- {
670
- "epoch": 0.09787955331028982,
671
- "grad_norm": 2.086416721343994,
672
- "learning_rate": 4.9705020218228695e-05,
673
- "loss": 1.711,
674
- "step": 2945
675
- },
676
- {
677
- "epoch": 0.09890986439776656,
678
- "grad_norm": 1.8294793367385864,
679
- "learning_rate": 4.969201777632205e-05,
680
- "loss": 1.6998,
681
- "step": 2976
682
- },
683
- {
684
- "epoch": 0.09994017548524328,
685
- "grad_norm": 2.0608153343200684,
686
- "learning_rate": 4.9678736688350846e-05,
687
- "loss": 1.6948,
688
- "step": 3007
689
- },
690
- {
691
- "epoch": 0.10097048657272002,
692
- "grad_norm": 3.2166008949279785,
693
- "learning_rate": 4.966517710419033e-05,
694
- "loss": 1.6788,
695
- "step": 3038
696
- }
697
- ],
698
- "logging_steps": 31,
699
- "max_steps": 30517,
700
- "num_input_tokens_seen": 0,
701
- "num_train_epochs": 2,
702
- "save_steps": 3052,
703
- "stateful_callbacks": {
704
- "TrainerControl": {
705
- "args": {
706
- "should_epoch_stop": false,
707
- "should_evaluate": false,
708
- "should_log": false,
709
- "should_save": true,
710
- "should_training_stop": false
711
- },
712
- "attributes": {}
713
- }
714
- },
715
- "total_flos": 2.263945054022271e+18,
716
- "train_batch_size": 8,
717
- "trial_name": null,
718
- "trial_params": null
719
- }