| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 49.858356940509914, | |
| "global_step": 4400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.9999745104274995e-05, | |
| "loss": 1.1665, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 1.9998980430094333e-05, | |
| "loss": 1.1142, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 1.9997706016440462e-05, | |
| "loss": 1.0657, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.9995921928281893e-05, | |
| "loss": 1.0229, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.999362825656992e-05, | |
| "loss": 1.0155, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.9990825118233958e-05, | |
| "loss": 0.9919, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.9987512656175612e-05, | |
| "loss": 0.9623, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "learning_rate": 1.9983691039261358e-05, | |
| "loss": 0.937, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 1.9979360462313965e-05, | |
| "loss": 0.9527, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 1.9974521146102535e-05, | |
| "loss": 0.9173, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.9969173337331283e-05, | |
| "loss": 0.91, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.9963317308626916e-05, | |
| "loss": 0.9045, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.9956953358524774e-05, | |
| "loss": 0.94, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 1.9950081811453598e-05, | |
| "loss": 0.9086, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.7, | |
| "learning_rate": 1.9942703017718977e-05, | |
| "loss": 0.8946, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.99348173534855e-05, | |
| "loss": 0.8836, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.9926425220757607e-05, | |
| "loss": 0.9001, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.991752704735903e-05, | |
| "loss": 0.8826, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.990812328691105e-05, | |
| "loss": 0.8549, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "learning_rate": 1.989821441880933e-05, | |
| "loss": 0.869, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 1.9887800948199496e-05, | |
| "loss": 0.8772, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 1.9876883405951378e-05, | |
| "loss": 0.8691, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 1.9865462348631945e-05, | |
| "loss": 0.8978, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 1.9853538358476933e-05, | |
| "loss": 0.8576, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 1.984111204336116e-05, | |
| "loss": 0.8506, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "learning_rate": 1.9828184036767556e-05, | |
| "loss": 0.8679, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 3.06, | |
| "learning_rate": 1.981475499775484e-05, | |
| "loss": 0.888, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 3.17, | |
| "learning_rate": 1.9800825610923937e-05, | |
| "loss": 0.8369, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 1.9786396586383078e-05, | |
| "loss": 0.8244, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 1.9771468659711595e-05, | |
| "loss": 0.8495, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 1.9756042591922436e-05, | |
| "loss": 0.85, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.63, | |
| "learning_rate": 1.9740119169423337e-05, | |
| "loss": 0.8307, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 1.9723699203976768e-05, | |
| "loss": 0.8419, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.85, | |
| "learning_rate": 1.9706783532658528e-05, | |
| "loss": 0.8483, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 1.9689373017815076e-05, | |
| "loss": 0.8181, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 1.9671468547019575e-05, | |
| "loss": 0.8203, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 4.19, | |
| "learning_rate": 1.9653071033026635e-05, | |
| "loss": 0.8132, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 4.31, | |
| "learning_rate": 1.963418141372579e-05, | |
| "loss": 0.8299, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 4.42, | |
| "learning_rate": 1.9614800652093685e-05, | |
| "loss": 0.8228, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 4.53, | |
| "learning_rate": 1.9594929736144978e-05, | |
| "loss": 0.8083, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 1.9574569678881965e-05, | |
| "loss": 0.8054, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 1.955372151824297e-05, | |
| "loss": 0.8127, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.87, | |
| "learning_rate": 1.9532386317049387e-05, | |
| "loss": 0.8047, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "learning_rate": 1.9510565162951538e-05, | |
| "loss": 0.8113, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 5.1, | |
| "learning_rate": 1.9488259168373198e-05, | |
| "loss": 0.7806, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 5.21, | |
| "learning_rate": 1.94654694704549e-05, | |
| "loss": 0.7973, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 1.944219723099596e-05, | |
| "loss": 0.7968, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 5.44, | |
| "learning_rate": 1.941844363639525e-05, | |
| "loss": 0.7838, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 5.55, | |
| "learning_rate": 1.9394209897590707e-05, | |
| "loss": 0.7798, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 5.67, | |
| "learning_rate": 1.936949724999762e-05, | |
| "loss": 0.7791, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.78, | |
| "learning_rate": 1.9344306953445632e-05, | |
| "loss": 0.7632, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 1.9318640292114526e-05, | |
| "loss": 0.7984, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 6.01, | |
| "learning_rate": 1.929249857446875e-05, | |
| "loss": 0.7763, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 6.12, | |
| "learning_rate": 1.9265883133190715e-05, | |
| "loss": 0.7489, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 6.23, | |
| "learning_rate": 1.9238795325112867e-05, | |
| "loss": 0.7739, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 6.35, | |
| "learning_rate": 1.92112365311485e-05, | |
| "loss": 0.7664, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 1.918320815622137e-05, | |
| "loss": 0.76, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 6.57, | |
| "learning_rate": 1.9154711629194062e-05, | |
| "loss": 0.7604, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 6.69, | |
| "learning_rate": 1.912574840279516e-05, | |
| "loss": 0.7598, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 6.8, | |
| "learning_rate": 1.9096319953545186e-05, | |
| "loss": 0.7585, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.91, | |
| "learning_rate": 1.9066427781681314e-05, | |
| "loss": 0.7312, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 7.03, | |
| "learning_rate": 1.9036073411080917e-05, | |
| "loss": 0.732, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 7.14, | |
| "learning_rate": 1.900525838918385e-05, | |
| "loss": 0.7392, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 7.25, | |
| "learning_rate": 1.8973984286913584e-05, | |
| "loss": 0.6986, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 7.37, | |
| "learning_rate": 1.8942252698597113e-05, | |
| "loss": 0.7281, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 1.891006524188368e-05, | |
| "loss": 0.7316, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 7.59, | |
| "learning_rate": 1.8877423557662307e-05, | |
| "loss": 0.7329, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 7.71, | |
| "learning_rate": 1.8844329309978146e-05, | |
| "loss": 0.7409, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 7.82, | |
| "learning_rate": 1.8810784185947648e-05, | |
| "loss": 0.7345, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 1.8776789895672557e-05, | |
| "loss": 0.7418, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 8.05, | |
| "learning_rate": 1.8742348172152728e-05, | |
| "loss": 0.7426, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 8.16, | |
| "learning_rate": 1.8707460771197773e-05, | |
| "loss": 0.6952, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 8.27, | |
| "learning_rate": 1.8672129471337568e-05, | |
| "loss": 0.7136, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 8.39, | |
| "learning_rate": 1.863635607373157e-05, | |
| "loss": 0.7284, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 1.8600142402077006e-05, | |
| "loss": 0.6978, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 8.61, | |
| "learning_rate": 1.856349030251589e-05, | |
| "loss": 0.6843, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 1.8526401643540924e-05, | |
| "loss": 0.6895, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 8.84, | |
| "learning_rate": 1.8488878315900228e-05, | |
| "loss": 0.7084, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "learning_rate": 1.8450922232500966e-05, | |
| "loss": 0.7028, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 1.8412535328311813e-05, | |
| "loss": 0.6905, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 9.18, | |
| "learning_rate": 1.837371956026433e-05, | |
| "loss": 0.6851, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 1.8334476907153177e-05, | |
| "loss": 0.6759, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 9.41, | |
| "learning_rate": 1.8294809369535265e-05, | |
| "loss": 0.6613, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 1.825471896962774e-05, | |
| "loss": 0.6814, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 1.8214207751204917e-05, | |
| "loss": 0.7044, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 9.75, | |
| "learning_rate": 1.817327777949407e-05, | |
| "loss": 0.6753, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 1.8131931141070166e-05, | |
| "loss": 0.6748, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 9.97, | |
| "learning_rate": 1.8090169943749477e-05, | |
| "loss": 0.6853, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 10.08, | |
| "learning_rate": 1.8047996316482134e-05, | |
| "loss": 0.6406, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 10.2, | |
| "learning_rate": 1.8005412409243604e-05, | |
| "loss": 0.6391, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 10.31, | |
| "learning_rate": 1.7962420392925066e-05, | |
| "loss": 0.6524, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "learning_rate": 1.7919022459222754e-05, | |
| "loss": 0.6618, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "learning_rate": 1.787522082052622e-05, | |
| "loss": 0.6604, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "learning_rate": 1.7831017709805555e-05, | |
| "loss": 0.6706, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 10.76, | |
| "learning_rate": 1.778641538049755e-05, | |
| "loss": 0.6589, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 10.88, | |
| "learning_rate": 1.7741416106390828e-05, | |
| "loss": 0.6631, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 1.7696022181509892e-05, | |
| "loss": 0.6408, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 11.1, | |
| "learning_rate": 1.7650235919998234e-05, | |
| "loss": 0.6077, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 11.22, | |
| "learning_rate": 1.7604059656000313e-05, | |
| "loss": 0.6267, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 11.33, | |
| "learning_rate": 1.7557495743542586e-05, | |
| "loss": 0.6324, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 11.44, | |
| "learning_rate": 1.75105465564135e-05, | |
| "loss": 0.645, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 11.56, | |
| "learning_rate": 1.7463214488042472e-05, | |
| "loss": 0.6167, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 11.67, | |
| "learning_rate": 1.741550195137788e-05, | |
| "loss": 0.6311, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "learning_rate": 1.736741137876405e-05, | |
| "loss": 0.6329, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 11.9, | |
| "learning_rate": 1.7318945221817255e-05, | |
| "loss": 0.6469, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 12.01, | |
| "learning_rate": 1.727010595130074e-05, | |
| "loss": 0.6334, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 12.12, | |
| "learning_rate": 1.7220896056998753e-05, | |
| "loss": 0.5946, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "learning_rate": 1.7171318047589637e-05, | |
| "loss": 0.5695, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 12.35, | |
| "learning_rate": 1.712137445051792e-05, | |
| "loss": 0.6101, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 12.46, | |
| "learning_rate": 1.7071067811865477e-05, | |
| "loss": 0.6203, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 12.58, | |
| "learning_rate": 1.7020400696221737e-05, | |
| "loss": 0.598, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 12.69, | |
| "learning_rate": 1.696937568655294e-05, | |
| "loss": 0.6177, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "learning_rate": 1.691799538407044e-05, | |
| "loss": 0.6153, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "learning_rate": 1.6866262408098134e-05, | |
| "loss": 0.6096, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 13.03, | |
| "learning_rate": 1.6814179395938915e-05, | |
| "loss": 0.612, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 13.14, | |
| "learning_rate": 1.6761749002740195e-05, | |
| "loss": 0.5858, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "learning_rate": 1.6708973901358603e-05, | |
| "loss": 0.5715, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 13.37, | |
| "learning_rate": 1.6655856782223682e-05, | |
| "loss": 0.5834, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 13.48, | |
| "learning_rate": 1.660240035320075e-05, | |
| "loss": 0.5782, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 13.6, | |
| "learning_rate": 1.6548607339452853e-05, | |
| "loss": 0.5621, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 13.71, | |
| "learning_rate": 1.6494480483301836e-05, | |
| "loss": 0.5729, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 13.82, | |
| "learning_rate": 1.6440022544088553e-05, | |
| "loss": 0.5958, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 13.94, | |
| "learning_rate": 1.6385236298032183e-05, | |
| "loss": 0.6007, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 14.05, | |
| "learning_rate": 1.6330124538088705e-05, | |
| "loss": 0.5658, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 14.16, | |
| "learning_rate": 1.627469007380852e-05, | |
| "loss": 0.5674, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 14.28, | |
| "learning_rate": 1.6218935731193223e-05, | |
| "loss": 0.5649, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 14.39, | |
| "learning_rate": 1.616286435255153e-05, | |
| "loss": 0.5497, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 14.5, | |
| "learning_rate": 1.6106478796354382e-05, | |
| "loss": 0.5865, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 14.62, | |
| "learning_rate": 1.6049781937089227e-05, | |
| "loss": 0.547, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 14.73, | |
| "learning_rate": 1.599277666511347e-05, | |
| "loss": 0.5473, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 14.84, | |
| "learning_rate": 1.5935465886507143e-05, | |
| "loss": 0.5553, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 14.96, | |
| "learning_rate": 1.5877852522924733e-05, | |
| "loss": 0.5672, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 15.07, | |
| "learning_rate": 1.581993951144626e-05, | |
| "loss": 0.5392, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 15.18, | |
| "learning_rate": 1.576172980442753e-05, | |
| "loss": 0.504, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 15.3, | |
| "learning_rate": 1.5703226369349642e-05, | |
| "loss": 0.5446, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 15.41, | |
| "learning_rate": 1.5644432188667695e-05, | |
| "loss": 0.5323, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 15.52, | |
| "learning_rate": 1.5585350259658748e-05, | |
| "loss": 0.54, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 15.64, | |
| "learning_rate": 1.5525983594269026e-05, | |
| "loss": 0.5387, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 15.75, | |
| "learning_rate": 1.546633521896038e-05, | |
| "loss": 0.5349, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 15.86, | |
| "learning_rate": 1.5406408174555978e-05, | |
| "loss": 0.5303, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 15.98, | |
| "learning_rate": 1.5346205516085305e-05, | |
| "loss": 0.561, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 16.09, | |
| "learning_rate": 1.528573031262842e-05, | |
| "loss": 0.5295, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 16.2, | |
| "learning_rate": 1.5224985647159489e-05, | |
| "loss": 0.4995, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 16.32, | |
| "learning_rate": 1.5163974616389621e-05, | |
| "loss": 0.5151, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 16.43, | |
| "learning_rate": 1.5102700330609e-05, | |
| "loss": 0.5042, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 16.54, | |
| "learning_rate": 1.504116591352832e-05, | |
| "loss": 0.502, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 16.66, | |
| "learning_rate": 1.497937450211956e-05, | |
| "loss": 0.5101, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 16.77, | |
| "learning_rate": 1.491732924645604e-05, | |
| "loss": 0.5323, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 16.88, | |
| "learning_rate": 1.4855033309551842e-05, | |
| "loss": 0.509, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 1.479248986720057e-05, | |
| "loss": 0.5292, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 17.11, | |
| "learning_rate": 1.4729702107813438e-05, | |
| "loss": 0.4835, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 17.22, | |
| "learning_rate": 1.4666673232256738e-05, | |
| "loss": 0.4934, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 17.34, | |
| "learning_rate": 1.4603406453688656e-05, | |
| "loss": 0.4811, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 17.45, | |
| "learning_rate": 1.4539904997395468e-05, | |
| "loss": 0.4966, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 17.56, | |
| "learning_rate": 1.4476172100627127e-05, | |
| "loss": 0.4906, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 17.68, | |
| "learning_rate": 1.4412211012432213e-05, | |
| "loss": 0.4956, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 17.79, | |
| "learning_rate": 1.4348024993492323e-05, | |
| "loss": 0.5003, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 17.9, | |
| "learning_rate": 1.4283617315955815e-05, | |
| "loss": 0.5131, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 18.02, | |
| "learning_rate": 1.4218991263271024e-05, | |
| "loss": 0.4642, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 18.13, | |
| "learning_rate": 1.4154150130018867e-05, | |
| "loss": 0.4679, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 18.24, | |
| "learning_rate": 1.408909722174487e-05, | |
| "loss": 0.4555, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 18.36, | |
| "learning_rate": 1.4023835854790682e-05, | |
| "loss": 0.4663, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 18.47, | |
| "learning_rate": 1.3958369356124986e-05, | |
| "loss": 0.4991, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 18.58, | |
| "learning_rate": 1.3892701063173917e-05, | |
| "loss": 0.4592, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 18.7, | |
| "learning_rate": 1.3826834323650899e-05, | |
| "loss": 0.4697, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 18.81, | |
| "learning_rate": 1.3760772495385998e-05, | |
| "loss": 0.4812, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 18.92, | |
| "learning_rate": 1.369451894615474e-05, | |
| "loss": 0.4868, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 19.04, | |
| "learning_rate": 1.362807705350641e-05, | |
| "loss": 0.4511, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 19.15, | |
| "learning_rate": 1.3561450204591898e-05, | |
| "loss": 0.4332, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 19.26, | |
| "learning_rate": 1.3494641795990986e-05, | |
| "loss": 0.4392, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 19.38, | |
| "learning_rate": 1.3427655233539227e-05, | |
| "loss": 0.435, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 19.49, | |
| "learning_rate": 1.3360493932154301e-05, | |
| "loss": 0.4377, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 19.6, | |
| "learning_rate": 1.3293161315661934e-05, | |
| "loss": 0.4509, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 19.72, | |
| "learning_rate": 1.3225660816621342e-05, | |
| "loss": 0.4564, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 19.83, | |
| "learning_rate": 1.3157995876150252e-05, | |
| "loss": 0.471, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 19.94, | |
| "learning_rate": 1.3090169943749475e-05, | |
| "loss": 0.4759, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 20.06, | |
| "learning_rate": 1.302218647712706e-05, | |
| "loss": 0.454, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 20.17, | |
| "learning_rate": 1.2954048942022002e-05, | |
| "loss": 0.3999, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 20.28, | |
| "learning_rate": 1.288576081202759e-05, | |
| "loss": 0.415, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 20.4, | |
| "learning_rate": 1.2817325568414299e-05, | |
| "loss": 0.4349, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 20.51, | |
| "learning_rate": 1.2748746699952338e-05, | |
| "loss": 0.4281, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 20.62, | |
| "learning_rate": 1.2680027702733791e-05, | |
| "loss": 0.4391, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 20.74, | |
| "learning_rate": 1.2611172079994377e-05, | |
| "loss": 0.4288, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 20.85, | |
| "learning_rate": 1.2542183341934873e-05, | |
| "loss": 0.4562, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 20.96, | |
| "learning_rate": 1.2473065005542155e-05, | |
| "loss": 0.4294, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 21.08, | |
| "learning_rate": 1.2403820594409926e-05, | |
| "loss": 0.4147, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 21.19, | |
| "learning_rate": 1.2334453638559057e-05, | |
| "loss": 0.4003, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 21.3, | |
| "learning_rate": 1.2264967674257647e-05, | |
| "loss": 0.402, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 21.42, | |
| "learning_rate": 1.2195366243840745e-05, | |
| "loss": 0.3808, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 21.53, | |
| "learning_rate": 1.2125652895529766e-05, | |
| "loss": 0.4199, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 21.64, | |
| "learning_rate": 1.2055831183251608e-05, | |
| "loss": 0.4153, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 21.76, | |
| "learning_rate": 1.1985904666457455e-05, | |
| "loss": 0.4206, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 21.87, | |
| "learning_rate": 1.1915876909941356e-05, | |
| "loss": 0.4326, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 21.98, | |
| "learning_rate": 1.1845751483658454e-05, | |
| "loss": 0.4263, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 22.1, | |
| "learning_rate": 1.1775531962543036e-05, | |
| "loss": 0.3938, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 22.21, | |
| "learning_rate": 1.170522192632624e-05, | |
| "loss": 0.3748, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 22.32, | |
| "learning_rate": 1.1634824959353602e-05, | |
| "loss": 0.3757, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 22.44, | |
| "learning_rate": 1.156434465040231e-05, | |
| "loss": 0.3876, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 22.55, | |
| "learning_rate": 1.1493784592498252e-05, | |
| "loss": 0.395, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 22.66, | |
| "learning_rate": 1.1423148382732854e-05, | |
| "loss": 0.3938, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 22.78, | |
| "learning_rate": 1.1352439622079689e-05, | |
| "loss": 0.4146, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 22.89, | |
| "learning_rate": 1.1281661915210931e-05, | |
| "loss": 0.4206, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 1.1210818870313548e-05, | |
| "loss": 0.3975, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 23.12, | |
| "learning_rate": 1.1139914098905406e-05, | |
| "loss": 0.3613, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 23.23, | |
| "learning_rate": 1.1068951215651132e-05, | |
| "loss": 0.3572, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 23.34, | |
| "learning_rate": 1.0997933838177828e-05, | |
| "loss": 0.3773, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 23.46, | |
| "learning_rate": 1.0926865586890689e-05, | |
| "loss": 0.3846, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 23.57, | |
| "learning_rate": 1.08557500847884e-05, | |
| "loss": 0.3758, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 23.68, | |
| "learning_rate": 1.0784590957278452e-05, | |
| "loss": 0.393, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 23.8, | |
| "learning_rate": 1.0713391831992324e-05, | |
| "loss": 0.3699, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 23.91, | |
| "learning_rate": 1.064215633860055e-05, | |
| "loss": 0.3639, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 24.02, | |
| "learning_rate": 1.0570888108627682e-05, | |
| "loss": 0.3869, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 24.14, | |
| "learning_rate": 1.0499590775267142e-05, | |
| "loss": 0.3387, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 24.25, | |
| "learning_rate": 1.0428267973196027e-05, | |
| "loss": 0.3507, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 24.36, | |
| "learning_rate": 1.0356923338389807e-05, | |
| "loss": 0.3424, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 24.48, | |
| "learning_rate": 1.0285560507936962e-05, | |
| "loss": 0.3615, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 24.59, | |
| "learning_rate": 1.0214183119853583e-05, | |
| "loss": 0.3585, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 24.7, | |
| "learning_rate": 1.0142794812897874e-05, | |
| "loss": 0.3519, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 24.82, | |
| "learning_rate": 1.0071399226384695e-05, | |
| "loss": 0.3734, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 24.93, | |
| "learning_rate": 1e-05, | |
| "loss": 0.3788, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 25.04, | |
| "learning_rate": 9.928600773615306e-06, | |
| "loss": 0.3616, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 25.16, | |
| "learning_rate": 9.85720518710213e-06, | |
| "loss": 0.3345, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 25.27, | |
| "learning_rate": 9.78581688014642e-06, | |
| "loss": 0.3483, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 25.38, | |
| "learning_rate": 9.71443949206304e-06, | |
| "loss": 0.327, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 25.5, | |
| "learning_rate": 9.643076661610197e-06, | |
| "loss": 0.3422, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 25.61, | |
| "learning_rate": 9.571732026803978e-06, | |
| "loss": 0.3472, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 25.72, | |
| "learning_rate": 9.500409224732863e-06, | |
| "loss": 0.3475, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 25.84, | |
| "learning_rate": 9.42911189137232e-06, | |
| "loss": 0.3507, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 25.95, | |
| "learning_rate": 9.357843661399447e-06, | |
| "loss": 0.3534, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 26.06, | |
| "learning_rate": 9.286608168007678e-06, | |
| "loss": 0.3295, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 26.18, | |
| "learning_rate": 9.215409042721553e-06, | |
| "loss": 0.3187, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 26.29, | |
| "learning_rate": 9.144249915211605e-06, | |
| "loss": 0.3465, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 26.4, | |
| "learning_rate": 9.073134413109313e-06, | |
| "loss": 0.3262, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 26.52, | |
| "learning_rate": 9.002066161822174e-06, | |
| "loss": 0.3154, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 26.63, | |
| "learning_rate": 8.931048784348875e-06, | |
| "loss": 0.3277, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 26.74, | |
| "learning_rate": 8.860085901094595e-06, | |
| "loss": 0.3387, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 26.86, | |
| "learning_rate": 8.789181129686452e-06, | |
| "loss": 0.323, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 26.97, | |
| "learning_rate": 8.718338084789074e-06, | |
| "loss": 0.3243, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 27.08, | |
| "learning_rate": 8.647560377920311e-06, | |
| "loss": 0.3112, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "learning_rate": 8.576851617267151e-06, | |
| "loss": 0.3138, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 27.31, | |
| "learning_rate": 8.50621540750175e-06, | |
| "loss": 0.294, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 27.42, | |
| "learning_rate": 8.43565534959769e-06, | |
| "loss": 0.3009, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 27.54, | |
| "learning_rate": 8.365175040646403e-06, | |
| "loss": 0.3217, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 27.65, | |
| "learning_rate": 8.294778073673762e-06, | |
| "loss": 0.3083, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 27.76, | |
| "learning_rate": 8.224468037456969e-06, | |
| "loss": 0.3201, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 27.88, | |
| "learning_rate": 8.154248516341547e-06, | |
| "loss": 0.3402, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 27.99, | |
| "learning_rate": 8.084123090058646e-06, | |
| "loss": 0.3128, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 28.1, | |
| "learning_rate": 8.014095333542548e-06, | |
| "loss": 0.2901, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 28.22, | |
| "learning_rate": 7.944168816748396e-06, | |
| "loss": 0.2901, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 28.33, | |
| "learning_rate": 7.874347104470234e-06, | |
| "loss": 0.2886, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 28.44, | |
| "learning_rate": 7.804633756159258e-06, | |
| "loss": 0.2953, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 28.56, | |
| "learning_rate": 7.735032325742355e-06, | |
| "loss": 0.3088, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 28.67, | |
| "learning_rate": 7.66554636144095e-06, | |
| "loss": 0.3004, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 28.78, | |
| "learning_rate": 7.596179405590076e-06, | |
| "loss": 0.3299, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 28.9, | |
| "learning_rate": 7.5269349944578454e-06, | |
| "loss": 0.294, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 29.01, | |
| "learning_rate": 7.4578166580651335e-06, | |
| "loss": 0.2974, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 29.12, | |
| "learning_rate": 7.388827920005628e-06, | |
| "loss": 0.2773, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 29.24, | |
| "learning_rate": 7.319972297266215e-06, | |
| "loss": 0.2718, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 29.35, | |
| "learning_rate": 7.2512533000476625e-06, | |
| "loss": 0.2964, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 29.46, | |
| "learning_rate": 7.182674431585703e-06, | |
| "loss": 0.2784, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 29.58, | |
| "learning_rate": 7.114239187972416e-06, | |
| "loss": 0.3001, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 29.69, | |
| "learning_rate": 7.045951057978001e-06, | |
| "loss": 0.2877, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 29.8, | |
| "learning_rate": 6.977813522872943e-06, | |
| "loss": 0.2964, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 29.92, | |
| "learning_rate": 6.909830056250527e-06, | |
| "loss": 0.2923, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 30.03, | |
| "learning_rate": 6.8420041238497525e-06, | |
| "loss": 0.2839, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 30.14, | |
| "learning_rate": 6.774339183378663e-06, | |
| "loss": 0.2679, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 30.25, | |
| "learning_rate": 6.7068386843380695e-06, | |
| "loss": 0.2751, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 30.37, | |
| "learning_rate": 6.639506067845698e-06, | |
| "loss": 0.2588, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 30.48, | |
| "learning_rate": 6.572344766460776e-06, | |
| "loss": 0.2828, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 30.59, | |
| "learning_rate": 6.505358204009018e-06, | |
| "loss": 0.2904, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 30.71, | |
| "learning_rate": 6.438549795408107e-06, | |
| "loss": 0.2712, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 30.82, | |
| "learning_rate": 6.3719229464935915e-06, | |
| "loss": 0.2765, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 30.93, | |
| "learning_rate": 6.305481053845262e-06, | |
| "loss": 0.2732, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 31.05, | |
| "learning_rate": 6.239227504614004e-06, | |
| "loss": 0.2604, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 31.16, | |
| "learning_rate": 6.173165676349103e-06, | |
| "loss": 0.2535, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 31.27, | |
| "learning_rate": 6.107298936826086e-06, | |
| "loss": 0.2607, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 31.39, | |
| "learning_rate": 6.041630643875018e-06, | |
| "loss": 0.2769, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 31.5, | |
| "learning_rate": 5.9761641452093225e-06, | |
| "loss": 0.2747, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 31.61, | |
| "learning_rate": 5.910902778255134e-06, | |
| "loss": 0.2578, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 31.73, | |
| "learning_rate": 5.845849869981137e-06, | |
| "loss": 0.2566, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 31.84, | |
| "learning_rate": 5.781008736728975e-06, | |
| "loss": 0.2658, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 31.95, | |
| "learning_rate": 5.716382684044191e-06, | |
| "loss": 0.2732, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 32.07, | |
| "learning_rate": 5.6519750065076815e-06, | |
| "loss": 0.2656, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 32.18, | |
| "learning_rate": 5.587788987567785e-06, | |
| "loss": 0.2386, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 32.29, | |
| "learning_rate": 5.523827899372876e-06, | |
| "loss": 0.2554, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 32.41, | |
| "learning_rate": 5.460095002604533e-06, | |
| "loss": 0.2511, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 32.52, | |
| "learning_rate": 5.396593546311346e-06, | |
| "loss": 0.2641, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 32.63, | |
| "learning_rate": 5.333326767743263e-06, | |
| "loss": 0.2553, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 32.75, | |
| "learning_rate": 5.276589982435913e-06, | |
| "loss": 0.2508, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 32.86, | |
| "learning_rate": 5.213777967188747e-06, | |
| "loss": 0.2533, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 32.97, | |
| "learning_rate": 5.151209949448599e-06, | |
| "loss": 0.2621, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 33.09, | |
| "learning_rate": 5.088889118879516e-06, | |
| "loss": 0.2381, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 33.2, | |
| "learning_rate": 5.02681865254417e-06, | |
| "loss": 0.2405, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 33.31, | |
| "learning_rate": 4.965001714741851e-06, | |
| "loss": 0.2473, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 33.43, | |
| "learning_rate": 4.903441456847198e-06, | |
| "loss": 0.2345, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 33.54, | |
| "learning_rate": 4.8421410171495265e-06, | |
| "loss": 0.2349, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 33.65, | |
| "learning_rate": 4.781103520692839e-06, | |
| "loss": 0.2466, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 33.77, | |
| "learning_rate": 4.720332079116523e-06, | |
| "loss": 0.2512, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 33.88, | |
| "learning_rate": 4.659829790496699e-06, | |
| "loss": 0.2633, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 33.99, | |
| "learning_rate": 4.599599739188322e-06, | |
| "loss": 0.247, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 34.11, | |
| "learning_rate": 4.539644995667911e-06, | |
| "loss": 0.2231, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 34.22, | |
| "learning_rate": 4.479968616377024e-06, | |
| "loss": 0.2174, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 34.33, | |
| "learning_rate": 4.420573643566455e-06, | |
| "loss": 0.231, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 34.45, | |
| "learning_rate": 4.361463105141137e-06, | |
| "loss": 0.2545, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 34.56, | |
| "learning_rate": 4.302640014505779e-06, | |
| "loss": 0.2502, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 34.67, | |
| "learning_rate": 4.244107370411248e-06, | |
| "loss": 0.2505, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 34.79, | |
| "learning_rate": 4.185868156801695e-06, | |
| "loss": 0.225, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 34.9, | |
| "learning_rate": 4.1279253426624345e-06, | |
| "loss": 0.2436, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 35.01, | |
| "learning_rate": 4.07028188186859e-06, | |
| "loss": 0.2274, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 35.13, | |
| "learning_rate": 4.0129407130345114e-06, | |
| "loss": 0.214, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 35.24, | |
| "learning_rate": 3.955904759363958e-06, | |
| "loss": 0.2088, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 35.35, | |
| "learning_rate": 3.899176928501078e-06, | |
| "loss": 0.244, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 35.47, | |
| "learning_rate": 3.842760112382183e-06, | |
| "loss": 0.2255, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 35.58, | |
| "learning_rate": 3.7866571870883382e-06, | |
| "loss": 0.2282, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 35.69, | |
| "learning_rate": 3.7308710126986934e-06, | |
| "loss": 0.2415, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 35.81, | |
| "learning_rate": 3.675404433144727e-06, | |
| "loss": 0.2415, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 35.92, | |
| "learning_rate": 3.6202602760652395e-06, | |
| "loss": 0.2301, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 36.03, | |
| "learning_rate": 3.565441352662211e-06, | |
| "loss": 0.2301, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 36.15, | |
| "learning_rate": 3.510950457557487e-06, | |
| "loss": 0.2213, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 36.26, | |
| "learning_rate": 3.4567903686503103e-06, | |
| "loss": 0.2174, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 36.37, | |
| "learning_rate": 3.4029638469757055e-06, | |
| "loss": 0.2236, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 36.49, | |
| "learning_rate": 3.3494736365637304e-06, | |
| "loss": 0.2235, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 36.6, | |
| "learning_rate": 3.2963224642995675e-06, | |
| "loss": 0.222, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 36.71, | |
| "learning_rate": 3.2435130397845472e-06, | |
| "loss": 0.2237, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 36.83, | |
| "learning_rate": 3.1910480551979706e-06, | |
| "loss": 0.2239, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 36.94, | |
| "learning_rate": 3.1389301851598976e-06, | |
| "loss": 0.2149, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 37.05, | |
| "learning_rate": 3.0871620865947816e-06, | |
| "loss": 0.2279, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 37.17, | |
| "learning_rate": 3.0357463985960257e-06, | |
| "loss": 0.2007, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 37.28, | |
| "learning_rate": 2.9846857422914434e-06, | |
| "loss": 0.2132, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 37.39, | |
| "learning_rate": 2.933982720709637e-06, | |
| "loss": 0.2147, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 37.51, | |
| "learning_rate": 2.883639918647296e-06, | |
| "loss": 0.2121, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 37.62, | |
| "learning_rate": 2.833659902537429e-06, | |
| "loss": 0.2147, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 37.73, | |
| "learning_rate": 2.7840452203185154e-06, | |
| "loss": 0.2127, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 37.85, | |
| "learning_rate": 2.7347984013046435e-06, | |
| "loss": 0.2308, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 37.96, | |
| "learning_rate": 2.6859219560565407e-06, | |
| "loss": 0.2152, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 38.07, | |
| "learning_rate": 2.637418376253591e-06, | |
| "loss": 0.2161, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 38.19, | |
| "learning_rate": 2.589290134566824e-06, | |
| "loss": 0.2206, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 38.3, | |
| "learning_rate": 2.541539684532852e-06, | |
| "loss": 0.2256, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 38.41, | |
| "learning_rate": 2.4941694604287913e-06, | |
| "loss": 0.2096, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 38.53, | |
| "learning_rate": 2.447181877148165e-06, | |
| "loss": 0.2058, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 38.64, | |
| "learning_rate": 2.4005793300777933e-06, | |
| "loss": 0.2102, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 38.75, | |
| "learning_rate": 2.3543641949756835e-06, | |
| "loss": 0.2139, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 38.87, | |
| "learning_rate": 2.30853882784991e-06, | |
| "loss": 0.2048, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 38.98, | |
| "learning_rate": 2.2631055648385e-06, | |
| "loss": 0.2071, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 39.09, | |
| "learning_rate": 2.2180667220903697e-06, | |
| "loss": 0.206, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 39.21, | |
| "learning_rate": 2.1734245956472024e-06, | |
| "loss": 0.2049, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 39.32, | |
| "learning_rate": 2.1291814613264383e-06, | |
| "loss": 0.2096, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 39.43, | |
| "learning_rate": 2.085339574605243e-06, | |
| "loss": 0.1986, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 39.55, | |
| "learning_rate": 2.0419011705055115e-06, | |
| "loss": 0.2036, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 39.66, | |
| "learning_rate": 1.998868463479945e-06, | |
| "loss": 0.21, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 39.77, | |
| "learning_rate": 1.956243647299155e-06, | |
| "loss": 0.2155, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 39.89, | |
| "learning_rate": 1.914028894939829e-06, | |
| "loss": 0.2086, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 1.8722263584739486e-06, | |
| "loss": 0.1997, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 40.11, | |
| "learning_rate": 1.830838168959075e-06, | |
| "loss": 0.1932, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 40.23, | |
| "learning_rate": 1.7898664363297302e-06, | |
| "loss": 0.1997, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 40.34, | |
| "learning_rate": 1.7493132492898134e-06, | |
| "loss": 0.1975, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 40.45, | |
| "learning_rate": 1.7091806752061212e-06, | |
| "loss": 0.2072, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 40.57, | |
| "learning_rate": 1.6694707600029702e-06, | |
| "loss": 0.1786, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 40.68, | |
| "learning_rate": 1.630185528057886e-06, | |
| "loss": 0.2044, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 40.79, | |
| "learning_rate": 1.5913269820984023e-06, | |
| "loss": 0.2129, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 40.91, | |
| "learning_rate": 1.5528971030999706e-06, | |
| "loss": 0.2219, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 41.02, | |
| "learning_rate": 1.5148978501849642e-06, | |
| "loss": 0.2023, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 41.13, | |
| "learning_rate": 1.4773311605228059e-06, | |
| "loss": 0.1975, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 41.25, | |
| "learning_rate": 1.4401989492312164e-06, | |
| "loss": 0.1922, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 41.36, | |
| "learning_rate": 1.403503109278579e-06, | |
| "loss": 0.1868, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 41.47, | |
| "learning_rate": 1.3672455113874429e-06, | |
| "loss": 0.2009, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 41.59, | |
| "learning_rate": 1.3314280039391426e-06, | |
| "loss": 0.2064, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 41.7, | |
| "learning_rate": 1.2960524128795837e-06, | |
| "loss": 0.2036, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 41.81, | |
| "learning_rate": 1.2611205416261595e-06, | |
| "loss": 0.1896, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 41.93, | |
| "learning_rate": 1.2266341709757946e-06, | |
| "loss": 0.2078, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 42.04, | |
| "learning_rate": 1.192595059014179e-06, | |
| "loss": 0.1931, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 42.15, | |
| "learning_rate": 1.1590049410261384e-06, | |
| "loss": 0.203, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 42.27, | |
| "learning_rate": 1.1258655294071686e-06, | |
| "loss": 0.1928, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 42.38, | |
| "learning_rate": 1.0931785135761375e-06, | |
| "loss": 0.1898, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 42.49, | |
| "learning_rate": 1.0609455598891682e-06, | |
| "loss": 0.2001, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 42.61, | |
| "learning_rate": 1.029168311554678e-06, | |
| "loss": 0.2011, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 42.72, | |
| "learning_rate": 9.978483885496204e-07, | |
| "loss": 0.1871, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 42.83, | |
| "learning_rate": 9.669873875368852e-07, | |
| "loss": 0.2027, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 42.95, | |
| "learning_rate": 9.365868817839264e-07, | |
| "loss": 0.1941, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 43.06, | |
| "learning_rate": 9.066484210825288e-07, | |
| "loss": 0.1917, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 43.17, | |
| "learning_rate": 8.771735316698249e-07, | |
| "loss": 0.192, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 43.29, | |
| "learning_rate": 8.481637161504741e-07, | |
| "loss": 0.1959, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 43.4, | |
| "learning_rate": 8.19620453420068e-07, | |
| "loss": 0.192, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 43.51, | |
| "learning_rate": 7.915451985897382e-07, | |
| "loss": 0.2027, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 43.63, | |
| "learning_rate": 7.639393829119701e-07, | |
| "loss": 0.19, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 43.74, | |
| "learning_rate": 7.368044137076435e-07, | |
| "loss": 0.1919, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 43.85, | |
| "learning_rate": 7.101416742942913e-07, | |
| "loss": 0.1897, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 43.97, | |
| "learning_rate": 6.839525239155675e-07, | |
| "loss": 0.199, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 44.08, | |
| "learning_rate": 6.582382976719703e-07, | |
| "loss": 0.2, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 44.19, | |
| "learning_rate": 6.330003064527679e-07, | |
| "loss": 0.1917, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 44.31, | |
| "learning_rate": 6.082398368691711e-07, | |
| "loss": 0.1804, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 44.42, | |
| "learning_rate": 5.839581511887515e-07, | |
| "loss": 0.1971, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 44.53, | |
| "learning_rate": 5.601564872710851e-07, | |
| "loss": 0.1895, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 44.65, | |
| "learning_rate": 5.36836058504645e-07, | |
| "loss": 0.2008, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 44.76, | |
| "learning_rate": 5.13998053744954e-07, | |
| "loss": 0.1871, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 44.87, | |
| "learning_rate": 4.916436372539668e-07, | |
| "loss": 0.1899, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 44.99, | |
| "learning_rate": 4.697739486407227e-07, | |
| "loss": 0.1917, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 45.1, | |
| "learning_rate": 4.4839010280325003e-07, | |
| "loss": 0.1976, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 45.21, | |
| "learning_rate": 4.2749318987172385e-07, | |
| "loss": 0.1915, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 45.33, | |
| "learning_rate": 4.070842751529025e-07, | |
| "loss": 0.2004, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 45.44, | |
| "learning_rate": 3.871643990758056e-07, | |
| "loss": 0.1947, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 45.55, | |
| "learning_rate": 3.6773457713868423e-07, | |
| "loss": 0.1793, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 45.67, | |
| "learning_rate": 3.487957998572511e-07, | |
| "loss": 0.1882, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 45.78, | |
| "learning_rate": 3.3034903271417564e-07, | |
| "loss": 0.1862, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 45.89, | |
| "learning_rate": 3.1239521610987757e-07, | |
| "loss": 0.1862, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 46.01, | |
| "learning_rate": 2.949352653145754e-07, | |
| "loss": 0.1987, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 46.12, | |
| "learning_rate": 2.779700704216337e-07, | |
| "loss": 0.1837, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 46.23, | |
| "learning_rate": 2.6150049630218277e-07, | |
| "loss": 0.1949, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 46.35, | |
| "learning_rate": 2.4552738256102717e-07, | |
| "loss": 0.1892, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 46.46, | |
| "learning_rate": 2.3005154349385106e-07, | |
| "loss": 0.1959, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 46.57, | |
| "learning_rate": 2.1507376804569935e-07, | |
| "loss": 0.1925, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 46.69, | |
| "learning_rate": 2.0059481977075523e-07, | |
| "loss": 0.1933, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 46.8, | |
| "learning_rate": 1.8661543679342365e-07, | |
| "loss": 0.2049, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 46.91, | |
| "learning_rate": 1.731363317706969e-07, | |
| "loss": 0.1756, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 47.03, | |
| "learning_rate": 1.601581918558237e-07, | |
| "loss": 0.1746, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 47.14, | |
| "learning_rate": 1.4768167866328176e-07, | |
| "loss": 0.1963, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 47.25, | |
| "learning_rate": 1.357074282350457e-07, | |
| "loss": 0.1903, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 47.37, | |
| "learning_rate": 1.2423605100816304e-07, | |
| "loss": 0.1821, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 47.48, | |
| "learning_rate": 1.1326813178363927e-07, | |
| "loss": 0.1946, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 47.59, | |
| "learning_rate": 1.0280422969661696e-07, | |
| "loss": 0.1832, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 47.71, | |
| "learning_rate": 9.284487818787879e-08, | |
| "loss": 0.1892, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 47.82, | |
| "learning_rate": 8.33905849766481e-08, | |
| "loss": 0.1883, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 47.93, | |
| "learning_rate": 7.444183203471067e-08, | |
| "loss": 0.1921, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 48.05, | |
| "learning_rate": 6.599907556184115e-08, | |
| "loss": 0.2003, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 48.16, | |
| "learning_rate": 5.806274596254491e-08, | |
| "loss": 0.1789, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 48.27, | |
| "learning_rate": 5.0633247824118936e-08, | |
| "loss": 0.1853, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 48.39, | |
| "learning_rate": 4.371095989602614e-08, | |
| "loss": 0.1968, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 48.5, | |
| "learning_rate": 3.729623507058744e-08, | |
| "loss": 0.2019, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 48.61, | |
| "learning_rate": 3.13894003649895e-08, | |
| "loss": 0.1808, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 48.73, | |
| "learning_rate": 2.5990756904614723e-08, | |
| "loss": 0.1959, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 48.84, | |
| "learning_rate": 2.1100579907691322e-08, | |
| "loss": 0.1781, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 48.95, | |
| "learning_rate": 1.6719118671262302e-08, | |
| "loss": 0.1868, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 49.07, | |
| "learning_rate": 1.2846596558473424e-08, | |
| "loss": 0.1887, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 49.18, | |
| "learning_rate": 9.483210987193404e-09, | |
| "loss": 0.1982, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 49.29, | |
| "learning_rate": 6.62913341994087e-09, | |
| "loss": 0.1957, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 49.41, | |
| "learning_rate": 4.284509355151345e-09, | |
| "loss": 0.1925, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 49.52, | |
| "learning_rate": 2.449458319754294e-09, | |
| "loss": 0.1859, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 49.63, | |
| "learning_rate": 1.124073863082442e-09, | |
| "loss": 0.187, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 49.75, | |
| "learning_rate": 3.0842355210336515e-10, | |
| "loss": 0.1792, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 49.86, | |
| "learning_rate": 2.548967970028571e-12, | |
| "loss": 0.1895, | |
| "step": 4400 | |
| } | |
| ], | |
| "max_steps": 4400, | |
| "num_train_epochs": 50, | |
| "total_flos": 6.496431653421466e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |