chaojiang06's picture
Upload 4 files
0bb311b verified
{
"best_metric": 0.8428992315899235,
"best_model_checkpoint": "/nethome/cjiang95/share6/research_18_medical_cwi/src/readability_score/outputs_v2/roberta-large+cwi.py+512+8+1e-5+1/checkpoint-969",
"epoch": 10.0,
"eval_steps": 500,
"global_step": 3230,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 9.969040247678019e-06,
"loss": 12.2304,
"step": 10
},
{
"epoch": 0.06,
"learning_rate": 9.938080495356039e-06,
"loss": 3.5674,
"step": 20
},
{
"epoch": 0.09,
"learning_rate": 9.907120743034056e-06,
"loss": 1.7238,
"step": 30
},
{
"epoch": 0.12,
"learning_rate": 9.876160990712074e-06,
"loss": 0.6595,
"step": 40
},
{
"epoch": 0.15,
"learning_rate": 9.845201238390094e-06,
"loss": 0.581,
"step": 50
},
{
"epoch": 0.19,
"learning_rate": 9.814241486068112e-06,
"loss": 0.6014,
"step": 60
},
{
"epoch": 0.22,
"learning_rate": 9.78328173374613e-06,
"loss": 0.5123,
"step": 70
},
{
"epoch": 0.25,
"learning_rate": 9.75232198142415e-06,
"loss": 0.5812,
"step": 80
},
{
"epoch": 0.28,
"learning_rate": 9.721362229102167e-06,
"loss": 0.2399,
"step": 90
},
{
"epoch": 0.31,
"learning_rate": 9.690402476780186e-06,
"loss": 0.3053,
"step": 100
},
{
"epoch": 0.34,
"learning_rate": 9.659442724458206e-06,
"loss": 0.3091,
"step": 110
},
{
"epoch": 0.37,
"learning_rate": 9.628482972136224e-06,
"loss": 0.2249,
"step": 120
},
{
"epoch": 0.4,
"learning_rate": 9.597523219814242e-06,
"loss": 0.3502,
"step": 130
},
{
"epoch": 0.43,
"learning_rate": 9.56656346749226e-06,
"loss": 0.2606,
"step": 140
},
{
"epoch": 0.46,
"learning_rate": 9.535603715170279e-06,
"loss": 0.2831,
"step": 150
},
{
"epoch": 0.5,
"learning_rate": 9.504643962848297e-06,
"loss": 0.3918,
"step": 160
},
{
"epoch": 0.53,
"learning_rate": 9.473684210526315e-06,
"loss": 0.3063,
"step": 170
},
{
"epoch": 0.56,
"learning_rate": 9.442724458204335e-06,
"loss": 0.2705,
"step": 180
},
{
"epoch": 0.59,
"learning_rate": 9.411764705882354e-06,
"loss": 0.3898,
"step": 190
},
{
"epoch": 0.62,
"learning_rate": 9.380804953560372e-06,
"loss": 0.3299,
"step": 200
},
{
"epoch": 0.65,
"learning_rate": 9.34984520123839e-06,
"loss": 0.3137,
"step": 210
},
{
"epoch": 0.68,
"learning_rate": 9.318885448916409e-06,
"loss": 0.2415,
"step": 220
},
{
"epoch": 0.71,
"learning_rate": 9.287925696594427e-06,
"loss": 0.2178,
"step": 230
},
{
"epoch": 0.74,
"learning_rate": 9.256965944272447e-06,
"loss": 0.2553,
"step": 240
},
{
"epoch": 0.77,
"learning_rate": 9.226006191950465e-06,
"loss": 0.309,
"step": 250
},
{
"epoch": 0.8,
"learning_rate": 9.195046439628484e-06,
"loss": 0.2224,
"step": 260
},
{
"epoch": 0.84,
"learning_rate": 9.164086687306502e-06,
"loss": 0.1824,
"step": 270
},
{
"epoch": 0.87,
"learning_rate": 9.13312693498452e-06,
"loss": 0.3109,
"step": 280
},
{
"epoch": 0.9,
"learning_rate": 9.102167182662539e-06,
"loss": 0.1844,
"step": 290
},
{
"epoch": 0.93,
"learning_rate": 9.071207430340559e-06,
"loss": 0.209,
"step": 300
},
{
"epoch": 0.96,
"learning_rate": 9.040247678018577e-06,
"loss": 0.2062,
"step": 310
},
{
"epoch": 0.99,
"learning_rate": 9.009287925696595e-06,
"loss": 0.3654,
"step": 320
},
{
"epoch": 1.0,
"eval_addition_pearsonr": 0.8162883662605274,
"eval_addition_pearsonr_pvalue": 1.4513372865753766e-188,
"eval_addition_spearmanr": 0.8075565818829462,
"eval_addition_spearmanr_pvalue": 1.7106471374756087e-181,
"eval_loss": 0.6905816793441772,
"eval_pearsonr": 0.8162883662605274,
"eval_runtime": 20.8379,
"eval_samples_per_second": 37.624,
"eval_steps_per_second": 4.703,
"step": 323
},
{
"epoch": 1.02,
"learning_rate": 8.978328173374614e-06,
"loss": 0.3027,
"step": 330
},
{
"epoch": 1.05,
"learning_rate": 8.947368421052632e-06,
"loss": 0.2079,
"step": 340
},
{
"epoch": 1.08,
"learning_rate": 8.91640866873065e-06,
"loss": 0.1931,
"step": 350
},
{
"epoch": 1.11,
"learning_rate": 8.88544891640867e-06,
"loss": 0.2685,
"step": 360
},
{
"epoch": 1.15,
"learning_rate": 8.854489164086688e-06,
"loss": 0.1325,
"step": 370
},
{
"epoch": 1.18,
"learning_rate": 8.823529411764707e-06,
"loss": 0.2067,
"step": 380
},
{
"epoch": 1.21,
"learning_rate": 8.792569659442725e-06,
"loss": 0.2614,
"step": 390
},
{
"epoch": 1.24,
"learning_rate": 8.761609907120743e-06,
"loss": 0.2262,
"step": 400
},
{
"epoch": 1.27,
"learning_rate": 8.730650154798762e-06,
"loss": 0.1988,
"step": 410
},
{
"epoch": 1.3,
"learning_rate": 8.699690402476782e-06,
"loss": 0.3304,
"step": 420
},
{
"epoch": 1.33,
"learning_rate": 8.6687306501548e-06,
"loss": 0.1908,
"step": 430
},
{
"epoch": 1.36,
"learning_rate": 8.637770897832818e-06,
"loss": 0.3833,
"step": 440
},
{
"epoch": 1.39,
"learning_rate": 8.606811145510837e-06,
"loss": 0.2676,
"step": 450
},
{
"epoch": 1.42,
"learning_rate": 8.575851393188855e-06,
"loss": 0.2567,
"step": 460
},
{
"epoch": 1.46,
"learning_rate": 8.544891640866873e-06,
"loss": 0.1856,
"step": 470
},
{
"epoch": 1.49,
"learning_rate": 8.513931888544892e-06,
"loss": 0.2137,
"step": 480
},
{
"epoch": 1.52,
"learning_rate": 8.482972136222912e-06,
"loss": 0.2722,
"step": 490
},
{
"epoch": 1.55,
"learning_rate": 8.45201238390093e-06,
"loss": 0.5435,
"step": 500
},
{
"epoch": 1.58,
"learning_rate": 8.421052631578948e-06,
"loss": 0.2294,
"step": 510
},
{
"epoch": 1.61,
"learning_rate": 8.390092879256967e-06,
"loss": 0.1798,
"step": 520
},
{
"epoch": 1.64,
"learning_rate": 8.359133126934985e-06,
"loss": 0.1719,
"step": 530
},
{
"epoch": 1.67,
"learning_rate": 8.328173374613003e-06,
"loss": 0.155,
"step": 540
},
{
"epoch": 1.7,
"learning_rate": 8.297213622291023e-06,
"loss": 0.2538,
"step": 550
},
{
"epoch": 1.73,
"learning_rate": 8.266253869969041e-06,
"loss": 0.1764,
"step": 560
},
{
"epoch": 1.76,
"learning_rate": 8.23529411764706e-06,
"loss": 0.1925,
"step": 570
},
{
"epoch": 1.8,
"learning_rate": 8.204334365325078e-06,
"loss": 0.2219,
"step": 580
},
{
"epoch": 1.83,
"learning_rate": 8.173374613003096e-06,
"loss": 0.165,
"step": 590
},
{
"epoch": 1.86,
"learning_rate": 8.142414860681115e-06,
"loss": 0.2247,
"step": 600
},
{
"epoch": 1.89,
"learning_rate": 8.111455108359135e-06,
"loss": 0.1937,
"step": 610
},
{
"epoch": 1.92,
"learning_rate": 8.080495356037153e-06,
"loss": 0.2156,
"step": 620
},
{
"epoch": 1.95,
"learning_rate": 8.049535603715171e-06,
"loss": 0.1989,
"step": 630
},
{
"epoch": 1.98,
"learning_rate": 8.01857585139319e-06,
"loss": 0.1951,
"step": 640
},
{
"epoch": 2.0,
"eval_addition_pearsonr": 0.833471093139616,
"eval_addition_pearsonr_pvalue": 1.1930818786250392e-203,
"eval_addition_spearmanr": 0.8310151262215819,
"eval_addition_spearmanr_pvalue": 2.1696450750894246e-201,
"eval_loss": 0.2739636301994324,
"eval_pearsonr": 0.833471093139616,
"eval_runtime": 21.1548,
"eval_samples_per_second": 37.06,
"eval_steps_per_second": 4.633,
"step": 646
},
{
"epoch": 2.01,
"learning_rate": 7.987616099071208e-06,
"loss": 0.1673,
"step": 650
},
{
"epoch": 2.04,
"learning_rate": 7.956656346749226e-06,
"loss": 0.1734,
"step": 660
},
{
"epoch": 2.07,
"learning_rate": 7.925696594427246e-06,
"loss": 0.1418,
"step": 670
},
{
"epoch": 2.11,
"learning_rate": 7.894736842105265e-06,
"loss": 0.2072,
"step": 680
},
{
"epoch": 2.14,
"learning_rate": 7.863777089783283e-06,
"loss": 0.1734,
"step": 690
},
{
"epoch": 2.17,
"learning_rate": 7.832817337461301e-06,
"loss": 0.1367,
"step": 700
},
{
"epoch": 2.2,
"learning_rate": 7.80185758513932e-06,
"loss": 0.2328,
"step": 710
},
{
"epoch": 2.23,
"learning_rate": 7.770897832817338e-06,
"loss": 0.184,
"step": 720
},
{
"epoch": 2.26,
"learning_rate": 7.739938080495358e-06,
"loss": 0.1618,
"step": 730
},
{
"epoch": 2.29,
"learning_rate": 7.708978328173376e-06,
"loss": 0.1844,
"step": 740
},
{
"epoch": 2.32,
"learning_rate": 7.678018575851394e-06,
"loss": 0.1344,
"step": 750
},
{
"epoch": 2.35,
"learning_rate": 7.647058823529411e-06,
"loss": 0.157,
"step": 760
},
{
"epoch": 2.38,
"learning_rate": 7.616099071207431e-06,
"loss": 0.1606,
"step": 770
},
{
"epoch": 2.41,
"learning_rate": 7.585139318885449e-06,
"loss": 0.171,
"step": 780
},
{
"epoch": 2.45,
"learning_rate": 7.554179566563468e-06,
"loss": 0.1466,
"step": 790
},
{
"epoch": 2.48,
"learning_rate": 7.523219814241487e-06,
"loss": 0.1604,
"step": 800
},
{
"epoch": 2.51,
"learning_rate": 7.492260061919505e-06,
"loss": 0.2169,
"step": 810
},
{
"epoch": 2.54,
"learning_rate": 7.4613003095975235e-06,
"loss": 0.1331,
"step": 820
},
{
"epoch": 2.57,
"learning_rate": 7.430340557275543e-06,
"loss": 0.2256,
"step": 830
},
{
"epoch": 2.6,
"learning_rate": 7.399380804953561e-06,
"loss": 0.1618,
"step": 840
},
{
"epoch": 2.63,
"learning_rate": 7.368421052631579e-06,
"loss": 0.1899,
"step": 850
},
{
"epoch": 2.66,
"learning_rate": 7.3374613003095984e-06,
"loss": 0.1661,
"step": 860
},
{
"epoch": 2.69,
"learning_rate": 7.306501547987617e-06,
"loss": 0.2316,
"step": 870
},
{
"epoch": 2.72,
"learning_rate": 7.275541795665635e-06,
"loss": 0.2487,
"step": 880
},
{
"epoch": 2.76,
"learning_rate": 7.244582043343654e-06,
"loss": 0.3622,
"step": 890
},
{
"epoch": 2.79,
"learning_rate": 7.2136222910216725e-06,
"loss": 0.2141,
"step": 900
},
{
"epoch": 2.82,
"learning_rate": 7.182662538699691e-06,
"loss": 0.1235,
"step": 910
},
{
"epoch": 2.85,
"learning_rate": 7.15170278637771e-06,
"loss": 0.2302,
"step": 920
},
{
"epoch": 2.88,
"learning_rate": 7.120743034055728e-06,
"loss": 0.1333,
"step": 930
},
{
"epoch": 2.91,
"learning_rate": 7.089783281733747e-06,
"loss": 0.1173,
"step": 940
},
{
"epoch": 2.94,
"learning_rate": 7.058823529411766e-06,
"loss": 0.1425,
"step": 950
},
{
"epoch": 2.97,
"learning_rate": 7.027863777089784e-06,
"loss": 0.2671,
"step": 960
},
{
"epoch": 3.0,
"eval_addition_pearsonr": 0.8428992315899235,
"eval_addition_pearsonr_pvalue": 1.1118263127325887e-212,
"eval_addition_spearmanr": 0.8297168645213742,
"eval_addition_spearmanr_pvalue": 3.2826876940025893e-200,
"eval_loss": 0.2137002795934677,
"eval_pearsonr": 0.8428992315899235,
"eval_runtime": 22.8904,
"eval_samples_per_second": 34.25,
"eval_steps_per_second": 4.281,
"step": 969
},
{
"epoch": 3.0,
"learning_rate": 6.996904024767802e-06,
"loss": 0.1432,
"step": 970
},
{
"epoch": 3.03,
"learning_rate": 6.9659442724458216e-06,
"loss": 0.1243,
"step": 980
},
{
"epoch": 3.07,
"learning_rate": 6.93498452012384e-06,
"loss": 0.1287,
"step": 990
},
{
"epoch": 3.1,
"learning_rate": 6.904024767801858e-06,
"loss": 0.1692,
"step": 1000
},
{
"epoch": 3.13,
"learning_rate": 6.873065015479877e-06,
"loss": 0.1892,
"step": 1010
},
{
"epoch": 3.16,
"learning_rate": 6.842105263157896e-06,
"loss": 0.1924,
"step": 1020
},
{
"epoch": 3.19,
"learning_rate": 6.811145510835914e-06,
"loss": 0.1521,
"step": 1030
},
{
"epoch": 3.22,
"learning_rate": 6.780185758513932e-06,
"loss": 0.1356,
"step": 1040
},
{
"epoch": 3.25,
"learning_rate": 6.7492260061919514e-06,
"loss": 0.0998,
"step": 1050
},
{
"epoch": 3.28,
"learning_rate": 6.71826625386997e-06,
"loss": 0.0911,
"step": 1060
},
{
"epoch": 3.31,
"learning_rate": 6.687306501547988e-06,
"loss": 0.1198,
"step": 1070
},
{
"epoch": 3.34,
"learning_rate": 6.656346749226007e-06,
"loss": 0.1085,
"step": 1080
},
{
"epoch": 3.37,
"learning_rate": 6.6253869969040255e-06,
"loss": 0.1267,
"step": 1090
},
{
"epoch": 3.41,
"learning_rate": 6.594427244582044e-06,
"loss": 0.2094,
"step": 1100
},
{
"epoch": 3.44,
"learning_rate": 6.563467492260063e-06,
"loss": 0.1466,
"step": 1110
},
{
"epoch": 3.47,
"learning_rate": 6.532507739938081e-06,
"loss": 0.1655,
"step": 1120
},
{
"epoch": 3.5,
"learning_rate": 6.501547987616099e-06,
"loss": 0.1336,
"step": 1130
},
{
"epoch": 3.53,
"learning_rate": 6.470588235294119e-06,
"loss": 0.1072,
"step": 1140
},
{
"epoch": 3.56,
"learning_rate": 6.439628482972137e-06,
"loss": 0.1634,
"step": 1150
},
{
"epoch": 3.59,
"learning_rate": 6.4086687306501545e-06,
"loss": 0.145,
"step": 1160
},
{
"epoch": 3.62,
"learning_rate": 6.3777089783281746e-06,
"loss": 0.164,
"step": 1170
},
{
"epoch": 3.65,
"learning_rate": 6.346749226006193e-06,
"loss": 0.1984,
"step": 1180
},
{
"epoch": 3.68,
"learning_rate": 6.31578947368421e-06,
"loss": 0.1056,
"step": 1190
},
{
"epoch": 3.72,
"learning_rate": 6.28482972136223e-06,
"loss": 0.107,
"step": 1200
},
{
"epoch": 3.75,
"learning_rate": 6.253869969040249e-06,
"loss": 0.1933,
"step": 1210
},
{
"epoch": 3.78,
"learning_rate": 6.222910216718266e-06,
"loss": 0.1582,
"step": 1220
},
{
"epoch": 3.81,
"learning_rate": 6.191950464396286e-06,
"loss": 0.151,
"step": 1230
},
{
"epoch": 3.84,
"learning_rate": 6.160990712074304e-06,
"loss": 0.1314,
"step": 1240
},
{
"epoch": 3.87,
"learning_rate": 6.130030959752322e-06,
"loss": 0.1297,
"step": 1250
},
{
"epoch": 3.9,
"learning_rate": 6.099071207430342e-06,
"loss": 0.1857,
"step": 1260
},
{
"epoch": 3.93,
"learning_rate": 6.068111455108359e-06,
"loss": 0.1366,
"step": 1270
},
{
"epoch": 3.96,
"learning_rate": 6.037151702786378e-06,
"loss": 0.1637,
"step": 1280
},
{
"epoch": 3.99,
"learning_rate": 6.006191950464398e-06,
"loss": 0.1559,
"step": 1290
},
{
"epoch": 4.0,
"eval_addition_pearsonr": 0.8238007807218948,
"eval_addition_pearsonr_pvalue": 5.872623507428365e-195,
"eval_addition_spearmanr": 0.8262514029951532,
"eval_addition_spearmanr_pvalue": 4.142278748148408e-197,
"eval_loss": 0.24727971851825714,
"eval_pearsonr": 0.8238007807218948,
"eval_runtime": 20.8571,
"eval_samples_per_second": 37.589,
"eval_steps_per_second": 4.699,
"step": 1292
},
{
"epoch": 4.02,
"learning_rate": 5.975232198142415e-06,
"loss": 0.1501,
"step": 1300
},
{
"epoch": 4.06,
"learning_rate": 5.9442724458204335e-06,
"loss": 0.1392,
"step": 1310
},
{
"epoch": 4.09,
"learning_rate": 5.9133126934984535e-06,
"loss": 0.099,
"step": 1320
},
{
"epoch": 4.12,
"learning_rate": 5.882352941176471e-06,
"loss": 0.122,
"step": 1330
},
{
"epoch": 4.15,
"learning_rate": 5.851393188854489e-06,
"loss": 0.1082,
"step": 1340
},
{
"epoch": 4.18,
"learning_rate": 5.8204334365325075e-06,
"loss": 0.1452,
"step": 1350
},
{
"epoch": 4.21,
"learning_rate": 5.789473684210527e-06,
"loss": 0.1209,
"step": 1360
},
{
"epoch": 4.24,
"learning_rate": 5.758513931888545e-06,
"loss": 0.1224,
"step": 1370
},
{
"epoch": 4.27,
"learning_rate": 5.727554179566563e-06,
"loss": 0.1225,
"step": 1380
},
{
"epoch": 4.3,
"learning_rate": 5.6965944272445825e-06,
"loss": 0.1417,
"step": 1390
},
{
"epoch": 4.33,
"learning_rate": 5.665634674922601e-06,
"loss": 0.098,
"step": 1400
},
{
"epoch": 4.37,
"learning_rate": 5.634674922600619e-06,
"loss": 0.1344,
"step": 1410
},
{
"epoch": 4.4,
"learning_rate": 5.603715170278638e-06,
"loss": 0.1188,
"step": 1420
},
{
"epoch": 4.43,
"learning_rate": 5.5727554179566566e-06,
"loss": 0.1011,
"step": 1430
},
{
"epoch": 4.46,
"learning_rate": 5.541795665634675e-06,
"loss": 0.1213,
"step": 1440
},
{
"epoch": 4.49,
"learning_rate": 5.510835913312694e-06,
"loss": 0.0964,
"step": 1450
},
{
"epoch": 4.52,
"learning_rate": 5.479876160990712e-06,
"loss": 0.1074,
"step": 1460
},
{
"epoch": 4.55,
"learning_rate": 5.448916408668731e-06,
"loss": 0.1506,
"step": 1470
},
{
"epoch": 4.58,
"learning_rate": 5.41795665634675e-06,
"loss": 0.1183,
"step": 1480
},
{
"epoch": 4.61,
"learning_rate": 5.386996904024768e-06,
"loss": 0.1251,
"step": 1490
},
{
"epoch": 4.64,
"learning_rate": 5.3560371517027864e-06,
"loss": 0.1216,
"step": 1500
},
{
"epoch": 4.67,
"learning_rate": 5.325077399380806e-06,
"loss": 0.1638,
"step": 1510
},
{
"epoch": 4.71,
"learning_rate": 5.294117647058824e-06,
"loss": 0.1159,
"step": 1520
},
{
"epoch": 4.74,
"learning_rate": 5.263157894736842e-06,
"loss": 0.1338,
"step": 1530
},
{
"epoch": 4.77,
"learning_rate": 5.232198142414861e-06,
"loss": 0.1058,
"step": 1540
},
{
"epoch": 4.8,
"learning_rate": 5.20123839009288e-06,
"loss": 0.1275,
"step": 1550
},
{
"epoch": 4.83,
"learning_rate": 5.170278637770898e-06,
"loss": 0.1337,
"step": 1560
},
{
"epoch": 4.86,
"learning_rate": 5.139318885448917e-06,
"loss": 0.1649,
"step": 1570
},
{
"epoch": 4.89,
"learning_rate": 5.1083591331269355e-06,
"loss": 0.1279,
"step": 1580
},
{
"epoch": 4.92,
"learning_rate": 5.077399380804954e-06,
"loss": 0.1606,
"step": 1590
},
{
"epoch": 4.95,
"learning_rate": 5.046439628482973e-06,
"loss": 0.1132,
"step": 1600
},
{
"epoch": 4.98,
"learning_rate": 5.015479876160991e-06,
"loss": 0.1334,
"step": 1610
},
{
"epoch": 5.0,
"eval_addition_pearsonr": 0.8426868365133882,
"eval_addition_pearsonr_pvalue": 1.8029995412035784e-212,
"eval_addition_spearmanr": 0.8288625510728332,
"eval_addition_spearmanr_pvalue": 1.9372096694580484e-199,
"eval_loss": 0.2357955276966095,
"eval_pearsonr": 0.8426868365133882,
"eval_runtime": 23.3047,
"eval_samples_per_second": 33.641,
"eval_steps_per_second": 4.205,
"step": 1615
},
{
"epoch": 5.02,
"learning_rate": 4.9845201238390096e-06,
"loss": 0.0962,
"step": 1620
},
{
"epoch": 5.05,
"learning_rate": 4.953560371517028e-06,
"loss": 0.1327,
"step": 1630
},
{
"epoch": 5.08,
"learning_rate": 4.922600619195047e-06,
"loss": 0.0572,
"step": 1640
},
{
"epoch": 5.11,
"learning_rate": 4.891640866873065e-06,
"loss": 0.1291,
"step": 1650
},
{
"epoch": 5.14,
"learning_rate": 4.860681114551084e-06,
"loss": 0.1007,
"step": 1660
},
{
"epoch": 5.17,
"learning_rate": 4.829721362229103e-06,
"loss": 0.1078,
"step": 1670
},
{
"epoch": 5.2,
"learning_rate": 4.798761609907121e-06,
"loss": 0.1001,
"step": 1680
},
{
"epoch": 5.23,
"learning_rate": 4.7678018575851394e-06,
"loss": 0.0935,
"step": 1690
},
{
"epoch": 5.26,
"learning_rate": 4.736842105263158e-06,
"loss": 0.1132,
"step": 1700
},
{
"epoch": 5.29,
"learning_rate": 4.705882352941177e-06,
"loss": 0.1028,
"step": 1710
},
{
"epoch": 5.33,
"learning_rate": 4.674922600619195e-06,
"loss": 0.1326,
"step": 1720
},
{
"epoch": 5.36,
"learning_rate": 4.6439628482972135e-06,
"loss": 0.0907,
"step": 1730
},
{
"epoch": 5.39,
"learning_rate": 4.613003095975233e-06,
"loss": 0.1104,
"step": 1740
},
{
"epoch": 5.42,
"learning_rate": 4.582043343653251e-06,
"loss": 0.1247,
"step": 1750
},
{
"epoch": 5.45,
"learning_rate": 4.551083591331269e-06,
"loss": 0.0997,
"step": 1760
},
{
"epoch": 5.48,
"learning_rate": 4.5201238390092885e-06,
"loss": 0.0687,
"step": 1770
},
{
"epoch": 5.51,
"learning_rate": 4.489164086687307e-06,
"loss": 0.0658,
"step": 1780
},
{
"epoch": 5.54,
"learning_rate": 4.458204334365325e-06,
"loss": 0.125,
"step": 1790
},
{
"epoch": 5.57,
"learning_rate": 4.427244582043344e-06,
"loss": 0.0712,
"step": 1800
},
{
"epoch": 5.6,
"learning_rate": 4.3962848297213626e-06,
"loss": 0.0794,
"step": 1810
},
{
"epoch": 5.63,
"learning_rate": 4.365325077399381e-06,
"loss": 0.0714,
"step": 1820
},
{
"epoch": 5.67,
"learning_rate": 4.3343653250774e-06,
"loss": 0.082,
"step": 1830
},
{
"epoch": 5.7,
"learning_rate": 4.303405572755418e-06,
"loss": 0.0809,
"step": 1840
},
{
"epoch": 5.73,
"learning_rate": 4.272445820433437e-06,
"loss": 0.086,
"step": 1850
},
{
"epoch": 5.76,
"learning_rate": 4.241486068111456e-06,
"loss": 0.1231,
"step": 1860
},
{
"epoch": 5.79,
"learning_rate": 4.210526315789474e-06,
"loss": 0.0904,
"step": 1870
},
{
"epoch": 5.82,
"learning_rate": 4.1795665634674924e-06,
"loss": 0.1097,
"step": 1880
},
{
"epoch": 5.85,
"learning_rate": 4.148606811145512e-06,
"loss": 0.0966,
"step": 1890
},
{
"epoch": 5.88,
"learning_rate": 4.11764705882353e-06,
"loss": 0.1001,
"step": 1900
},
{
"epoch": 5.91,
"learning_rate": 4.086687306501548e-06,
"loss": 0.0864,
"step": 1910
},
{
"epoch": 5.94,
"learning_rate": 4.055727554179567e-06,
"loss": 0.1117,
"step": 1920
},
{
"epoch": 5.98,
"learning_rate": 4.024767801857586e-06,
"loss": 0.07,
"step": 1930
},
{
"epoch": 6.0,
"eval_addition_pearsonr": 0.8384818214625009,
"eval_addition_pearsonr_pvalue": 2.2368823862257995e-208,
"eval_addition_spearmanr": 0.8310922211257242,
"eval_addition_spearmanr_pvalue": 1.8450561477329855e-201,
"eval_loss": 0.24519018828868866,
"eval_pearsonr": 0.8384818214625009,
"eval_runtime": 21.145,
"eval_samples_per_second": 37.077,
"eval_steps_per_second": 4.635,
"step": 1938
},
{
"epoch": 6.01,
"learning_rate": 3.993808049535604e-06,
"loss": 0.0935,
"step": 1940
},
{
"epoch": 6.04,
"learning_rate": 3.962848297213623e-06,
"loss": 0.0913,
"step": 1950
},
{
"epoch": 6.07,
"learning_rate": 3.9318885448916415e-06,
"loss": 0.0645,
"step": 1960
},
{
"epoch": 6.1,
"learning_rate": 3.90092879256966e-06,
"loss": 0.0926,
"step": 1970
},
{
"epoch": 6.13,
"learning_rate": 3.869969040247679e-06,
"loss": 0.095,
"step": 1980
},
{
"epoch": 6.16,
"learning_rate": 3.839009287925697e-06,
"loss": 0.1012,
"step": 1990
},
{
"epoch": 6.19,
"learning_rate": 3.8080495356037155e-06,
"loss": 0.0832,
"step": 2000
},
{
"epoch": 6.22,
"learning_rate": 3.777089783281734e-06,
"loss": 0.064,
"step": 2010
},
{
"epoch": 6.25,
"learning_rate": 3.7461300309597526e-06,
"loss": 0.0726,
"step": 2020
},
{
"epoch": 6.28,
"learning_rate": 3.7151702786377713e-06,
"loss": 0.0863,
"step": 2030
},
{
"epoch": 6.32,
"learning_rate": 3.6842105263157896e-06,
"loss": 0.0743,
"step": 2040
},
{
"epoch": 6.35,
"learning_rate": 3.6532507739938084e-06,
"loss": 0.1048,
"step": 2050
},
{
"epoch": 6.38,
"learning_rate": 3.622291021671827e-06,
"loss": 0.0879,
"step": 2060
},
{
"epoch": 6.41,
"learning_rate": 3.5913312693498454e-06,
"loss": 0.0938,
"step": 2070
},
{
"epoch": 6.44,
"learning_rate": 3.560371517027864e-06,
"loss": 0.1054,
"step": 2080
},
{
"epoch": 6.47,
"learning_rate": 3.529411764705883e-06,
"loss": 0.0713,
"step": 2090
},
{
"epoch": 6.5,
"learning_rate": 3.498452012383901e-06,
"loss": 0.0674,
"step": 2100
},
{
"epoch": 6.53,
"learning_rate": 3.46749226006192e-06,
"loss": 0.0778,
"step": 2110
},
{
"epoch": 6.56,
"learning_rate": 3.4365325077399387e-06,
"loss": 0.0838,
"step": 2120
},
{
"epoch": 6.59,
"learning_rate": 3.405572755417957e-06,
"loss": 0.0868,
"step": 2130
},
{
"epoch": 6.63,
"learning_rate": 3.3746130030959757e-06,
"loss": 0.0998,
"step": 2140
},
{
"epoch": 6.66,
"learning_rate": 3.343653250773994e-06,
"loss": 0.1182,
"step": 2150
},
{
"epoch": 6.69,
"learning_rate": 3.3126934984520128e-06,
"loss": 0.0578,
"step": 2160
},
{
"epoch": 6.72,
"learning_rate": 3.2817337461300315e-06,
"loss": 0.0949,
"step": 2170
},
{
"epoch": 6.75,
"learning_rate": 3.2507739938080494e-06,
"loss": 0.0653,
"step": 2180
},
{
"epoch": 6.78,
"learning_rate": 3.2198142414860685e-06,
"loss": 0.083,
"step": 2190
},
{
"epoch": 6.81,
"learning_rate": 3.1888544891640873e-06,
"loss": 0.087,
"step": 2200
},
{
"epoch": 6.84,
"learning_rate": 3.157894736842105e-06,
"loss": 0.0818,
"step": 2210
},
{
"epoch": 6.87,
"learning_rate": 3.1269349845201243e-06,
"loss": 0.0825,
"step": 2220
},
{
"epoch": 6.9,
"learning_rate": 3.095975232198143e-06,
"loss": 0.1078,
"step": 2230
},
{
"epoch": 6.93,
"learning_rate": 3.065015479876161e-06,
"loss": 0.0858,
"step": 2240
},
{
"epoch": 6.97,
"learning_rate": 3.0340557275541797e-06,
"loss": 0.081,
"step": 2250
},
{
"epoch": 7.0,
"learning_rate": 3.003095975232199e-06,
"loss": 0.1171,
"step": 2260
},
{
"epoch": 7.0,
"eval_addition_pearsonr": 0.8370610164555807,
"eval_addition_pearsonr_pvalue": 5.085114673351772e-207,
"eval_addition_spearmanr": 0.8274385995758836,
"eval_addition_spearmanr_pvalue": 3.6535030178809915e-198,
"eval_loss": 0.2490989863872528,
"eval_pearsonr": 0.8370610164555807,
"eval_runtime": 20.8544,
"eval_samples_per_second": 37.594,
"eval_steps_per_second": 4.699,
"step": 2261
},
{
"epoch": 7.03,
"learning_rate": 2.9721362229102167e-06,
"loss": 0.075,
"step": 2270
},
{
"epoch": 7.06,
"learning_rate": 2.9411764705882355e-06,
"loss": 0.0796,
"step": 2280
},
{
"epoch": 7.09,
"learning_rate": 2.9102167182662538e-06,
"loss": 0.0758,
"step": 2290
},
{
"epoch": 7.12,
"learning_rate": 2.8792569659442725e-06,
"loss": 0.086,
"step": 2300
},
{
"epoch": 7.15,
"learning_rate": 2.8482972136222912e-06,
"loss": 0.0815,
"step": 2310
},
{
"epoch": 7.18,
"learning_rate": 2.8173374613003096e-06,
"loss": 0.087,
"step": 2320
},
{
"epoch": 7.21,
"learning_rate": 2.7863777089783283e-06,
"loss": 0.0913,
"step": 2330
},
{
"epoch": 7.24,
"learning_rate": 2.755417956656347e-06,
"loss": 0.1225,
"step": 2340
},
{
"epoch": 7.28,
"learning_rate": 2.7244582043343653e-06,
"loss": 0.0743,
"step": 2350
},
{
"epoch": 7.31,
"learning_rate": 2.693498452012384e-06,
"loss": 0.0754,
"step": 2360
},
{
"epoch": 7.34,
"learning_rate": 2.662538699690403e-06,
"loss": 0.0696,
"step": 2370
},
{
"epoch": 7.37,
"learning_rate": 2.631578947368421e-06,
"loss": 0.0882,
"step": 2380
},
{
"epoch": 7.4,
"learning_rate": 2.60061919504644e-06,
"loss": 0.07,
"step": 2390
},
{
"epoch": 7.43,
"learning_rate": 2.5696594427244586e-06,
"loss": 0.0804,
"step": 2400
},
{
"epoch": 7.46,
"learning_rate": 2.538699690402477e-06,
"loss": 0.0786,
"step": 2410
},
{
"epoch": 7.49,
"learning_rate": 2.5077399380804956e-06,
"loss": 0.0743,
"step": 2420
},
{
"epoch": 7.52,
"learning_rate": 2.476780185758514e-06,
"loss": 0.1035,
"step": 2430
},
{
"epoch": 7.55,
"learning_rate": 2.4458204334365327e-06,
"loss": 0.1006,
"step": 2440
},
{
"epoch": 7.59,
"learning_rate": 2.4148606811145514e-06,
"loss": 0.0679,
"step": 2450
},
{
"epoch": 7.62,
"learning_rate": 2.3839009287925697e-06,
"loss": 0.0794,
"step": 2460
},
{
"epoch": 7.65,
"learning_rate": 2.3529411764705885e-06,
"loss": 0.071,
"step": 2470
},
{
"epoch": 7.68,
"learning_rate": 2.3219814241486068e-06,
"loss": 0.0644,
"step": 2480
},
{
"epoch": 7.71,
"learning_rate": 2.2910216718266255e-06,
"loss": 0.0935,
"step": 2490
},
{
"epoch": 7.74,
"learning_rate": 2.2600619195046442e-06,
"loss": 0.073,
"step": 2500
},
{
"epoch": 7.77,
"learning_rate": 2.2291021671826625e-06,
"loss": 0.0878,
"step": 2510
},
{
"epoch": 7.8,
"learning_rate": 2.1981424148606813e-06,
"loss": 0.0871,
"step": 2520
},
{
"epoch": 7.83,
"learning_rate": 2.1671826625387e-06,
"loss": 0.0777,
"step": 2530
},
{
"epoch": 7.86,
"learning_rate": 2.1362229102167183e-06,
"loss": 0.0741,
"step": 2540
},
{
"epoch": 7.89,
"learning_rate": 2.105263157894737e-06,
"loss": 0.0729,
"step": 2550
},
{
"epoch": 7.93,
"learning_rate": 2.074303405572756e-06,
"loss": 0.1155,
"step": 2560
},
{
"epoch": 7.96,
"learning_rate": 2.043343653250774e-06,
"loss": 0.0649,
"step": 2570
},
{
"epoch": 7.99,
"learning_rate": 2.012383900928793e-06,
"loss": 0.086,
"step": 2580
},
{
"epoch": 8.0,
"eval_addition_pearsonr": 0.8358219799874653,
"eval_addition_pearsonr_pvalue": 7.564266135601688e-206,
"eval_addition_spearmanr": 0.8310207088268229,
"eval_addition_spearmanr_pvalue": 2.1443397292362208e-201,
"eval_loss": 0.24417398869991302,
"eval_pearsonr": 0.8358219799874653,
"eval_runtime": 20.8538,
"eval_samples_per_second": 37.595,
"eval_steps_per_second": 4.699,
"step": 2584
},
{
"epoch": 8.02,
"learning_rate": 1.9814241486068116e-06,
"loss": 0.0876,
"step": 2590
},
{
"epoch": 8.05,
"learning_rate": 1.95046439628483e-06,
"loss": 0.0511,
"step": 2600
},
{
"epoch": 8.08,
"learning_rate": 1.9195046439628486e-06,
"loss": 0.06,
"step": 2610
},
{
"epoch": 8.11,
"learning_rate": 1.888544891640867e-06,
"loss": 0.0769,
"step": 2620
},
{
"epoch": 8.14,
"learning_rate": 1.8575851393188857e-06,
"loss": 0.0772,
"step": 2630
},
{
"epoch": 8.17,
"learning_rate": 1.8266253869969042e-06,
"loss": 0.0598,
"step": 2640
},
{
"epoch": 8.2,
"learning_rate": 1.7956656346749227e-06,
"loss": 0.0709,
"step": 2650
},
{
"epoch": 8.24,
"learning_rate": 1.7647058823529414e-06,
"loss": 0.0731,
"step": 2660
},
{
"epoch": 8.27,
"learning_rate": 1.73374613003096e-06,
"loss": 0.0684,
"step": 2670
},
{
"epoch": 8.3,
"learning_rate": 1.7027863777089785e-06,
"loss": 0.0637,
"step": 2680
},
{
"epoch": 8.33,
"learning_rate": 1.671826625386997e-06,
"loss": 0.0573,
"step": 2690
},
{
"epoch": 8.36,
"learning_rate": 1.6408668730650157e-06,
"loss": 0.082,
"step": 2700
},
{
"epoch": 8.39,
"learning_rate": 1.6099071207430343e-06,
"loss": 0.057,
"step": 2710
},
{
"epoch": 8.42,
"learning_rate": 1.5789473684210526e-06,
"loss": 0.0631,
"step": 2720
},
{
"epoch": 8.45,
"learning_rate": 1.5479876160990715e-06,
"loss": 0.0899,
"step": 2730
},
{
"epoch": 8.48,
"learning_rate": 1.5170278637770898e-06,
"loss": 0.0609,
"step": 2740
},
{
"epoch": 8.51,
"learning_rate": 1.4860681114551084e-06,
"loss": 0.072,
"step": 2750
},
{
"epoch": 8.54,
"learning_rate": 1.4551083591331269e-06,
"loss": 0.0612,
"step": 2760
},
{
"epoch": 8.58,
"learning_rate": 1.4241486068111456e-06,
"loss": 0.0733,
"step": 2770
},
{
"epoch": 8.61,
"learning_rate": 1.3931888544891641e-06,
"loss": 0.0652,
"step": 2780
},
{
"epoch": 8.64,
"learning_rate": 1.3622291021671827e-06,
"loss": 0.0731,
"step": 2790
},
{
"epoch": 8.67,
"learning_rate": 1.3312693498452014e-06,
"loss": 0.071,
"step": 2800
},
{
"epoch": 8.7,
"learning_rate": 1.30030959752322e-06,
"loss": 0.0672,
"step": 2810
},
{
"epoch": 8.73,
"learning_rate": 1.2693498452012384e-06,
"loss": 0.0633,
"step": 2820
},
{
"epoch": 8.76,
"learning_rate": 1.238390092879257e-06,
"loss": 0.0686,
"step": 2830
},
{
"epoch": 8.79,
"learning_rate": 1.2074303405572757e-06,
"loss": 0.1018,
"step": 2840
},
{
"epoch": 8.82,
"learning_rate": 1.1764705882352942e-06,
"loss": 0.0691,
"step": 2850
},
{
"epoch": 8.85,
"learning_rate": 1.1455108359133127e-06,
"loss": 0.07,
"step": 2860
},
{
"epoch": 8.89,
"learning_rate": 1.1145510835913313e-06,
"loss": 0.0861,
"step": 2870
},
{
"epoch": 8.92,
"learning_rate": 1.08359133126935e-06,
"loss": 0.0779,
"step": 2880
},
{
"epoch": 8.95,
"learning_rate": 1.0526315789473685e-06,
"loss": 0.0762,
"step": 2890
},
{
"epoch": 8.98,
"learning_rate": 1.021671826625387e-06,
"loss": 0.0696,
"step": 2900
},
{
"epoch": 9.0,
"eval_addition_pearsonr": 0.832331050711504,
"eval_addition_pearsonr_pvalue": 1.3495245460645167e-202,
"eval_addition_spearmanr": 0.8260737087952287,
"eval_addition_spearmanr_pvalue": 5.94820182819766e-197,
"eval_loss": 0.24034035205841064,
"eval_pearsonr": 0.832331050711504,
"eval_runtime": 20.8858,
"eval_samples_per_second": 37.538,
"eval_steps_per_second": 4.692,
"step": 2907
},
{
"epoch": 9.01,
"learning_rate": 9.907120743034058e-07,
"loss": 0.0403,
"step": 2910
},
{
"epoch": 9.04,
"learning_rate": 9.597523219814243e-07,
"loss": 0.0519,
"step": 2920
},
{
"epoch": 9.07,
"learning_rate": 9.287925696594428e-07,
"loss": 0.046,
"step": 2930
},
{
"epoch": 9.1,
"learning_rate": 8.978328173374614e-07,
"loss": 0.0656,
"step": 2940
},
{
"epoch": 9.13,
"learning_rate": 8.6687306501548e-07,
"loss": 0.0634,
"step": 2950
},
{
"epoch": 9.16,
"learning_rate": 8.359133126934985e-07,
"loss": 0.0541,
"step": 2960
},
{
"epoch": 9.2,
"learning_rate": 8.049535603715171e-07,
"loss": 0.072,
"step": 2970
},
{
"epoch": 9.23,
"learning_rate": 7.739938080495358e-07,
"loss": 0.0563,
"step": 2980
},
{
"epoch": 9.26,
"learning_rate": 7.430340557275542e-07,
"loss": 0.082,
"step": 2990
},
{
"epoch": 9.29,
"learning_rate": 7.120743034055728e-07,
"loss": 0.0597,
"step": 3000
},
{
"epoch": 9.32,
"learning_rate": 6.811145510835913e-07,
"loss": 0.0661,
"step": 3010
},
{
"epoch": 9.35,
"learning_rate": 6.5015479876161e-07,
"loss": 0.0636,
"step": 3020
},
{
"epoch": 9.38,
"learning_rate": 6.191950464396285e-07,
"loss": 0.0545,
"step": 3030
},
{
"epoch": 9.41,
"learning_rate": 5.882352941176471e-07,
"loss": 0.0411,
"step": 3040
},
{
"epoch": 9.44,
"learning_rate": 5.572755417956656e-07,
"loss": 0.0583,
"step": 3050
},
{
"epoch": 9.47,
"learning_rate": 5.263157894736843e-07,
"loss": 0.0817,
"step": 3060
},
{
"epoch": 9.5,
"learning_rate": 4.953560371517029e-07,
"loss": 0.0448,
"step": 3070
},
{
"epoch": 9.54,
"learning_rate": 4.643962848297214e-07,
"loss": 0.0519,
"step": 3080
},
{
"epoch": 9.57,
"learning_rate": 4.3343653250774e-07,
"loss": 0.0753,
"step": 3090
},
{
"epoch": 9.6,
"learning_rate": 4.0247678018575857e-07,
"loss": 0.0584,
"step": 3100
},
{
"epoch": 9.63,
"learning_rate": 3.715170278637771e-07,
"loss": 0.0659,
"step": 3110
},
{
"epoch": 9.66,
"learning_rate": 3.4055727554179567e-07,
"loss": 0.0708,
"step": 3120
},
{
"epoch": 9.69,
"learning_rate": 3.0959752321981424e-07,
"loss": 0.0543,
"step": 3130
},
{
"epoch": 9.72,
"learning_rate": 2.786377708978328e-07,
"loss": 0.0627,
"step": 3140
},
{
"epoch": 9.75,
"learning_rate": 2.4767801857585145e-07,
"loss": 0.0367,
"step": 3150
},
{
"epoch": 9.78,
"learning_rate": 2.1671826625387e-07,
"loss": 0.0446,
"step": 3160
},
{
"epoch": 9.81,
"learning_rate": 1.8575851393188855e-07,
"loss": 0.0696,
"step": 3170
},
{
"epoch": 9.85,
"learning_rate": 1.5479876160990712e-07,
"loss": 0.0651,
"step": 3180
},
{
"epoch": 9.88,
"learning_rate": 1.2383900928792572e-07,
"loss": 0.0741,
"step": 3190
},
{
"epoch": 9.91,
"learning_rate": 9.287925696594427e-08,
"loss": 0.0608,
"step": 3200
},
{
"epoch": 9.94,
"learning_rate": 6.191950464396286e-08,
"loss": 0.0663,
"step": 3210
},
{
"epoch": 9.97,
"learning_rate": 3.095975232198143e-08,
"loss": 0.0593,
"step": 3220
},
{
"epoch": 10.0,
"learning_rate": 0.0,
"loss": 0.0737,
"step": 3230
},
{
"epoch": 10.0,
"eval_addition_pearsonr": 0.8294677192385509,
"eval_addition_pearsonr_pvalue": 5.514496860614628e-200,
"eval_addition_spearmanr": 0.8259331195621286,
"eval_addition_spearmanr_pvalue": 7.917553990214238e-197,
"eval_loss": 0.26464056968688965,
"eval_pearsonr": 0.8294677192385509,
"eval_runtime": 20.8166,
"eval_samples_per_second": 37.662,
"eval_steps_per_second": 4.708,
"step": 3230
},
{
"epoch": 10.0,
"step": 3230,
"total_flos": 2.40437479348224e+16,
"train_loss": 0.19238069481893957,
"train_runtime": 2615.6748,
"train_samples_per_second": 9.864,
"train_steps_per_second": 1.235
}
],
"logging_steps": 10,
"max_steps": 3230,
"num_train_epochs": 10,
"save_steps": 500,
"total_flos": 2.40437479348224e+16,
"trial_name": null,
"trial_params": null
}