{
  "best_metric": 0.863021420518602,
  "best_model_checkpoint": "dinov2-base-finetuned-galaxy10-decals/checkpoint-906",
  "epoch": 29.76,
  "eval_steps": 500,
  "global_step": 930,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "learning_rate": 5.376344086021506e-06,
      "loss": 2.6504,
      "step": 10
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.0752688172043012e-05,
      "loss": 1.771,
      "step": 20
    },
    {
      "epoch": 0.96,
      "learning_rate": 1.6129032258064517e-05,
      "loss": 1.1698,
      "step": 30
    },
    {
      "epoch": 0.99,
      "eval_accuracy": 0.6634723788049606,
      "eval_f1": 0.6488201253523778,
      "eval_loss": 0.910478949546814,
      "eval_precision": 0.6861326245634023,
      "eval_recall": 0.6634723788049606,
      "eval_runtime": 13.0532,
      "eval_samples_per_second": 135.905,
      "eval_steps_per_second": 1.073,
      "step": 31
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.1505376344086024e-05,
      "loss": 0.9385,
      "step": 40
    },
    {
      "epoch": 1.6,
      "learning_rate": 2.6881720430107527e-05,
      "loss": 0.7928,
      "step": 50
    },
    {
      "epoch": 1.92,
      "learning_rate": 3.2258064516129034e-05,
      "loss": 0.7528,
      "step": 60
    },
    {
      "epoch": 1.98,
      "eval_accuracy": 0.7790304396843292,
      "eval_f1": 0.7612637488816166,
      "eval_loss": 0.6493567228317261,
      "eval_precision": 0.8146646455914968,
      "eval_recall": 0.7790304396843292,
      "eval_runtime": 11.0333,
      "eval_samples_per_second": 160.786,
      "eval_steps_per_second": 1.269,
      "step": 62
    },
    {
      "epoch": 2.24,
      "learning_rate": 3.763440860215054e-05,
      "loss": 0.7283,
      "step": 70
    },
    {
      "epoch": 2.56,
      "learning_rate": 4.301075268817205e-05,
      "loss": 0.761,
      "step": 80
    },
    {
      "epoch": 2.88,
      "learning_rate": 4.8387096774193554e-05,
      "loss": 0.6893,
      "step": 90
    },
    {
      "epoch": 2.98,
      "eval_accuracy": 0.794250281848929,
      "eval_f1": 0.7912121355030378,
      "eval_loss": 0.6537806391716003,
      "eval_precision": 0.8019920570293133,
      "eval_recall": 0.794250281848929,
      "eval_runtime": 10.9026,
      "eval_samples_per_second": 162.714,
      "eval_steps_per_second": 1.284,
      "step": 93
    },
    {
      "epoch": 3.2,
      "learning_rate": 4.9581839904420555e-05,
      "loss": 0.7426,
      "step": 100
    },
    {
      "epoch": 3.52,
      "learning_rate": 4.898446833930705e-05,
      "loss": 0.7075,
      "step": 110
    },
    {
      "epoch": 3.84,
      "learning_rate": 4.8387096774193554e-05,
      "loss": 0.6554,
      "step": 120
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7886133032694476,
      "eval_f1": 0.7773123934332622,
      "eval_loss": 0.633278489112854,
      "eval_precision": 0.8070560215205211,
      "eval_recall": 0.7886133032694476,
      "eval_runtime": 10.9718,
      "eval_samples_per_second": 161.688,
      "eval_steps_per_second": 1.276,
      "step": 125
    },
    {
      "epoch": 4.16,
      "learning_rate": 4.778972520908005e-05,
      "loss": 0.6531,
      "step": 130
    },
    {
      "epoch": 4.48,
      "learning_rate": 4.7192353643966546e-05,
      "loss": 0.6259,
      "step": 140
    },
    {
      "epoch": 4.8,
      "learning_rate": 4.659498207885305e-05,
      "loss": 0.6342,
      "step": 150
    },
    {
      "epoch": 4.99,
      "eval_accuracy": 0.8134160090191658,
      "eval_f1": 0.8137533775668285,
      "eval_loss": 0.5532479882240295,
      "eval_precision": 0.8197652262902653,
      "eval_recall": 0.8134160090191658,
      "eval_runtime": 10.9592,
      "eval_samples_per_second": 161.874,
      "eval_steps_per_second": 1.277,
      "step": 156
    },
    {
      "epoch": 5.12,
      "learning_rate": 4.5997610513739546e-05,
      "loss": 0.5988,
      "step": 160
    },
    {
      "epoch": 5.44,
      "learning_rate": 4.540023894862604e-05,
      "loss": 0.6068,
      "step": 170
    },
    {
      "epoch": 5.76,
      "learning_rate": 4.4802867383512545e-05,
      "loss": 0.5565,
      "step": 180
    },
    {
      "epoch": 5.98,
      "eval_accuracy": 0.8207440811724915,
      "eval_f1": 0.8179640665669187,
      "eval_loss": 0.5323979258537292,
      "eval_precision": 0.8343261000652813,
      "eval_recall": 0.8207440811724915,
      "eval_runtime": 11.3197,
      "eval_samples_per_second": 156.718,
      "eval_steps_per_second": 1.237,
      "step": 187
    },
    {
      "epoch": 6.08,
      "learning_rate": 4.420549581839905e-05,
      "loss": 0.5592,
      "step": 190
    },
    {
      "epoch": 6.4,
      "learning_rate": 4.360812425328555e-05,
      "loss": 0.5665,
      "step": 200
    },
    {
      "epoch": 6.72,
      "learning_rate": 4.301075268817205e-05,
      "loss": 0.5475,
      "step": 210
    },
    {
      "epoch": 6.98,
      "eval_accuracy": 0.8354002254791432,
      "eval_f1": 0.8343016406233452,
      "eval_loss": 0.5029850602149963,
      "eval_precision": 0.8387498718572008,
      "eval_recall": 0.8354002254791432,
      "eval_runtime": 11.3637,
      "eval_samples_per_second": 156.112,
      "eval_steps_per_second": 1.232,
      "step": 218
    },
    {
      "epoch": 7.04,
      "learning_rate": 4.241338112305854e-05,
      "loss": 0.5693,
      "step": 220
    },
    {
      "epoch": 7.36,
      "learning_rate": 4.1816009557945046e-05,
      "loss": 0.5376,
      "step": 230
    },
    {
      "epoch": 7.68,
      "learning_rate": 4.121863799283154e-05,
      "loss": 0.5441,
      "step": 240
    },
    {
      "epoch": 8.0,
      "learning_rate": 4.062126642771804e-05,
      "loss": 0.5271,
      "step": 250
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8337091319052987,
      "eval_f1": 0.832521887921682,
      "eval_loss": 0.4833582043647766,
      "eval_precision": 0.8443837910122144,
      "eval_recall": 0.8337091319052987,
      "eval_runtime": 10.9142,
      "eval_samples_per_second": 162.54,
      "eval_steps_per_second": 1.283,
      "step": 250
    },
    {
      "epoch": 8.32,
      "learning_rate": 4.002389486260454e-05,
      "loss": 0.4638,
      "step": 260
    },
    {
      "epoch": 8.64,
      "learning_rate": 3.9426523297491045e-05,
      "loss": 0.5195,
      "step": 270
    },
    {
      "epoch": 8.96,
      "learning_rate": 3.882915173237754e-05,
      "loss": 0.5086,
      "step": 280
    },
    {
      "epoch": 8.99,
      "eval_accuracy": 0.8432919954904171,
      "eval_f1": 0.838178160275144,
      "eval_loss": 0.4611281752586365,
      "eval_precision": 0.8440756169112026,
      "eval_recall": 0.8432919954904171,
      "eval_runtime": 10.9989,
      "eval_samples_per_second": 161.29,
      "eval_steps_per_second": 1.273,
      "step": 281
    },
    {
      "epoch": 9.28,
      "learning_rate": 3.8231780167264044e-05,
      "loss": 0.4719,
      "step": 290
    },
    {
      "epoch": 9.6,
      "learning_rate": 3.763440860215054e-05,
      "loss": 0.5024,
      "step": 300
    },
    {
      "epoch": 9.92,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.4341,
      "step": 310
    },
    {
      "epoch": 9.98,
      "eval_accuracy": 0.850620067643743,
      "eval_f1": 0.8508828796406267,
      "eval_loss": 0.44368883967399597,
      "eval_precision": 0.853874747203978,
      "eval_recall": 0.850620067643743,
      "eval_runtime": 10.9558,
      "eval_samples_per_second": 161.923,
      "eval_steps_per_second": 1.278,
      "step": 312
    },
    {
      "epoch": 10.24,
      "learning_rate": 3.643966547192354e-05,
      "loss": 0.4465,
      "step": 320
    },
    {
      "epoch": 10.56,
      "learning_rate": 3.5842293906810036e-05,
      "loss": 0.4517,
      "step": 330
    },
    {
      "epoch": 10.88,
      "learning_rate": 3.524492234169653e-05,
      "loss": 0.4557,
      "step": 340
    },
    {
      "epoch": 10.98,
      "eval_accuracy": 0.8483652762119503,
      "eval_f1": 0.8495285854672462,
      "eval_loss": 0.45592719316482544,
      "eval_precision": 0.8528985684910855,
      "eval_recall": 0.8483652762119503,
      "eval_runtime": 11.0155,
      "eval_samples_per_second": 161.046,
      "eval_steps_per_second": 1.271,
      "step": 343
    },
    {
      "epoch": 11.2,
      "learning_rate": 3.4647550776583035e-05,
      "loss": 0.4113,
      "step": 350
    },
    {
      "epoch": 11.52,
      "learning_rate": 3.405017921146954e-05,
      "loss": 0.4091,
      "step": 360
    },
    {
      "epoch": 11.84,
      "learning_rate": 3.3452807646356034e-05,
      "loss": 0.4179,
      "step": 370
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.8128523111612176,
      "eval_f1": 0.8133198423401731,
      "eval_loss": 0.5942395329475403,
      "eval_precision": 0.8256669174022289,
      "eval_recall": 0.8128523111612176,
      "eval_runtime": 11.1803,
      "eval_samples_per_second": 158.672,
      "eval_steps_per_second": 1.252,
      "step": 375
    },
    {
      "epoch": 12.16,
      "learning_rate": 3.285543608124254e-05,
      "loss": 0.4124,
      "step": 380
    },
    {
      "epoch": 12.48,
      "learning_rate": 3.2258064516129034e-05,
      "loss": 0.3914,
      "step": 390
    },
    {
      "epoch": 12.8,
      "learning_rate": 3.1660692951015537e-05,
      "loss": 0.4243,
      "step": 400
    },
    {
      "epoch": 12.99,
      "eval_accuracy": 0.8540022547914318,
      "eval_f1": 0.8518108652158739,
      "eval_loss": 0.459916353225708,
      "eval_precision": 0.8536974522219319,
      "eval_recall": 0.8540022547914318,
      "eval_runtime": 10.9326,
      "eval_samples_per_second": 162.267,
      "eval_steps_per_second": 1.281,
      "step": 406
    },
    {
      "epoch": 13.12,
      "learning_rate": 3.106332138590203e-05,
      "loss": 0.382,
      "step": 410
    },
    {
      "epoch": 13.44,
      "learning_rate": 3.046594982078853e-05,
      "loss": 0.385,
      "step": 420
    },
    {
      "epoch": 13.76,
      "learning_rate": 2.9868578255675032e-05,
      "loss": 0.372,
      "step": 430
    },
    {
      "epoch": 13.98,
      "eval_accuracy": 0.8410372040586246,
      "eval_f1": 0.8402829432875131,
      "eval_loss": 0.4743349850177765,
      "eval_precision": 0.8471728150244378,
      "eval_recall": 0.8410372040586246,
      "eval_runtime": 10.9817,
      "eval_samples_per_second": 161.541,
      "eval_steps_per_second": 1.275,
      "step": 437
    },
    {
      "epoch": 14.08,
      "learning_rate": 2.9271206690561532e-05,
      "loss": 0.4001,
      "step": 440
    },
    {
      "epoch": 14.4,
      "learning_rate": 2.8673835125448028e-05,
      "loss": 0.3564,
      "step": 450
    },
    {
      "epoch": 14.72,
      "learning_rate": 2.807646356033453e-05,
      "loss": 0.4003,
      "step": 460
    },
    {
      "epoch": 14.98,
      "eval_accuracy": 0.8478015783540023,
      "eval_f1": 0.8460587531419033,
      "eval_loss": 0.47490546107292175,
      "eval_precision": 0.8470790221196264,
      "eval_recall": 0.8478015783540023,
      "eval_runtime": 10.9186,
      "eval_samples_per_second": 162.475,
      "eval_steps_per_second": 1.282,
      "step": 468
    },
    {
      "epoch": 15.04,
      "learning_rate": 2.747909199522103e-05,
      "loss": 0.3709,
      "step": 470
    },
    {
      "epoch": 15.36,
      "learning_rate": 2.6881720430107527e-05,
      "loss": 0.3343,
      "step": 480
    },
    {
      "epoch": 15.68,
      "learning_rate": 2.628434886499403e-05,
      "loss": 0.342,
      "step": 490
    },
    {
      "epoch": 16.0,
      "learning_rate": 2.5686977299880526e-05,
      "loss": 0.344,
      "step": 500
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.8596392333709132,
      "eval_f1": 0.8572223333441049,
      "eval_loss": 0.4677709937095642,
      "eval_precision": 0.8574700407213602,
      "eval_recall": 0.8596392333709132,
      "eval_runtime": 11.0483,
      "eval_samples_per_second": 160.568,
      "eval_steps_per_second": 1.267,
      "step": 500
    },
    {
      "epoch": 16.32,
      "learning_rate": 2.5089605734767026e-05,
      "loss": 0.3188,
      "step": 510
    },
    {
      "epoch": 16.64,
      "learning_rate": 2.4492234169653525e-05,
      "loss": 0.3423,
      "step": 520
    },
    {
      "epoch": 16.96,
      "learning_rate": 2.3894862604540025e-05,
      "loss": 0.3252,
      "step": 530
    },
    {
      "epoch": 16.99,
      "eval_accuracy": 0.8472378804960541,
      "eval_f1": 0.8458902825091352,
      "eval_loss": 0.5024080276489258,
      "eval_precision": 0.8469505736783929,
      "eval_recall": 0.8472378804960541,
      "eval_runtime": 10.8627,
      "eval_samples_per_second": 163.312,
      "eval_steps_per_second": 1.289,
      "step": 531
    },
    {
      "epoch": 17.28,
      "learning_rate": 2.3297491039426525e-05,
      "loss": 0.3194,
      "step": 540
    },
    {
      "epoch": 17.6,
      "learning_rate": 2.270011947431302e-05,
      "loss": 0.315,
      "step": 550
    },
    {
      "epoch": 17.92,
      "learning_rate": 2.2102747909199524e-05,
      "loss": 0.3166,
      "step": 560
    },
    {
      "epoch": 17.98,
      "eval_accuracy": 0.8438556933483653,
      "eval_f1": 0.841753361423925,
      "eval_loss": 0.5038200616836548,
      "eval_precision": 0.8441624506359545,
      "eval_recall": 0.8438556933483653,
      "eval_runtime": 10.9467,
      "eval_samples_per_second": 162.059,
      "eval_steps_per_second": 1.279,
      "step": 562
    },
    {
      "epoch": 18.24,
      "learning_rate": 2.1505376344086024e-05,
      "loss": 0.3036,
      "step": 570
    },
    {
      "epoch": 18.56,
      "learning_rate": 2.0908004778972523e-05,
      "loss": 0.2912,
      "step": 580
    },
    {
      "epoch": 18.88,
      "learning_rate": 2.031063321385902e-05,
      "loss": 0.2978,
      "step": 590
    },
    {
      "epoch": 18.98,
      "eval_accuracy": 0.8365276211950394,
      "eval_f1": 0.8349266239501303,
      "eval_loss": 0.5239560008049011,
      "eval_precision": 0.8350616045263993,
      "eval_recall": 0.8365276211950394,
      "eval_runtime": 10.8783,
      "eval_samples_per_second": 163.078,
      "eval_steps_per_second": 1.287,
      "step": 593
    },
    {
      "epoch": 19.2,
      "learning_rate": 1.9713261648745522e-05,
      "loss": 0.2865,
      "step": 600
    },
    {
      "epoch": 19.52,
      "learning_rate": 1.9115890083632022e-05,
      "loss": 0.2903,
      "step": 610
    },
    {
      "epoch": 19.84,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.2748,
      "step": 620
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.8511837655016911,
      "eval_f1": 0.8472172420526128,
      "eval_loss": 0.5176008343696594,
      "eval_precision": 0.849662483048973,
      "eval_recall": 0.8511837655016911,
      "eval_runtime": 10.9331,
      "eval_samples_per_second": 162.259,
      "eval_steps_per_second": 1.281,
      "step": 625
    },
    {
      "epoch": 20.16,
      "learning_rate": 1.7921146953405018e-05,
      "loss": 0.2696,
      "step": 630
    },
    {
      "epoch": 20.48,
      "learning_rate": 1.7323775388291518e-05,
      "loss": 0.2645,
      "step": 640
    },
    {
      "epoch": 20.8,
      "learning_rate": 1.6726403823178017e-05,
      "loss": 0.2691,
      "step": 650
    },
    {
      "epoch": 20.99,
      "eval_accuracy": 0.8534385569334837,
      "eval_f1": 0.850574302117733,
      "eval_loss": 0.5529418587684631,
      "eval_precision": 0.8514327131139023,
      "eval_recall": 0.8534385569334837,
      "eval_runtime": 11.1461,
      "eval_samples_per_second": 159.159,
      "eval_steps_per_second": 1.256,
      "step": 656
    },
    {
      "epoch": 21.12,
      "learning_rate": 1.6129032258064517e-05,
      "loss": 0.2555,
      "step": 660
    },
    {
      "epoch": 21.44,
      "learning_rate": 1.5531660692951016e-05,
      "loss": 0.2532,
      "step": 670
    },
    {
      "epoch": 21.76,
      "learning_rate": 1.4934289127837516e-05,
      "loss": 0.2571,
      "step": 680
    },
    {
      "epoch": 21.98,
      "eval_accuracy": 0.8562570462232244,
      "eval_f1": 0.8535331719575036,
      "eval_loss": 0.5440572500228882,
      "eval_precision": 0.8573345104741896,
      "eval_recall": 0.8562570462232244,
      "eval_runtime": 10.8704,
      "eval_samples_per_second": 163.196,
      "eval_steps_per_second": 1.288,
      "step": 687
    },
    {
      "epoch": 22.08,
      "learning_rate": 1.4336917562724014e-05,
      "loss": 0.267,
      "step": 690
    },
    {
      "epoch": 22.4,
      "learning_rate": 1.3739545997610515e-05,
      "loss": 0.2577,
      "step": 700
    },
    {
      "epoch": 22.72,
      "learning_rate": 1.3142174432497015e-05,
      "loss": 0.2451,
      "step": 710
    },
    {
      "epoch": 22.98,
      "eval_accuracy": 0.842728297632469,
      "eval_f1": 0.8411701610062584,
      "eval_loss": 0.5440482497215271,
      "eval_precision": 0.8426930970925739,
      "eval_recall": 0.842728297632469,
      "eval_runtime": 11.1949,
      "eval_samples_per_second": 158.466,
      "eval_steps_per_second": 1.251,
      "step": 718
    },
    {
      "epoch": 23.04,
      "learning_rate": 1.2544802867383513e-05,
      "loss": 0.2448,
      "step": 720
    },
    {
      "epoch": 23.36,
      "learning_rate": 1.1947431302270013e-05,
      "loss": 0.2316,
      "step": 730
    },
    {
      "epoch": 23.68,
      "learning_rate": 1.135005973715651e-05,
      "loss": 0.2545,
      "step": 740
    },
    {
      "epoch": 24.0,
      "learning_rate": 1.0752688172043012e-05,
      "loss": 0.2256,
      "step": 750
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.850620067643743,
      "eval_f1": 0.8474199467625554,
      "eval_loss": 0.5489442348480225,
      "eval_precision": 0.8467373057809777,
      "eval_recall": 0.850620067643743,
      "eval_runtime": 11.283,
      "eval_samples_per_second": 157.228,
      "eval_steps_per_second": 1.241,
      "step": 750
    },
    {
      "epoch": 24.32,
      "learning_rate": 1.015531660692951e-05,
      "loss": 0.2449,
      "step": 760
    },
    {
      "epoch": 24.64,
      "learning_rate": 9.557945041816011e-06,
      "loss": 0.2227,
      "step": 770
    },
    {
      "epoch": 24.96,
      "learning_rate": 8.960573476702509e-06,
      "loss": 0.2304,
      "step": 780
    },
    {
      "epoch": 24.99,
      "eval_accuracy": 0.8534385569334837,
      "eval_f1": 0.8498128597008927,
      "eval_loss": 0.5695037245750427,
      "eval_precision": 0.8492335232117652,
      "eval_recall": 0.8534385569334837,
      "eval_runtime": 10.9266,
      "eval_samples_per_second": 162.356,
      "eval_steps_per_second": 1.281,
      "step": 781
    },
    {
      "epoch": 25.28,
      "learning_rate": 8.363201911589009e-06,
      "loss": 0.2085,
      "step": 790
    },
    {
      "epoch": 25.6,
      "learning_rate": 7.765830346475508e-06,
      "loss": 0.2208,
      "step": 800
    },
    {
      "epoch": 25.92,
      "learning_rate": 7.168458781362007e-06,
      "loss": 0.2102,
      "step": 810
    },
    {
      "epoch": 25.98,
      "eval_accuracy": 0.8568207440811725,
      "eval_f1": 0.8540004867753292,
      "eval_loss": 0.5347083210945129,
      "eval_precision": 0.8532798699553742,
      "eval_recall": 0.8568207440811725,
      "eval_runtime": 10.9611,
      "eval_samples_per_second": 161.845,
      "eval_steps_per_second": 1.277,
      "step": 812
    },
    {
      "epoch": 26.24,
      "learning_rate": 6.5710872162485075e-06,
      "loss": 0.2199,
      "step": 820
    },
    {
      "epoch": 26.56,
      "learning_rate": 5.973715651135006e-06,
      "loss": 0.2082,
      "step": 830
    },
    {
      "epoch": 26.88,
      "learning_rate": 5.376344086021506e-06,
      "loss": 0.2172,
      "step": 840
    },
    {
      "epoch": 26.98,
      "eval_accuracy": 0.8523111612175873,
      "eval_f1": 0.8517977660010669,
      "eval_loss": 0.5399231910705566,
      "eval_precision": 0.8525415835506752,
      "eval_recall": 0.8523111612175873,
      "eval_runtime": 11.1925,
      "eval_samples_per_second": 158.5,
      "eval_steps_per_second": 1.251,
      "step": 843
    },
    {
      "epoch": 27.2,
      "learning_rate": 4.7789725209080055e-06,
      "loss": 0.2079,
      "step": 850
    },
    {
      "epoch": 27.52,
      "learning_rate": 4.181600955794504e-06,
      "loss": 0.2186,
      "step": 860
    },
    {
      "epoch": 27.84,
      "learning_rate": 3.5842293906810035e-06,
      "loss": 0.1953,
      "step": 870
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.855129650507328,
      "eval_f1": 0.8521518627937925,
      "eval_loss": 0.5699470043182373,
      "eval_precision": 0.8547328115878524,
      "eval_recall": 0.855129650507328,
      "eval_runtime": 11.0392,
      "eval_samples_per_second": 160.7,
      "eval_steps_per_second": 1.268,
      "step": 875
    },
    {
      "epoch": 28.16,
      "learning_rate": 2.986857825567503e-06,
      "loss": 0.2106,
      "step": 880
    },
    {
      "epoch": 28.48,
      "learning_rate": 2.3894862604540028e-06,
      "loss": 0.1925,
      "step": 890
    },
    {
      "epoch": 28.8,
      "learning_rate": 1.7921146953405017e-06,
      "loss": 0.2035,
      "step": 900
    },
    {
      "epoch": 28.99,
      "eval_accuracy": 0.863021420518602,
      "eval_f1": 0.8606635079855887,
      "eval_loss": 0.5538451075553894,
      "eval_precision": 0.863028156731382,
      "eval_recall": 0.863021420518602,
      "eval_runtime": 11.164,
      "eval_samples_per_second": 158.904,
      "eval_steps_per_second": 1.254,
      "step": 906
    },
    {
      "epoch": 29.12,
      "learning_rate": 1.1947431302270014e-06,
      "loss": 0.1954,
      "step": 910
    },
    {
      "epoch": 29.44,
      "learning_rate": 5.973715651135007e-07,
      "loss": 0.186,
      "step": 920
    },
    {
      "epoch": 29.76,
      "learning_rate": 0.0,
      "loss": 0.1926,
      "step": 930
    },
    {
      "epoch": 29.76,
      "eval_accuracy": 0.863021420518602,
      "eval_f1": 0.860579135743333,
      "eval_loss": 0.543451189994812,
      "eval_precision": 0.8608767769841421,
      "eval_recall": 0.863021420518602,
      "eval_runtime": 11.0366,
      "eval_samples_per_second": 160.739,
      "eval_steps_per_second": 1.269,
      "step": 930
    },
    {
      "epoch": 29.76,
      "step": 930,
      "total_flos": 4.852841995091313e+19,
      "train_loss": 0.44145851263435937,
      "train_runtime": 6147.3767,
      "train_samples_per_second": 77.897,
      "train_steps_per_second": 0.151
    }
  ],
  "logging_steps": 10,
  "max_steps": 930,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 30,
  "save_steps": 500,
  "total_flos": 4.852841995091313e+19,
  "train_batch_size": 128,
  "trial_name": null,
  "trial_params": null
}