{
  "best_metric": 0.9148148148148149,
  "best_model_checkpoint": "dinov2-finetuned-har/checkpoint-830",
  "epoch": 9.91044776119403,
  "eval_steps": 500,
  "global_step": 830,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.11940298507462686, "grad_norm": 42.725677490234375, "learning_rate": 6.024096385542169e-06, "loss": 2.9994, "step": 10 },
    { "epoch": 0.23880597014925373, "grad_norm": 40.75918197631836, "learning_rate": 1.2048192771084338e-05, "loss": 2.1467, "step": 20 },
    { "epoch": 0.3582089552238806, "grad_norm": 36.34815979003906, "learning_rate": 1.8072289156626505e-05, "loss": 1.368, "step": 30 },
    { "epoch": 0.47761194029850745, "grad_norm": 41.22068405151367, "learning_rate": 2.4096385542168677e-05, "loss": 1.0901, "step": 40 },
    { "epoch": 0.5970149253731343, "grad_norm": 35.41105270385742, "learning_rate": 3.012048192771085e-05, "loss": 0.9148, "step": 50 },
    { "epoch": 0.7164179104477612, "grad_norm": 45.00641632080078, "learning_rate": 3.614457831325301e-05, "loss": 0.8958, "step": 60 },
    { "epoch": 0.835820895522388, "grad_norm": 32.627593994140625, "learning_rate": 4.2168674698795186e-05, "loss": 0.9745, "step": 70 },
    { "epoch": 0.9552238805970149, "grad_norm": 44.3660888671875, "learning_rate": 4.8192771084337354e-05, "loss": 0.9429, "step": 80 },
    { "epoch": 0.991044776119403, "eval_accuracy": 0.8328042328042328, "eval_loss": 0.562383234500885, "eval_runtime": 21.3212, "eval_samples_per_second": 88.644, "eval_steps_per_second": 2.814, "step": 83 },
    { "epoch": 1.0746268656716418, "grad_norm": 66.64344024658203, "learning_rate": 4.953145917001339e-05, "loss": 0.8966, "step": 90 },
    { "epoch": 1.1940298507462686, "grad_norm": 31.583904266357422, "learning_rate": 4.886211512717537e-05, "loss": 0.8292, "step": 100 },
    { "epoch": 1.3134328358208955, "grad_norm": 70.04341888427734, "learning_rate": 4.8192771084337354e-05, "loss": 0.8166, "step": 110 },
    { "epoch": 1.4328358208955223, "grad_norm": 42.080177307128906, "learning_rate": 4.7523427041499336e-05, "loss": 0.9254, "step": 120 },
    { "epoch": 1.5522388059701493, "grad_norm": 123.05072784423828, "learning_rate": 4.685408299866131e-05, "loss": 0.9084, "step": 130 },
    { "epoch": 1.671641791044776, "grad_norm": 25.725753784179688, "learning_rate": 4.61847389558233e-05, "loss": 0.8126, "step": 140 },
    { "epoch": 1.7910447761194028, "grad_norm": 20.468259811401367, "learning_rate": 4.5515394912985275e-05, "loss": 0.8057, "step": 150 },
    { "epoch": 1.9104477611940298, "grad_norm": 24.806772232055664, "learning_rate": 4.484605087014726e-05, "loss": 0.7912, "step": 160 },
    { "epoch": 1.9940298507462688, "eval_accuracy": 0.8587301587301587, "eval_loss": 0.47548335790634155, "eval_runtime": 21.1591, "eval_samples_per_second": 89.323, "eval_steps_per_second": 2.836, "step": 167 },
    { "epoch": 2.029850746268657, "grad_norm": 26.82819175720215, "learning_rate": 4.417670682730924e-05, "loss": 0.7938, "step": 170 },
    { "epoch": 2.1492537313432836, "grad_norm": 30.11927604675293, "learning_rate": 4.350736278447122e-05, "loss": 0.7185, "step": 180 },
    { "epoch": 2.2686567164179103, "grad_norm": 32.696815490722656, "learning_rate": 4.2838018741633203e-05, "loss": 0.7642, "step": 190 },
    { "epoch": 2.388059701492537, "grad_norm": 34.70363998413086, "learning_rate": 4.2168674698795186e-05, "loss": 0.7867, "step": 200 },
    { "epoch": 2.5074626865671643, "grad_norm": 21.691709518432617, "learning_rate": 4.149933065595716e-05, "loss": 0.7376, "step": 210 },
    { "epoch": 2.626865671641791, "grad_norm": 37.57550811767578, "learning_rate": 4.082998661311915e-05, "loss": 0.7446, "step": 220 },
    { "epoch": 2.746268656716418, "grad_norm": 21.075855255126953, "learning_rate": 4.0160642570281125e-05, "loss": 0.6815, "step": 230 },
    { "epoch": 2.8656716417910446, "grad_norm": 29.882211685180664, "learning_rate": 3.949129852744311e-05, "loss": 0.686, "step": 240 },
    { "epoch": 2.9850746268656714, "grad_norm": 31.888534545898438, "learning_rate": 3.882195448460509e-05, "loss": 0.7371, "step": 250 },
    { "epoch": 2.997014925373134, "eval_accuracy": 0.855026455026455, "eval_loss": 0.4584466516971588, "eval_runtime": 21.3468, "eval_samples_per_second": 88.538, "eval_steps_per_second": 2.811, "step": 251 },
    { "epoch": 3.1044776119402986, "grad_norm": 31.14760971069336, "learning_rate": 3.815261044176707e-05, "loss": 0.6986, "step": 260 },
    { "epoch": 3.2238805970149254, "grad_norm": 20.720378875732422, "learning_rate": 3.748326639892905e-05, "loss": 0.6277, "step": 270 },
    { "epoch": 3.343283582089552, "grad_norm": 22.004247665405273, "learning_rate": 3.6813922356091035e-05, "loss": 0.5936, "step": 280 },
    { "epoch": 3.4626865671641793, "grad_norm": 24.986722946166992, "learning_rate": 3.614457831325301e-05, "loss": 0.5917, "step": 290 },
    { "epoch": 3.582089552238806, "grad_norm": 27.637083053588867, "learning_rate": 3.5475234270415e-05, "loss": 0.5619, "step": 300 },
    { "epoch": 3.701492537313433, "grad_norm": 37.6093864440918, "learning_rate": 3.4805890227576974e-05, "loss": 0.6538, "step": 310 },
    { "epoch": 3.8208955223880596, "grad_norm": 23.496280670166016, "learning_rate": 3.413654618473896e-05, "loss": 0.6455, "step": 320 },
    { "epoch": 3.9402985074626864, "grad_norm": 23.340776443481445, "learning_rate": 3.346720214190094e-05, "loss": 0.5915, "step": 330 },
    { "epoch": 4.0, "eval_accuracy": 0.8761904761904762, "eval_loss": 0.38699546456336975, "eval_runtime": 21.0126, "eval_samples_per_second": 89.946, "eval_steps_per_second": 2.855, "step": 335 },
    { "epoch": 4.059701492537314, "grad_norm": 24.867502212524414, "learning_rate": 3.279785809906292e-05, "loss": 0.5275, "step": 340 },
    { "epoch": 4.17910447761194, "grad_norm": 22.04759407043457, "learning_rate": 3.21285140562249e-05, "loss": 0.5317, "step": 350 },
    { "epoch": 4.298507462686567, "grad_norm": 23.60555076599121, "learning_rate": 3.1459170013386885e-05, "loss": 0.6104, "step": 360 },
    { "epoch": 4.417910447761194, "grad_norm": 30.320343017578125, "learning_rate": 3.078982597054887e-05, "loss": 0.5362, "step": 370 },
    { "epoch": 4.537313432835821, "grad_norm": 19.35739517211914, "learning_rate": 3.012048192771085e-05, "loss": 0.5487, "step": 380 },
    { "epoch": 4.656716417910448, "grad_norm": 27.066503524780273, "learning_rate": 2.9451137884872827e-05, "loss": 0.5296, "step": 390 },
    { "epoch": 4.776119402985074, "grad_norm": 35.0575065612793, "learning_rate": 2.878179384203481e-05, "loss": 0.5051, "step": 400 },
    { "epoch": 4.895522388059701, "grad_norm": 19.96647071838379, "learning_rate": 2.8112449799196788e-05, "loss": 0.5635, "step": 410 },
    { "epoch": 4.991044776119403, "eval_accuracy": 0.8703703703703703, "eval_loss": 0.4037255644798279, "eval_runtime": 21.0465, "eval_samples_per_second": 89.801, "eval_steps_per_second": 2.851, "step": 418 },
    { "epoch": 5.014925373134329, "grad_norm": 18.035507202148438, "learning_rate": 2.7443105756358774e-05, "loss": 0.5311, "step": 420 },
    { "epoch": 5.134328358208955, "grad_norm": 27.073444366455078, "learning_rate": 2.6773761713520752e-05, "loss": 0.4792, "step": 430 },
    { "epoch": 5.253731343283582, "grad_norm": 17.406234741210938, "learning_rate": 2.6104417670682734e-05, "loss": 0.5018, "step": 440 },
    { "epoch": 5.373134328358209, "grad_norm": 19.9872989654541, "learning_rate": 2.5435073627844713e-05, "loss": 0.4636, "step": 450 },
    { "epoch": 5.492537313432836, "grad_norm": 20.06911849975586, "learning_rate": 2.4765729585006695e-05, "loss": 0.4911, "step": 460 },
    { "epoch": 5.611940298507463, "grad_norm": 19.70343780517578, "learning_rate": 2.4096385542168677e-05, "loss": 0.4889, "step": 470 },
    { "epoch": 5.731343283582089, "grad_norm": 21.2330379486084, "learning_rate": 2.3427041499330656e-05, "loss": 0.4411, "step": 480 },
    { "epoch": 5.850746268656716, "grad_norm": 20.05991554260254, "learning_rate": 2.2757697456492638e-05, "loss": 0.4584, "step": 490 },
    { "epoch": 5.970149253731344, "grad_norm": 19.27924919128418, "learning_rate": 2.208835341365462e-05, "loss": 0.498, "step": 500 },
    { "epoch": 5.994029850746268, "eval_accuracy": 0.8804232804232804, "eval_loss": 0.3876107931137085, "eval_runtime": 21.1814, "eval_samples_per_second": 89.229, "eval_steps_per_second": 2.833, "step": 502 },
    { "epoch": 6.08955223880597, "grad_norm": 23.8977108001709, "learning_rate": 2.1419009370816602e-05, "loss": 0.4212, "step": 510 },
    { "epoch": 6.208955223880597, "grad_norm": 25.704334259033203, "learning_rate": 2.074966532797858e-05, "loss": 0.4529, "step": 520 },
    { "epoch": 6.3283582089552235, "grad_norm": 24.296876907348633, "learning_rate": 2.0080321285140562e-05, "loss": 0.4707, "step": 530 },
    { "epoch": 6.447761194029851, "grad_norm": 17.248048782348633, "learning_rate": 1.9410977242302544e-05, "loss": 0.4178, "step": 540 },
    { "epoch": 6.567164179104478, "grad_norm": 18.4821834564209, "learning_rate": 1.8741633199464527e-05, "loss": 0.449, "step": 550 },
    { "epoch": 6.686567164179104, "grad_norm": 19.246505737304688, "learning_rate": 1.8072289156626505e-05, "loss": 0.4209, "step": 560 },
    { "epoch": 6.8059701492537314, "grad_norm": 20.905685424804688, "learning_rate": 1.7402945113788487e-05, "loss": 0.4344, "step": 570 },
    { "epoch": 6.925373134328359, "grad_norm": 16.702898025512695, "learning_rate": 1.673360107095047e-05, "loss": 0.4541, "step": 580 },
    { "epoch": 6.997014925373135, "eval_accuracy": 0.8883597883597883, "eval_loss": 0.3611726760864258, "eval_runtime": 21.6673, "eval_samples_per_second": 87.228, "eval_steps_per_second": 2.769, "step": 586 },
    { "epoch": 7.044776119402985, "grad_norm": 13.757901191711426, "learning_rate": 1.606425702811245e-05, "loss": 0.3419, "step": 590 },
    { "epoch": 7.164179104477612, "grad_norm": 20.324989318847656, "learning_rate": 1.5394912985274433e-05, "loss": 0.3985, "step": 600 },
    { "epoch": 7.2835820895522385, "grad_norm": 15.066502571105957, "learning_rate": 1.4725568942436414e-05, "loss": 0.3917, "step": 610 },
    { "epoch": 7.402985074626866, "grad_norm": 14.179686546325684, "learning_rate": 1.4056224899598394e-05, "loss": 0.372, "step": 620 },
    { "epoch": 7.522388059701493, "grad_norm": 18.657678604125977, "learning_rate": 1.3386880856760376e-05, "loss": 0.3744, "step": 630 },
    { "epoch": 7.641791044776119, "grad_norm": 20.829837799072266, "learning_rate": 1.2717536813922356e-05, "loss": 0.3809, "step": 640 },
    { "epoch": 7.7611940298507465, "grad_norm": 16.62371063232422, "learning_rate": 1.2048192771084338e-05, "loss": 0.3727, "step": 650 },
    { "epoch": 7.880597014925373, "grad_norm": 14.337791442871094, "learning_rate": 1.1378848728246319e-05, "loss": 0.3274, "step": 660 },
    { "epoch": 8.0, "grad_norm": 19.433561325073242, "learning_rate": 1.0709504685408301e-05, "loss": 0.3513, "step": 670 },
    { "epoch": 8.0, "eval_accuracy": 0.9052910052910053, "eval_loss": 0.32395628094673157, "eval_runtime": 21.5863, "eval_samples_per_second": 87.555, "eval_steps_per_second": 2.78, "step": 670 },
    { "epoch": 8.119402985074627, "grad_norm": 21.07122802734375, "learning_rate": 1.0040160642570281e-05, "loss": 0.3392, "step": 680 },
    { "epoch": 8.238805970149254, "grad_norm": 21.168922424316406, "learning_rate": 9.370816599732263e-06, "loss": 0.3038, "step": 690 },
    { "epoch": 8.35820895522388, "grad_norm": 17.61219024658203, "learning_rate": 8.701472556894244e-06, "loss": 0.3049, "step": 700 },
    { "epoch": 8.477611940298507, "grad_norm": 18.213272094726562, "learning_rate": 8.032128514056226e-06, "loss": 0.3423, "step": 710 },
    { "epoch": 8.597014925373134, "grad_norm": 16.038349151611328, "learning_rate": 7.362784471218207e-06, "loss": 0.2895, "step": 720 },
    { "epoch": 8.716417910447761, "grad_norm": 14.367926597595215, "learning_rate": 6.693440428380188e-06, "loss": 0.3142, "step": 730 },
    { "epoch": 8.835820895522389, "grad_norm": 16.960412979125977, "learning_rate": 6.024096385542169e-06, "loss": 0.3338, "step": 740 },
    { "epoch": 8.955223880597014, "grad_norm": 13.258671760559082, "learning_rate": 5.3547523427041504e-06, "loss": 0.2963, "step": 750 },
    { "epoch": 8.991044776119402, "eval_accuracy": 0.9116402116402117, "eval_loss": 0.3176145553588867, "eval_runtime": 21.2968, "eval_samples_per_second": 88.746, "eval_steps_per_second": 2.817, "step": 753 },
    { "epoch": 9.074626865671641, "grad_norm": 17.589509963989258, "learning_rate": 4.685408299866132e-06, "loss": 0.265, "step": 760 },
    { "epoch": 9.194029850746269, "grad_norm": 13.759172439575195, "learning_rate": 4.016064257028113e-06, "loss": 0.2718, "step": 770 },
    { "epoch": 9.313432835820896, "grad_norm": 13.576536178588867, "learning_rate": 3.346720214190094e-06, "loss": 0.2744, "step": 780 },
    { "epoch": 9.432835820895523, "grad_norm": 13.831005096435547, "learning_rate": 2.6773761713520752e-06, "loss": 0.2789, "step": 790 },
    { "epoch": 9.552238805970148, "grad_norm": 14.289161682128906, "learning_rate": 2.0080321285140564e-06, "loss": 0.2793, "step": 800 },
    { "epoch": 9.671641791044776, "grad_norm": 13.557323455810547, "learning_rate": 1.3386880856760376e-06, "loss": 0.2893, "step": 810 },
    { "epoch": 9.791044776119403, "grad_norm": 13.76667308807373, "learning_rate": 6.693440428380188e-07, "loss": 0.2918, "step": 820 },
    { "epoch": 9.91044776119403, "grad_norm": 15.072478294372559, "learning_rate": 0.0, "loss": 0.2815, "step": 830 },
    { "epoch": 9.91044776119403, "eval_accuracy": 0.9148148148148149, "eval_loss": 0.30782097578048706, "eval_runtime": 22.2064, "eval_samples_per_second": 85.111, "eval_steps_per_second": 2.702, "step": 830 },
    { "epoch": 9.91044776119403, "step": 830, "total_flos": 1.0844463059730432e+19, "train_loss": 0.6027212039533868, "train_runtime": 2771.0404, "train_samples_per_second": 38.65, "train_steps_per_second": 0.3 }
  ],
  "logging_steps": 10,
  "max_steps": 830,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.0844463059730432e+19,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}