{ "best_metric": 0.6652947068214417, "best_model_checkpoint": "../Modelos/mt0_QG_SQuAD/checkpoint-4000", "epoch": 2.923512489154756, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "eval_loss": 1.2921833992004395, "eval_runtime": 99.039, "eval_samples_per_second": 106.726, "eval_steps_per_second": 26.686, "step": 50 }, { "epoch": 0.07, "eval_loss": 0.7744602560997009, "eval_runtime": 99.0509, "eval_samples_per_second": 106.713, "eval_steps_per_second": 26.683, "step": 100 }, { "epoch": 0.11, "eval_loss": 0.7465793490409851, "eval_runtime": 99.0509, "eval_samples_per_second": 106.713, "eval_steps_per_second": 26.683, "step": 150 }, { "epoch": 0.15, "eval_loss": 0.7375356554985046, "eval_runtime": 99.0564, "eval_samples_per_second": 106.707, "eval_steps_per_second": 26.682, "step": 200 }, { "epoch": 0.18, "eval_loss": 0.7277144193649292, "eval_runtime": 99.0472, "eval_samples_per_second": 106.717, "eval_steps_per_second": 26.684, "step": 250 }, { "epoch": 0.22, "eval_loss": 0.7252267003059387, "eval_runtime": 99.0554, "eval_samples_per_second": 106.708, "eval_steps_per_second": 26.682, "step": 300 }, { "epoch": 0.26, "eval_loss": 0.7177889347076416, "eval_runtime": 99.0562, "eval_samples_per_second": 106.707, "eval_steps_per_second": 26.682, "step": 350 }, { "epoch": 0.29, "eval_loss": 0.7163848876953125, "eval_runtime": 99.0555, "eval_samples_per_second": 106.708, "eval_steps_per_second": 26.682, "step": 400 }, { "epoch": 0.33, "eval_loss": 0.7115530967712402, "eval_runtime": 99.0571, "eval_samples_per_second": 106.706, "eval_steps_per_second": 26.682, "step": 450 }, { "epoch": 0.37, "learning_rate": 4.39083820662768e-05, "loss": 1.3919, "step": 500 }, { "epoch": 0.37, "eval_loss": 0.7089862823486328, "eval_runtime": 99.0105, "eval_samples_per_second": 106.756, "eval_steps_per_second": 26.694, "step": 500 }, { "epoch": 0.4, "eval_loss": 0.7095581889152527, "eval_runtime": 99.0746, "eval_samples_per_second": 106.687, "eval_steps_per_second": 26.677, "step": 550 }, { "epoch": 0.44, "eval_loss": 0.7025607228279114, "eval_runtime": 99.0556, "eval_samples_per_second": 106.708, "eval_steps_per_second": 26.682, "step": 600 }, { "epoch": 0.47, "eval_loss": 0.7023607492446899, "eval_runtime": 99.0561, "eval_samples_per_second": 106.707, "eval_steps_per_second": 26.682, "step": 650 }, { "epoch": 0.51, "eval_loss": 0.6990346312522888, "eval_runtime": 99.0585, "eval_samples_per_second": 106.705, "eval_steps_per_second": 26.681, "step": 700 }, { "epoch": 0.55, "eval_loss": 0.698978066444397, "eval_runtime": 99.0563, "eval_samples_per_second": 106.707, "eval_steps_per_second": 26.682, "step": 750 }, { "epoch": 0.58, "eval_loss": 0.6978702545166016, "eval_runtime": 99.0654, "eval_samples_per_second": 106.697, "eval_steps_per_second": 26.679, "step": 800 }, { "epoch": 0.62, "eval_loss": 0.6929869651794434, "eval_runtime": 99.0669, "eval_samples_per_second": 106.696, "eval_steps_per_second": 26.679, "step": 850 }, { "epoch": 0.66, "eval_loss": 0.693868100643158, "eval_runtime": 99.063, "eval_samples_per_second": 106.7, "eval_steps_per_second": 26.68, "step": 900 }, { "epoch": 0.69, "eval_loss": 0.6937694549560547, "eval_runtime": 99.0627, "eval_samples_per_second": 106.7, "eval_steps_per_second": 26.68, "step": 950 }, { "epoch": 0.73, "learning_rate": 3.7816764132553604e-05, "loss": 0.7713, "step": 1000 }, { "epoch": 0.73, "eval_loss": 0.6868888139724731, "eval_runtime": 99.0162, "eval_samples_per_second": 106.75, "eval_steps_per_second": 26.693, "step": 1000 }, { "epoch": 0.77, "eval_loss": 0.68803870677948, "eval_runtime": 99.0558, "eval_samples_per_second": 106.708, "eval_steps_per_second": 26.682, "step": 1050 }, { "epoch": 0.8, "eval_loss": 0.6879799365997314, "eval_runtime": 99.0081, "eval_samples_per_second": 106.759, "eval_steps_per_second": 26.695, "step": 1100 }, { "epoch": 0.84, "eval_loss": 0.6890503764152527, "eval_runtime": 99.0519, "eval_samples_per_second": 106.712, "eval_steps_per_second": 26.683, "step": 1150 }, { "epoch": 0.88, "eval_loss": 0.6867082715034485, "eval_runtime": 99.0571, "eval_samples_per_second": 106.706, "eval_steps_per_second": 26.682, "step": 1200 }, { "epoch": 0.91, "eval_loss": 0.685232400894165, "eval_runtime": 99.0511, "eval_samples_per_second": 106.713, "eval_steps_per_second": 26.683, "step": 1250 }, { "epoch": 0.95, "eval_loss": 0.6860905885696411, "eval_runtime": 99.0563, "eval_samples_per_second": 106.707, "eval_steps_per_second": 26.682, "step": 1300 }, { "epoch": 0.99, "eval_loss": 0.6822260022163391, "eval_runtime": 99.0589, "eval_samples_per_second": 106.704, "eval_steps_per_second": 26.681, "step": 1350 }, { "epoch": 1.02, "eval_loss": 0.6809637546539307, "eval_runtime": 99.0612, "eval_samples_per_second": 106.702, "eval_steps_per_second": 26.68, "step": 1400 }, { "epoch": 1.06, "eval_loss": 0.6814208030700684, "eval_runtime": 99.0551, "eval_samples_per_second": 106.708, "eval_steps_per_second": 26.682, "step": 1450 }, { "epoch": 1.1, "learning_rate": 3.172514619883041e-05, "loss": 0.7477, "step": 1500 }, { "epoch": 1.1, "eval_loss": 0.6810862421989441, "eval_runtime": 99.0227, "eval_samples_per_second": 106.743, "eval_steps_per_second": 26.691, "step": 1500 }, { "epoch": 1.13, "eval_loss": 0.67988121509552, "eval_runtime": 99.0581, "eval_samples_per_second": 106.705, "eval_steps_per_second": 26.681, "step": 1550 }, { "epoch": 1.17, "eval_loss": 0.6793538928031921, "eval_runtime": 99.0598, "eval_samples_per_second": 106.703, "eval_steps_per_second": 26.681, "step": 1600 }, { "epoch": 1.21, "eval_loss": 0.6803789734840393, "eval_runtime": 99.0507, "eval_samples_per_second": 106.713, "eval_steps_per_second": 26.683, "step": 1650 }, { "epoch": 1.24, "eval_loss": 0.6781566739082336, "eval_runtime": 99.0472, "eval_samples_per_second": 106.717, "eval_steps_per_second": 26.684, "step": 1700 }, { "epoch": 1.28, "eval_loss": 0.6781692504882812, "eval_runtime": 99.0434, "eval_samples_per_second": 106.721, "eval_steps_per_second": 26.685, "step": 1750 }, { "epoch": 1.32, "eval_loss": 0.6762599945068359, "eval_runtime": 99.0531, "eval_samples_per_second": 106.71, "eval_steps_per_second": 26.683, "step": 1800 }, { "epoch": 1.35, "eval_loss": 0.6762254238128662, "eval_runtime": 99.0448, "eval_samples_per_second": 106.719, "eval_steps_per_second": 26.685, "step": 1850 }, { "epoch": 1.39, "eval_loss": 0.6738188862800598, "eval_runtime": 99.0427, "eval_samples_per_second": 106.722, "eval_steps_per_second": 26.685, "step": 1900 }, { "epoch": 1.43, "eval_loss": 0.6767028570175171, "eval_runtime": 99.0459, "eval_samples_per_second": 106.718, "eval_steps_per_second": 26.685, "step": 1950 }, { "epoch": 1.46, "learning_rate": 2.5633528265107216e-05, "loss": 0.7251, "step": 2000 }, { "epoch": 1.46, "eval_loss": 0.6748691201210022, "eval_runtime": 98.9904, "eval_samples_per_second": 106.778, "eval_steps_per_second": 26.7, "step": 2000 }, { "epoch": 1.5, "eval_loss": 0.6725715398788452, "eval_runtime": 99.0539, "eval_samples_per_second": 106.71, "eval_steps_per_second": 26.682, "step": 2050 }, { "epoch": 1.53, "eval_loss": 0.676404595375061, "eval_runtime": 99.0537, "eval_samples_per_second": 106.71, "eval_steps_per_second": 26.683, "step": 2100 }, { "epoch": 1.57, "eval_loss": 0.6735058426856995, "eval_runtime": 99.0549, "eval_samples_per_second": 106.709, "eval_steps_per_second": 26.682, "step": 2150 }, { "epoch": 1.61, "eval_loss": 0.6737805604934692, "eval_runtime": 99.0492, "eval_samples_per_second": 106.715, "eval_steps_per_second": 26.684, "step": 2200 }, { "epoch": 1.64, "eval_loss": 0.6724738478660583, "eval_runtime": 99.0578, "eval_samples_per_second": 106.705, "eval_steps_per_second": 26.681, "step": 2250 }, { "epoch": 1.68, "eval_loss": 0.672220766544342, "eval_runtime": 99.0481, "eval_samples_per_second": 106.716, "eval_steps_per_second": 26.684, "step": 2300 }, { "epoch": 1.72, "eval_loss": 0.6724982857704163, "eval_runtime": 99.0532, "eval_samples_per_second": 106.71, "eval_steps_per_second": 26.683, "step": 2350 }, { "epoch": 1.75, "eval_loss": 0.6707101464271545, "eval_runtime": 99.0487, "eval_samples_per_second": 106.715, "eval_steps_per_second": 26.684, "step": 2400 }, { "epoch": 1.79, "eval_loss": 0.6724550127983093, "eval_runtime": 99.0502, "eval_samples_per_second": 106.714, "eval_steps_per_second": 26.683, "step": 2450 }, { "epoch": 1.83, "learning_rate": 1.9541910331384016e-05, "loss": 0.7154, "step": 2500 }, { "epoch": 1.83, "eval_loss": 0.6708072423934937, "eval_runtime": 98.996, "eval_samples_per_second": 106.772, "eval_steps_per_second": 26.698, "step": 2500 }, { "epoch": 1.86, "eval_loss": 0.6708223223686218, "eval_runtime": 99.0485, "eval_samples_per_second": 106.715, "eval_steps_per_second": 26.684, "step": 2550 }, { "epoch": 1.9, "eval_loss": 0.6708201169967651, "eval_runtime": 99.0348, "eval_samples_per_second": 106.73, "eval_steps_per_second": 26.688, "step": 2600 }, { "epoch": 1.94, "eval_loss": 0.6681162714958191, "eval_runtime": 99.0391, "eval_samples_per_second": 106.726, "eval_steps_per_second": 26.686, "step": 2650 }, { "epoch": 1.97, "eval_loss": 0.6703566908836365, "eval_runtime": 99.0413, "eval_samples_per_second": 106.723, "eval_steps_per_second": 26.686, "step": 2700 }, { "epoch": 2.01, "eval_loss": 0.6685038805007935, "eval_runtime": 99.0472, "eval_samples_per_second": 106.717, "eval_steps_per_second": 26.684, "step": 2750 }, { "epoch": 2.05, "eval_loss": 0.6697913408279419, "eval_runtime": 99.0345, "eval_samples_per_second": 106.731, "eval_steps_per_second": 26.688, "step": 2800 }, { "epoch": 2.08, "eval_loss": 0.6686891913414001, "eval_runtime": 99.036, "eval_samples_per_second": 106.729, "eval_steps_per_second": 26.687, "step": 2850 }, { "epoch": 2.12, "eval_loss": 0.6696143746376038, "eval_runtime": 99.0322, "eval_samples_per_second": 106.733, "eval_steps_per_second": 26.688, "step": 2900 }, { "epoch": 2.16, "eval_loss": 0.6674590110778809, "eval_runtime": 99.0384, "eval_samples_per_second": 106.726, "eval_steps_per_second": 26.687, "step": 2950 }, { "epoch": 2.19, "learning_rate": 1.3450292397660819e-05, "loss": 0.7119, "step": 3000 }, { "epoch": 2.19, "eval_loss": 0.6682441830635071, "eval_runtime": 98.9894, "eval_samples_per_second": 106.779, "eval_steps_per_second": 26.7, "step": 3000 }, { "epoch": 2.23, "eval_loss": 0.6684902906417847, "eval_runtime": 99.045, "eval_samples_per_second": 106.719, "eval_steps_per_second": 26.685, "step": 3050 }, { "epoch": 2.27, "eval_loss": 0.6682748198509216, "eval_runtime": 99.0366, "eval_samples_per_second": 106.728, "eval_steps_per_second": 26.687, "step": 3100 }, { "epoch": 2.3, "eval_loss": 0.6677591800689697, "eval_runtime": 99.0364, "eval_samples_per_second": 106.728, "eval_steps_per_second": 26.687, "step": 3150 }, { "epoch": 2.34, "eval_loss": 0.666348397731781, "eval_runtime": 99.0376, "eval_samples_per_second": 106.727, "eval_steps_per_second": 26.687, "step": 3200 }, { "epoch": 2.38, "eval_loss": 0.6670761704444885, "eval_runtime": 99.0329, "eval_samples_per_second": 106.732, "eval_steps_per_second": 26.688, "step": 3250 }, { "epoch": 2.41, "eval_loss": 0.6670991778373718, "eval_runtime": 99.0293, "eval_samples_per_second": 106.736, "eval_steps_per_second": 26.689, "step": 3300 }, { "epoch": 2.45, "eval_loss": 0.6664704084396362, "eval_runtime": 99.0352, "eval_samples_per_second": 106.73, "eval_steps_per_second": 26.687, "step": 3350 }, { "epoch": 2.49, "eval_loss": 0.666100263595581, "eval_runtime": 99.0398, "eval_samples_per_second": 106.725, "eval_steps_per_second": 26.686, "step": 3400 }, { "epoch": 2.52, "eval_loss": 0.6673260927200317, "eval_runtime": 99.0455, "eval_samples_per_second": 106.719, "eval_steps_per_second": 26.685, "step": 3450 }, { "epoch": 2.56, "learning_rate": 7.358674463937622e-06, "loss": 0.7062, "step": 3500 }, { "epoch": 2.56, "eval_loss": 0.666333794593811, "eval_runtime": 98.9963, "eval_samples_per_second": 106.772, "eval_steps_per_second": 26.698, "step": 3500 }, { "epoch": 2.59, "eval_loss": 0.666863739490509, "eval_runtime": 99.0318, "eval_samples_per_second": 106.733, "eval_steps_per_second": 26.688, "step": 3550 }, { "epoch": 2.63, "eval_loss": 0.6672132611274719, "eval_runtime": 99.0274, "eval_samples_per_second": 106.738, "eval_steps_per_second": 26.69, "step": 3600 }, { "epoch": 2.67, "eval_loss": 0.6660241484642029, "eval_runtime": 99.0376, "eval_samples_per_second": 106.727, "eval_steps_per_second": 26.687, "step": 3650 }, { "epoch": 2.7, "eval_loss": 0.6664428114891052, "eval_runtime": 99.0361, "eval_samples_per_second": 106.729, "eval_steps_per_second": 26.687, "step": 3700 }, { "epoch": 2.74, "eval_loss": 0.6664352416992188, "eval_runtime": 99.0223, "eval_samples_per_second": 106.744, "eval_steps_per_second": 26.691, "step": 3750 }, { "epoch": 2.78, "eval_loss": 0.6668286323547363, "eval_runtime": 99.0307, "eval_samples_per_second": 106.735, "eval_steps_per_second": 26.689, "step": 3800 }, { "epoch": 2.81, "eval_loss": 0.6657348275184631, "eval_runtime": 99.0283, "eval_samples_per_second": 106.737, "eval_steps_per_second": 26.689, "step": 3850 }, { "epoch": 2.85, "eval_loss": 0.6659272313117981, "eval_runtime": 99.0306, "eval_samples_per_second": 106.735, "eval_steps_per_second": 26.689, "step": 3900 }, { "epoch": 2.89, "eval_loss": 0.6652637720108032, "eval_runtime": 99.0292, "eval_samples_per_second": 106.736, "eval_steps_per_second": 26.689, "step": 3950 }, { "epoch": 2.92, "learning_rate": 1.2670565302144249e-06, "loss": 0.6933, "step": 4000 }, { "epoch": 2.92, "eval_loss": 0.6652947068214417, "eval_runtime": 98.9797, "eval_samples_per_second": 106.79, "eval_steps_per_second": 26.702, "step": 4000 } ], "max_steps": 4104, "num_train_epochs": 3, "total_flos": 3.078108528813015e+17, "trial_name": null, "trial_params": null }