| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 1.0321027287319422, | |
| "global_step": 160, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 4.9193548387096775e-05, | |
| "loss": 2.714, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 4.8387096774193554e-05, | |
| "loss": 1.6354, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_loss": 1.7578704357147217, | |
| "eval_rouge2_fmeasure": 0.0827, | |
| "eval_rouge2_precision": 0.0554, | |
| "eval_rouge2_recall": 0.1855, | |
| "eval_runtime": 1288.6002, | |
| "eval_samples_per_second": 0.388, | |
| "eval_steps_per_second": 0.039, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 4.7580645161290326e-05, | |
| "loss": 1.5202, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 4.67741935483871e-05, | |
| "loss": 1.5048, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_loss": 1.6580469608306885, | |
| "eval_rouge2_fmeasure": 0.0917, | |
| "eval_rouge2_precision": 0.0625, | |
| "eval_rouge2_recall": 0.1921, | |
| "eval_runtime": 1296.1289, | |
| "eval_samples_per_second": 0.386, | |
| "eval_steps_per_second": 0.039, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 4.596774193548387e-05, | |
| "loss": 1.4322, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 4.516129032258064e-05, | |
| "loss": 1.4194, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_loss": 1.5461347103118896, | |
| "eval_rouge2_fmeasure": 0.0919, | |
| "eval_rouge2_precision": 0.0613, | |
| "eval_rouge2_recall": 0.2071, | |
| "eval_runtime": 1357.8126, | |
| "eval_samples_per_second": 0.368, | |
| "eval_steps_per_second": 0.037, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 4.435483870967742e-05, | |
| "loss": 1.3347, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 4.3548387096774194e-05, | |
| "loss": 1.3539, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_loss": 1.5223164558410645, | |
| "eval_rouge2_fmeasure": 0.0909, | |
| "eval_rouge2_precision": 0.0606, | |
| "eval_rouge2_recall": 0.2068, | |
| "eval_runtime": 1335.0407, | |
| "eval_samples_per_second": 0.375, | |
| "eval_steps_per_second": 0.037, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 4.2741935483870973e-05, | |
| "loss": 1.351, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 4.1935483870967746e-05, | |
| "loss": 1.3326, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_loss": 1.483912706375122, | |
| "eval_rouge2_fmeasure": 0.0902, | |
| "eval_rouge2_precision": 0.0594, | |
| "eval_rouge2_recall": 0.2113, | |
| "eval_runtime": 1337.6258, | |
| "eval_samples_per_second": 0.374, | |
| "eval_steps_per_second": 0.037, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 4.112903225806452e-05, | |
| "loss": 1.2886, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 4.032258064516129e-05, | |
| "loss": 1.2789, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_loss": 1.5512304306030273, | |
| "eval_rouge2_fmeasure": 0.0954, | |
| "eval_rouge2_precision": 0.0639, | |
| "eval_rouge2_recall": 0.2123, | |
| "eval_runtime": 1377.3491, | |
| "eval_samples_per_second": 0.363, | |
| "eval_steps_per_second": 0.036, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 3.951612903225806e-05, | |
| "loss": 1.2981, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 3.870967741935484e-05, | |
| "loss": 1.2861, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_loss": 1.461083173751831, | |
| "eval_rouge2_fmeasure": 0.098, | |
| "eval_rouge2_precision": 0.0651, | |
| "eval_rouge2_recall": 0.2234, | |
| "eval_runtime": 1306.3575, | |
| "eval_samples_per_second": 0.383, | |
| "eval_steps_per_second": 0.038, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 3.7903225806451614e-05, | |
| "loss": 1.3188, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 3.7096774193548386e-05, | |
| "loss": 1.2977, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_loss": 1.4521564245224, | |
| "eval_rouge2_fmeasure": 0.1013, | |
| "eval_rouge2_precision": 0.0677, | |
| "eval_rouge2_recall": 0.2272, | |
| "eval_runtime": 1315.4665, | |
| "eval_samples_per_second": 0.38, | |
| "eval_steps_per_second": 0.038, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 3.6290322580645165e-05, | |
| "loss": 1.3004, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 3.548387096774194e-05, | |
| "loss": 1.2403, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_loss": 1.4190800189971924, | |
| "eval_rouge2_fmeasure": 0.1044, | |
| "eval_rouge2_precision": 0.0695, | |
| "eval_rouge2_recall": 0.2376, | |
| "eval_runtime": 1335.9699, | |
| "eval_samples_per_second": 0.374, | |
| "eval_steps_per_second": 0.037, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 3.467741935483872e-05, | |
| "loss": 1.2567, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 3.387096774193548e-05, | |
| "loss": 1.2235, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_loss": 1.4258784055709839, | |
| "eval_rouge2_fmeasure": 0.0988, | |
| "eval_rouge2_precision": 0.0658, | |
| "eval_rouge2_recall": 0.2201, | |
| "eval_runtime": 1320.053, | |
| "eval_samples_per_second": 0.379, | |
| "eval_steps_per_second": 0.038, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 3.306451612903226e-05, | |
| "loss": 1.1957, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 3.2258064516129034e-05, | |
| "loss": 1.2944, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_loss": 1.3971573114395142, | |
| "eval_rouge2_fmeasure": 0.1008, | |
| "eval_rouge2_precision": 0.0668, | |
| "eval_rouge2_recall": 0.2303, | |
| "eval_runtime": 1291.1344, | |
| "eval_samples_per_second": 0.387, | |
| "eval_steps_per_second": 0.039, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 3.1451612903225806e-05, | |
| "loss": 1.236, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 3.0645161290322585e-05, | |
| "loss": 1.2184, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_loss": 1.4044127464294434, | |
| "eval_rouge2_fmeasure": 0.1045, | |
| "eval_rouge2_precision": 0.0688, | |
| "eval_rouge2_recall": 0.2434, | |
| "eval_runtime": 1297.8926, | |
| "eval_samples_per_second": 0.385, | |
| "eval_steps_per_second": 0.039, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 2.9838709677419357e-05, | |
| "loss": 1.1964, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 2.9032258064516133e-05, | |
| "loss": 1.2579, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_loss": 1.3979231119155884, | |
| "eval_rouge2_fmeasure": 0.102, | |
| "eval_rouge2_precision": 0.0678, | |
| "eval_rouge2_recall": 0.2317, | |
| "eval_runtime": 1320.6473, | |
| "eval_samples_per_second": 0.379, | |
| "eval_steps_per_second": 0.038, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 2.822580645161291e-05, | |
| "loss": 1.2123, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 2.7419354838709678e-05, | |
| "loss": 1.1875, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_loss": 1.431972861289978, | |
| "eval_rouge2_fmeasure": 0.104, | |
| "eval_rouge2_precision": 0.0694, | |
| "eval_rouge2_recall": 0.2333, | |
| "eval_runtime": 1301.699, | |
| "eval_samples_per_second": 0.384, | |
| "eval_steps_per_second": 0.038, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.661290322580645e-05, | |
| "loss": 1.2311, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 2.5806451612903226e-05, | |
| "loss": 1.2116, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_loss": 1.417688012123108, | |
| "eval_rouge2_fmeasure": 0.1034, | |
| "eval_rouge2_precision": 0.069, | |
| "eval_rouge2_recall": 0.2334, | |
| "eval_runtime": 1305.2333, | |
| "eval_samples_per_second": 0.383, | |
| "eval_steps_per_second": 0.038, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.2515, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 2.4193548387096777e-05, | |
| "loss": 1.3298, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_loss": 1.4143891334533691, | |
| "eval_rouge2_fmeasure": 0.105, | |
| "eval_rouge2_precision": 0.0705, | |
| "eval_rouge2_recall": 0.2332, | |
| "eval_runtime": 1300.2169, | |
| "eval_samples_per_second": 0.385, | |
| "eval_steps_per_second": 0.038, | |
| "step": 160 | |
| } | |
| ], | |
| "max_steps": 310, | |
| "num_train_epochs": 2, | |
| "total_flos": 6.944926835146752e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |