{
  "best_metric": 1.6076909303665161,
  "best_model_checkpoint": "output/25-17/checkpoint-1440",
  "epoch": 9.0,
  "global_step": 1440,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 0.0001368696722497127,
      "loss": 2.507,
      "step": 5
    },
    {
      "epoch": 0.06,
      "learning_rate": 0.0001358818702356616,
      "loss": 2.2788,
      "step": 10
    },
    {
      "epoch": 0.09,
      "learning_rate": 0.00013424610703122953,
      "loss": 2.1702,
      "step": 15
    },
    {
      "epoch": 0.12,
      "learning_rate": 0.00013197813593027427,
      "loss": 2.0479,
      "step": 20
    },
    {
      "epoch": 0.16,
      "learning_rate": 0.00012909979873429716,
      "loss": 2.0181,
      "step": 25
    },
    {
      "epoch": 0.19,
      "learning_rate": 0.0001256388154039546,
      "loss": 2.0362,
      "step": 30
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.00012162851710068375,
      "loss": 2.1176,
      "step": 35
    },
    {
      "epoch": 0.25,
      "learning_rate": 0.00011710752518939715,
      "loss": 1.9178,
      "step": 40
    },
    {
      "epoch": 0.28,
      "learning_rate": 0.00011211937929362608,
      "loss": 2.0539,
      "step": 45
    },
    {
      "epoch": 0.31,
      "learning_rate": 0.00010671211798514472,
      "loss": 1.9318,
      "step": 50
    },
    {
      "epoch": 0.34,
      "learning_rate": 0.00010093781614626346,
      "loss": 2.0317,
      "step": 55
    },
    {
      "epoch": 0.38,
      "learning_rate": 9.485208346024516e-05,
      "loss": 1.9618,
      "step": 60
    },
    {
      "epoch": 0.41,
      "learning_rate": 8.851352885965611e-05,
      "loss": 1.9205,
      "step": 65
    },
    {
      "epoch": 0.44,
      "learning_rate": 8.19831960903064e-05,
      "loss": 1.8521,
      "step": 70
    },
    {
      "epoch": 0.47,
      "learning_rate": 7.532397582660788e-05,
      "loss": 1.8699,
      "step": 75
    },
    {
      "epoch": 0.5,
      "learning_rate": 6.86e-05,
      "loss": 1.9852,
      "step": 80
    },
    {
      "epoch": 0.53,
      "learning_rate": 6.187602417339214e-05,
      "loss": 1.8576,
      "step": 85
    },
    {
      "epoch": 0.56,
      "learning_rate": 5.52168039096936e-05,
      "loss": 1.93,
      "step": 90
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.8686471140343896e-05,
      "loss": 2.0017,
      "step": 95
    },
    {
      "epoch": 0.62,
      "learning_rate": 4.2347916539754844e-05,
      "loss": 1.9962,
      "step": 100
    },
    {
      "epoch": 0.66,
      "learning_rate": 3.6262183853736556e-05,
      "loss": 1.9731,
      "step": 105
    },
    {
      "epoch": 0.69,
      "learning_rate": 3.0487882014855305e-05,
      "loss": 1.9197,
      "step": 110
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.5080620706373927e-05,
      "loss": 1.8595,
      "step": 115
    },
    {
      "epoch": 0.75,
      "learning_rate": 2.0092474810602843e-05,
      "loss": 1.925,
      "step": 120
    },
    {
      "epoch": 0.78,
      "learning_rate": 1.557148289931624e-05,
      "loss": 1.9259,
      "step": 125
    },
    {
      "epoch": 0.81,
      "learning_rate": 1.1561184596045389e-05,
      "loss": 1.8582,
      "step": 130
    },
    {
      "epoch": 0.84,
      "learning_rate": 8.100201265702852e-06,
      "loss": 1.8858,
      "step": 135
    },
    {
      "epoch": 0.88,
      "learning_rate": 5.22186406972573e-06,
      "loss": 1.8995,
      "step": 140
    },
    {
      "epoch": 0.91,
      "learning_rate": 2.953892968770475e-06,
      "loss": 1.9137,
      "step": 145
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.3181297643383925e-06,
      "loss": 1.8872,
      "step": 150
    },
    {
      "epoch": 0.97,
      "learning_rate": 3.303277502872983e-07,
      "loss": 1.8137,
      "step": 155
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0,
      "loss": 1.9111,
      "step": 160
    },
    {
      "epoch": 1.0,
      "eval_loss": 1.8344975709915161,
      "eval_runtime": 11.0703,
      "eval_samples_per_second": 22.583,
      "eval_steps_per_second": 2.891,
      "step": 160
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.303277502872907e-07,
      "loss": 1.8983,
      "step": 165
    },
    {
      "epoch": 1.06,
      "learning_rate": 1.3181297643383925e-06,
      "loss": 1.8515,
      "step": 170
    },
    {
      "epoch": 1.09,
      "learning_rate": 2.9538929687704672e-06,
      "loss": 1.8853,
      "step": 175
    },
    {
      "epoch": 1.12,
      "learning_rate": 5.2218640697257225e-06,
      "loss": 1.8512,
      "step": 180
    },
    {
      "epoch": 1.16,
      "learning_rate": 8.100201265702843e-06,
      "loss": 1.8684,
      "step": 185
    },
    {
      "epoch": 1.19,
      "learning_rate": 1.1561184596045382e-05,
      "loss": 1.8168,
      "step": 190
    },
    {
      "epoch": 1.22,
      "learning_rate": 1.5571482899316234e-05,
      "loss": 1.876,
      "step": 195
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.009247481060283e-05,
      "loss": 1.8544,
      "step": 200
    },
    {
      "epoch": 1.28,
      "learning_rate": 2.508062070637389e-05,
      "loss": 1.7766,
      "step": 205
    },
    {
      "epoch": 1.31,
      "learning_rate": 3.048788201485529e-05,
      "loss": 1.9737,
      "step": 210
    },
    {
      "epoch": 1.34,
      "learning_rate": 3.626218385373654e-05,
      "loss": 1.8173,
      "step": 215
    },
    {
      "epoch": 1.38,
      "learning_rate": 4.234791653975481e-05,
      "loss": 1.7998,
      "step": 220
    },
    {
      "epoch": 1.41,
      "learning_rate": 4.8686471140343875e-05,
      "loss": 1.8463,
      "step": 225
    },
    {
      "epoch": 1.44,
      "learning_rate": 5.5216803909693576e-05,
      "loss": 1.8385,
      "step": 230
    },
    {
      "epoch": 1.47,
      "learning_rate": 6.187602417339216e-05,
      "loss": 1.8556,
      "step": 235
    },
    {
      "epoch": 1.5,
      "learning_rate": 6.859999999999999e-05,
      "loss": 1.7617,
      "step": 240
    },
    {
      "epoch": 1.53,
      "learning_rate": 7.532397582660782e-05,
      "loss": 1.7911,
      "step": 245
    },
    {
      "epoch": 1.56,
      "learning_rate": 8.19831960903064e-05,
      "loss": 1.7528,
      "step": 250
    },
    {
      "epoch": 1.59,
      "learning_rate": 8.85135288596561e-05,
      "loss": 1.8805,
      "step": 255
    },
    {
      "epoch": 1.62,
      "learning_rate": 9.485208346024518e-05,
      "loss": 1.9808,
      "step": 260
    },
    {
      "epoch": 1.66,
      "learning_rate": 0.00010093781614626343,
      "loss": 1.8408,
      "step": 265
    },
    {
      "epoch": 1.69,
      "learning_rate": 0.00010671211798514468,
      "loss": 1.9133,
      "step": 270
    },
    {
      "epoch": 1.72,
      "learning_rate": 0.00011211937929362609,
      "loss": 1.8695,
      "step": 275
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.00011710752518939715,
      "loss": 1.8373,
      "step": 280
    },
    {
      "epoch": 1.78,
      "learning_rate": 0.00012162851710068373,
      "loss": 1.7998,
      "step": 285
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.0001256388154039546,
      "loss": 1.9473,
      "step": 290
    },
    {
      "epoch": 1.84,
      "learning_rate": 0.00012909979873429716,
      "loss": 1.8208,
      "step": 295
    },
    {
      "epoch": 1.88,
      "learning_rate": 0.00013197813593027427,
      "loss": 1.8304,
      "step": 300
    },
    {
      "epoch": 1.91,
      "learning_rate": 0.00013424610703122953,
      "loss": 1.7997,
      "step": 305
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.0001358818702356616,
      "loss": 1.8045,
      "step": 310
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.0001368696722497127,
      "loss": 1.8645,
      "step": 315
    },
    {
      "epoch": 2.0,
      "learning_rate": 0.0001372,
      "loss": 1.8994,
      "step": 320
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.8014581203460693,
      "eval_runtime": 11.0626,
      "eval_samples_per_second": 22.599,
      "eval_steps_per_second": 2.893,
      "step": 320
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.0001368696722497127,
      "loss": 1.7444,
      "step": 325
    },
    {
      "epoch": 2.06,
      "learning_rate": 0.0001358818702356616,
      "loss": 1.8609,
      "step": 330
    },
    {
      "epoch": 2.09,
      "learning_rate": 0.00013424610703122953,
      "loss": 1.6945,
      "step": 335
    },
    {
      "epoch": 2.12,
      "learning_rate": 0.00013197813593027427,
      "loss": 1.7166,
      "step": 340
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.00012909979873429716,
      "loss": 1.738,
      "step": 345
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.0001256388154039546,
      "loss": 1.8311,
      "step": 350
    },
    {
      "epoch": 2.22,
      "learning_rate": 0.00012162851710068375,
      "loss": 1.7936,
      "step": 355
    },
    {
      "epoch": 2.25,
      "learning_rate": 0.00011710752518939717,
      "loss": 1.7759,
      "step": 360
    },
    {
      "epoch": 2.28,
      "learning_rate": 0.00011211937929362612,
      "loss": 1.8082,
      "step": 365
    },
    {
      "epoch": 2.31,
      "learning_rate": 0.00010671211798514472,
      "loss": 1.7213,
      "step": 370
    },
    {
      "epoch": 2.34,
      "learning_rate": 0.00010093781614626346,
      "loss": 1.7968,
      "step": 375
    },
    {
      "epoch": 2.38,
      "learning_rate": 9.48520834602452e-05,
      "loss": 1.8208,
      "step": 380
    },
    {
      "epoch": 2.41,
      "learning_rate": 8.851352885965613e-05,
      "loss": 1.6572,
      "step": 385
    },
    {
      "epoch": 2.44,
      "learning_rate": 8.198319609030645e-05,
      "loss": 1.8122,
      "step": 390
    },
    {
      "epoch": 2.47,
      "learning_rate": 7.532397582660786e-05,
      "loss": 1.7665,
      "step": 395
    },
    {
      "epoch": 2.5,
      "learning_rate": 6.860000000000001e-05,
      "loss": 1.8777,
      "step": 400
    },
    {
      "epoch": 2.53,
      "learning_rate": 6.187602417339219e-05,
      "loss": 1.6834,
      "step": 405
    },
    {
      "epoch": 2.56,
      "learning_rate": 5.5216803909693664e-05,
      "loss": 1.7483,
      "step": 410
    },
    {
      "epoch": 2.59,
      "learning_rate": 4.868647114034385e-05,
      "loss": 1.6818,
      "step": 415
    },
    {
      "epoch": 2.62,
      "learning_rate": 4.234791653975484e-05,
      "loss": 1.7057,
      "step": 420
    },
    {
      "epoch": 2.66,
      "learning_rate": 3.6262183853736576e-05,
      "loss": 1.8172,
      "step": 425
    },
    {
      "epoch": 2.69,
      "learning_rate": 3.0487882014855322e-05,
      "loss": 1.7146,
      "step": 430
    },
    {
      "epoch": 2.72,
      "learning_rate": 2.5080620706373965e-05,
      "loss": 1.6912,
      "step": 435
    },
    {
      "epoch": 2.75,
      "learning_rate": 2.0092474810602897e-05,
      "loss": 1.7294,
      "step": 440
    },
    {
      "epoch": 2.78,
      "learning_rate": 1.5571482899316234e-05,
      "loss": 1.7523,
      "step": 445
    },
    {
      "epoch": 2.81,
      "learning_rate": 1.1561184596045404e-05,
      "loss": 1.768,
      "step": 450
    },
    {
      "epoch": 2.84,
      "learning_rate": 8.100201265702858e-06,
      "loss": 1.6234,
      "step": 455
    },
    {
      "epoch": 2.88,
      "learning_rate": 5.221864069725753e-06,
      "loss": 1.7473,
      "step": 460
    },
    {
      "epoch": 2.91,
      "learning_rate": 2.9538929687704977e-06,
      "loss": 1.8052,
      "step": 465
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.318129764338385e-06,
      "loss": 1.6156,
      "step": 470
    },
    {
      "epoch": 2.97,
      "learning_rate": 3.303277502872907e-07,
      "loss": 1.6744,
      "step": 475
    },
    {
      "epoch": 3.0,
      "learning_rate": 0.0,
      "loss": 1.7333,
      "step": 480
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.7123054265975952,
      "eval_runtime": 11.0761,
      "eval_samples_per_second": 22.571,
      "eval_steps_per_second": 2.889,
      "step": 480
    },
    {
      "epoch": 3.03,
      "learning_rate": 3.303277502872907e-07,
      "loss": 1.6392,
      "step": 485
    },
    {
      "epoch": 3.06,
      "learning_rate": 1.3181297643383773e-06,
      "loss": 1.6924,
      "step": 490
    },
    {
      "epoch": 3.09,
      "learning_rate": 2.9538929687704825e-06,
      "loss": 1.667,
      "step": 495
    },
    {
      "epoch": 3.12,
      "learning_rate": 5.22186406972573e-06,
      "loss": 1.6132,
      "step": 500
    },
    {
      "epoch": 3.16,
      "learning_rate": 8.100201265702836e-06,
      "loss": 1.6915,
      "step": 505
    },
    {
      "epoch": 3.19,
      "learning_rate": 1.1561184596045374e-05,
      "loss": 1.6571,
      "step": 510
    },
    {
      "epoch": 3.22,
      "learning_rate": 1.5571482899316204e-05,
      "loss": 1.5234,
      "step": 515
    },
    {
      "epoch": 3.25,
      "learning_rate": 2.0092474810602867e-05,
      "loss": 1.6652,
      "step": 520
    },
    {
      "epoch": 3.28,
      "learning_rate": 2.5080620706373927e-05,
      "loss": 1.6542,
      "step": 525
    },
    {
      "epoch": 3.31,
      "learning_rate": 3.0487882014855275e-05,
      "loss": 1.6505,
      "step": 530
    },
    {
      "epoch": 3.34,
      "learning_rate": 3.626218385373653e-05,
      "loss": 1.6654,
      "step": 535
    },
    {
      "epoch": 3.38,
      "learning_rate": 4.234791653975479e-05,
      "loss": 1.8283,
      "step": 540
    },
    {
      "epoch": 3.41,
      "learning_rate": 4.868647114034381e-05,
      "loss": 1.6716,
      "step": 545
    },
    {
      "epoch": 3.44,
      "learning_rate": 5.5216803909693624e-05,
      "loss": 1.7675,
      "step": 550
    },
    {
      "epoch": 3.47,
      "learning_rate": 6.187602417339214e-05,
      "loss": 1.6728,
      "step": 555
    },
    {
      "epoch": 3.5,
      "learning_rate": 6.859999999999997e-05,
      "loss": 1.6103,
      "step": 560
    },
    {
      "epoch": 3.53,
      "learning_rate": 7.53239758266078e-05,
      "loss": 1.6445,
      "step": 565
    },
    {
      "epoch": 3.56,
      "learning_rate": 8.198319609030632e-05,
      "loss": 1.6708,
      "step": 570
    },
    {
      "epoch": 3.59,
      "learning_rate": 8.851352885965614e-05,
      "loss": 1.7189,
      "step": 575
    },
    {
      "epoch": 3.62,
      "learning_rate": 9.485208346024516e-05,
      "loss": 1.7617,
      "step": 580
    },
    {
      "epoch": 3.66,
      "learning_rate": 0.00010093781614626343,
      "loss": 1.6343,
      "step": 585
    },
    {
      "epoch": 3.69,
      "learning_rate": 0.00010671211798514468,
      "loss": 1.7267,
      "step": 590
    },
    {
      "epoch": 3.72,
      "learning_rate": 0.00011211937929362604,
      "loss": 1.6865,
      "step": 595
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.0001171075251893971,
      "loss": 1.6896,
      "step": 600
    },
    {
      "epoch": 3.78,
      "learning_rate": 0.00012162851710068375,
      "loss": 1.716,
      "step": 605
    },
    {
      "epoch": 3.81,
      "learning_rate": 0.0001256388154039546,
      "loss": 1.7558,
      "step": 610
    },
    {
      "epoch": 3.84,
      "learning_rate": 0.00012909979873429713,
      "loss": 1.749,
      "step": 615
    },
    {
      "epoch": 3.88,
      "learning_rate": 0.00013197813593027427,
      "loss": 1.8024,
      "step": 620
    },
    {
      "epoch": 3.91,
      "learning_rate": 0.0001342461070312295,
      "loss": 1.6423,
      "step": 625
    },
    {
      "epoch": 3.94,
      "learning_rate": 0.0001358818702356616,
      "loss": 1.713,
      "step": 630
    },
    {
      "epoch": 3.97,
      "learning_rate": 0.0001368696722497127,
      "loss": 1.7649,
      "step": 635
    },
    {
      "epoch": 4.0,
      "learning_rate": 0.0001372,
      "loss": 1.5134,
      "step": 640
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.7136940956115723,
      "eval_runtime": 11.1178,
      "eval_samples_per_second": 22.487,
      "eval_steps_per_second": 2.878,
      "step": 640
    },
    {
      "epoch": 4.03,
      "learning_rate": 0.0001368696722497127,
      "loss": 1.6853,
      "step": 645
    },
    {
      "epoch": 4.06,
      "learning_rate": 0.00013588187023566163,
      "loss": 1.6863,
      "step": 650
    },
    {
      "epoch": 4.09,
      "learning_rate": 0.00013424610703122953,
      "loss": 1.7064,
      "step": 655
    },
    {
      "epoch": 4.12,
      "learning_rate": 0.00013197813593027427,
      "loss": 1.6835,
      "step": 660
    },
    {
      "epoch": 4.16,
      "learning_rate": 0.0001290997987342972,
      "loss": 1.571,
      "step": 665
    },
    {
      "epoch": 4.19,
      "learning_rate": 0.00012563881540395464,
      "loss": 1.5507,
      "step": 670
    },
    {
      "epoch": 4.22,
      "learning_rate": 0.00012162851710068381,
      "loss": 1.5875,
      "step": 675
    },
    {
      "epoch": 4.25,
      "learning_rate": 0.00011710752518939714,
      "loss": 1.705,
      "step": 680
    },
    {
      "epoch": 4.28,
      "learning_rate": 0.00011211937929362608,
      "loss": 1.6864,
      "step": 685
    },
    {
      "epoch": 4.31,
      "learning_rate": 0.00010671211798514474,
      "loss": 1.6228,
      "step": 690
    },
    {
      "epoch": 4.34,
      "learning_rate": 0.00010093781614626347,
      "loss": 1.6698,
      "step": 695
    },
    {
      "epoch": 4.38,
      "learning_rate": 9.485208346024522e-05,
      "loss": 1.4768,
      "step": 700
    },
    {
      "epoch": 4.41,
      "learning_rate": 8.851352885965621e-05,
      "loss": 1.636,
      "step": 705
    },
    {
      "epoch": 4.44,
      "learning_rate": 8.198319609030639e-05,
      "loss": 1.6327,
      "step": 710
    },
    {
      "epoch": 4.47,
      "learning_rate": 7.532397582660788e-05,
      "loss": 1.6404,
      "step": 715
    },
    {
      "epoch": 4.5,
      "learning_rate": 6.860000000000003e-05,
      "loss": 1.7279,
      "step": 720
    },
    {
      "epoch": 4.53,
      "learning_rate": 6.18760241733922e-05,
      "loss": 1.5857,
      "step": 725
    },
    {
      "epoch": 4.56,
      "learning_rate": 5.5216803909693685e-05,
      "loss": 1.5979,
      "step": 730
    },
    {
      "epoch": 4.59,
      "learning_rate": 4.8686471140343875e-05,
      "loss": 1.5906,
      "step": 735
    },
    {
      "epoch": 4.62,
      "learning_rate": 4.234791653975485e-05,
      "loss": 1.6215,
      "step": 740
    },
    {
      "epoch": 4.66,
      "learning_rate": 3.626218385373659e-05,
      "loss": 1.6579,
      "step": 745
    },
    {
      "epoch": 4.69,
      "learning_rate": 3.0487882014855336e-05,
      "loss": 1.6602,
      "step": 750
    },
    {
      "epoch": 4.72,
      "learning_rate": 2.508062070637398e-05,
      "loss": 1.6282,
      "step": 755
    },
    {
      "epoch": 4.75,
      "learning_rate": 2.0092474810602914e-05,
      "loss": 1.652,
      "step": 760
    },
    {
      "epoch": 4.78,
      "learning_rate": 1.557148289931624e-05,
      "loss": 1.5612,
      "step": 765
    },
    {
      "epoch": 4.81,
      "learning_rate": 1.1561184596045413e-05,
      "loss": 1.6313,
      "step": 770
    },
    {
      "epoch": 4.84,
      "learning_rate": 8.100201265702867e-06,
      "loss": 1.5685,
      "step": 775
    },
    {
      "epoch": 4.88,
      "learning_rate": 5.221864069725753e-06,
      "loss": 1.6143,
      "step": 780
    },
    {
      "epoch": 4.91,
      "learning_rate": 2.9538929687704977e-06,
      "loss": 1.5863,
      "step": 785
    },
    {
      "epoch": 4.94,
      "learning_rate": 1.3181297643383925e-06,
      "loss": 1.4505,
      "step": 790
    },
    {
      "epoch": 4.97,
      "learning_rate": 3.303277502872983e-07,
      "loss": 1.6715,
      "step": 795
    },
    {
      "epoch": 5.0,
      "learning_rate": 0.0,
      "loss": 1.5892,
      "step": 800
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.6500141620635986,
      "eval_runtime": 11.0924,
      "eval_samples_per_second": 22.538,
      "eval_steps_per_second": 2.885,
      "step": 800
    },
    {
      "epoch": 5.03,
      "learning_rate": 3.303277502872907e-07,
      "loss": 1.4254,
      "step": 805
    },
    {
      "epoch": 5.06,
      "learning_rate": 1.3181297643383773e-06,
      "loss": 1.5428,
      "step": 810
    },
    {
      "epoch": 5.09,
      "learning_rate": 2.9538929687704367e-06,
      "loss": 1.5335,
      "step": 815
    },
    {
      "epoch": 5.12,
      "learning_rate": 5.221864069725677e-06,
      "loss": 1.5308,
      "step": 820
    },
    {
      "epoch": 5.16,
      "learning_rate": 8.100201265702767e-06,
      "loss": 1.5882,
      "step": 825
    },
    {
      "epoch": 5.19,
      "learning_rate": 1.1561184596045435e-05,
      "loss": 1.5799,
      "step": 830
    },
    {
      "epoch": 5.22,
      "learning_rate": 1.5571482899316272e-05,
      "loss": 1.5356,
      "step": 835
    },
    {
      "epoch": 5.25,
      "learning_rate": 2.0092474810602853e-05,
      "loss": 1.5285,
      "step": 840
    },
    {
      "epoch": 5.28,
      "learning_rate": 2.5080620706373914e-05,
      "loss": 1.5356,
      "step": 845
    },
    {
      "epoch": 5.31,
      "learning_rate": 3.048788201485526e-05,
      "loss": 1.5041,
      "step": 850
    },
    {
      "epoch": 5.34,
      "learning_rate": 3.6262183853736515e-05,
      "loss": 1.5689,
      "step": 855
    },
    {
      "epoch": 5.38,
      "learning_rate": 4.2347916539754777e-05,
      "loss": 1.4609,
      "step": 860
    },
    {
      "epoch": 5.41,
      "learning_rate": 4.868647114034379e-05,
      "loss": 1.6113,
      "step": 865
    },
    {
      "epoch": 5.44,
      "learning_rate": 5.521680390969348e-05,
      "loss": 1.6169,
      "step": 870
    },
    {
      "epoch": 5.47,
      "learning_rate": 6.1876024173392e-05,
      "loss": 1.6431,
      "step": 875
    },
    {
      "epoch": 5.5,
      "learning_rate": 6.859999999999984e-05,
      "loss": 1.5976,
      "step": 880
    },
    {
      "epoch": 5.53,
      "learning_rate": 7.532397582660791e-05,
      "loss": 1.5969,
      "step": 885
    },
    {
      "epoch": 5.56,
      "learning_rate": 8.198319609030643e-05,
      "loss": 1.5681,
      "step": 890
    },
    {
      "epoch": 5.59,
      "learning_rate": 8.851352885965613e-05,
      "loss": 1.6318,
      "step": 895
    },
    {
      "epoch": 5.62,
      "learning_rate": 9.485208346024515e-05,
      "loss": 1.5222,
      "step": 900
    },
    {
      "epoch": 5.66,
      "learning_rate": 0.00010093781614626339,
      "loss": 1.5517,
      "step": 905
    },
    {
      "epoch": 5.69,
      "learning_rate": 0.00010671211798514466,
      "loss": 1.5481,
      "step": 910
    },
    {
      "epoch": 5.72,
      "learning_rate": 0.00011211937929362601,
      "loss": 1.6196,
      "step": 915
    },
    {
      "epoch": 5.75,
      "learning_rate": 0.00011710752518939709,
      "loss": 1.488,
      "step": 920
    },
    {
      "epoch": 5.78,
      "learning_rate": 0.00012162851710068368,
      "loss": 1.6155,
      "step": 925
    },
    {
      "epoch": 5.81,
      "learning_rate": 0.00012563881540395453,
      "loss": 1.628,
      "step": 930
    },
    {
      "epoch": 5.84,
      "learning_rate": 0.0001290997987342972,
      "loss": 1.6143,
      "step": 935
    },
    {
      "epoch": 5.88,
      "learning_rate": 0.0001319781359302743,
      "loss": 1.6028,
      "step": 940
    },
    {
      "epoch": 5.91,
      "learning_rate": 0.00013424610703122953,
      "loss": 1.4802,
      "step": 945
    },
    {
      "epoch": 5.94,
      "learning_rate": 0.0001358818702356616,
      "loss": 1.6464,
      "step": 950
    },
    {
      "epoch": 5.97,
      "learning_rate": 0.0001368696722497127,
      "loss": 1.6216,
      "step": 955
    },
    {
      "epoch": 6.0,
      "learning_rate": 0.0001372,
      "loss": 1.5782,
      "step": 960
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.6669092178344727,
      "eval_runtime": 11.103,
      "eval_samples_per_second": 22.517,
      "eval_steps_per_second": 2.882,
      "step": 960
    },
    {
      "epoch": 6.03,
      "learning_rate": 0.00013686967224971273,
      "loss": 1.4996,
      "step": 965
    },
    {
      "epoch": 6.06,
      "learning_rate": 0.00013588187023566163,
      "loss": 1.5676,
      "step": 970
    },
    {
      "epoch": 6.09,
      "learning_rate": 0.00013424610703122958,
      "loss": 1.4658,
      "step": 975
    },
    {
      "epoch": 6.12,
      "learning_rate": 0.00013197813593027432,
      "loss": 1.4962,
      "step": 980
    },
    {
      "epoch": 6.16,
      "learning_rate": 0.00012909979873429724,
      "loss": 1.5933,
      "step": 985
    },
    {
      "epoch": 6.19,
      "learning_rate": 0.00012563881540395458,
      "loss": 1.4505,
      "step": 990
    },
    {
      "epoch": 6.22,
      "learning_rate": 0.00012162851710068373,
      "loss": 1.5865,
      "step": 995
    },
    {
      "epoch": 6.25,
      "learning_rate": 0.00011710752518939715,
      "loss": 1.665,
      "step": 1000
    },
    {
      "epoch": 6.28,
      "learning_rate": 0.00011211937929362609,
      "loss": 1.556,
      "step": 1005
    },
    {
      "epoch": 6.31,
      "learning_rate": 0.00010671211798514474,
      "loss": 1.5632,
      "step": 1010
    },
    {
      "epoch": 6.34,
      "learning_rate": 0.00010093781614626349,
      "loss": 1.5849,
      "step": 1015
    },
    {
      "epoch": 6.38,
      "learning_rate": 9.485208346024524e-05,
      "loss": 1.5667,
      "step": 1020
    },
    {
      "epoch": 6.41,
      "learning_rate": 8.851352885965622e-05,
      "loss": 1.4537,
      "step": 1025
    },
    {
      "epoch": 6.44,
      "learning_rate": 8.198319609030653e-05,
      "loss": 1.4866,
      "step": 1030
    },
    {
      "epoch": 6.47,
      "learning_rate": 7.532397582660802e-05,
      "loss": 1.4058,
      "step": 1035
    },
    {
      "epoch": 6.5,
      "learning_rate": 6.859999999999993e-05,
      "loss": 1.4442,
      "step": 1040
    },
    {
      "epoch": 6.53,
      "learning_rate": 6.18760241733921e-05,
      "loss": 1.4465,
      "step": 1045
    },
    {
      "epoch": 6.56,
      "learning_rate": 5.5216803909693576e-05,
      "loss": 1.5129,
      "step": 1050
    },
    {
      "epoch": 6.59,
      "learning_rate": 4.868647114034389e-05,
      "loss": 1.4484,
      "step": 1055
    },
    {
      "epoch": 6.62,
      "learning_rate": 4.2347916539754865e-05,
      "loss": 1.6054,
      "step": 1060
    },
    {
      "epoch": 6.66,
      "learning_rate": 3.62621838537366e-05,
      "loss": 1.6134,
      "step": 1065
    },
    {
      "epoch": 6.69,
      "learning_rate": 3.0487882014855342e-05,
      "loss": 1.4736,
      "step": 1070
    },
    {
      "epoch": 6.72,
      "learning_rate": 2.5080620706373995e-05,
      "loss": 1.5124,
      "step": 1075
    },
    {
      "epoch": 6.75,
      "learning_rate": 2.009247481060292e-05,
      "loss": 1.528,
      "step": 1080
    },
    {
      "epoch": 6.78,
      "learning_rate": 1.5571482899316333e-05,
      "loss": 1.602,
      "step": 1085
    },
    {
      "epoch": 6.81,
      "learning_rate": 1.1561184596045489e-05,
      "loss": 1.5229,
      "step": 1090
    },
    {
      "epoch": 6.84,
      "learning_rate": 8.100201265702821e-06,
      "loss": 1.5871,
      "step": 1095
    },
    {
      "epoch": 6.88,
      "learning_rate": 5.221864069725715e-06,
      "loss": 1.5468,
      "step": 1100
    },
    {
      "epoch": 6.91,
      "learning_rate": 2.9538929687704672e-06,
      "loss": 1.4325,
      "step": 1105
    },
    {
      "epoch": 6.94,
      "learning_rate": 1.3181297643383925e-06,
      "loss": 1.5777,
      "step": 1110
    },
    {
      "epoch": 6.97,
      "learning_rate": 3.303277502872983e-07,
      "loss": 1.5088,
      "step": 1115
    },
    {
      "epoch": 7.0,
      "learning_rate": 0.0,
      "loss": 1.4601,
      "step": 1120
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.6244831085205078,
      "eval_runtime": 11.094,
      "eval_samples_per_second": 22.535,
      "eval_steps_per_second": 2.884,
      "step": 1120
    },
    {
      "epoch": 7.03,
      "learning_rate": 3.303277502872831e-07,
      "loss": 1.487,
      "step": 1125
    },
    {
      "epoch": 7.06,
      "learning_rate": 1.3181297643383697e-06,
      "loss": 1.4971,
      "step": 1130
    },
    {
      "epoch": 7.09,
      "learning_rate": 2.9538929687704367e-06,
      "loss": 1.4361,
      "step": 1135
    },
    {
      "epoch": 7.12,
      "learning_rate": 5.221864069725669e-06,
      "loss": 1.4751,
      "step": 1140
    },
    {
      "epoch": 7.16,
      "learning_rate": 8.10020126570276e-06,
      "loss": 1.3822,
      "step": 1145
    },
    {
      "epoch": 7.19,
      "learning_rate": 1.156118459604542e-05,
      "loss": 1.4882,
      "step": 1150
    },
    {
      "epoch": 7.22,
      "learning_rate": 1.5571482899316258e-05,
      "loss": 1.4465,
      "step": 1155
    },
    {
      "epoch": 7.25,
      "learning_rate": 2.0092474810602836e-05,
      "loss": 1.3924,
      "step": 1160
    },
    {
      "epoch": 7.28,
      "learning_rate": 2.5080620706373904e-05,
      "loss": 1.4828,
      "step": 1165
    },
    {
      "epoch": 7.31,
      "learning_rate": 3.0487882014855254e-05,
      "loss": 1.4428,
      "step": 1170
    },
    {
      "epoch": 7.34,
      "learning_rate": 3.6262183853736495e-05,
      "loss": 1.6162,
      "step": 1175
    },
    {
      "epoch": 7.38,
      "learning_rate": 4.234791653975476e-05,
      "loss": 1.368,
      "step": 1180
    },
    {
      "epoch": 7.41,
      "learning_rate": 4.868647114034377e-05,
      "loss": 1.4922,
      "step": 1185
    },
    {
      "epoch": 7.44,
      "learning_rate": 5.521680390969347e-05,
      "loss": 1.4195,
      "step": 1190
    },
    {
      "epoch": 7.47,
      "learning_rate": 6.187602417339198e-05,
      "loss": 1.4896,
      "step": 1195
    },
    {
      "epoch": 7.5,
      "learning_rate": 6.859999999999982e-05,
      "loss": 1.4594,
      "step": 1200
    },
    {
      "epoch": 7.53,
      "learning_rate": 7.53239758266079e-05,
      "loss": 1.4228,
      "step": 1205
    },
    {
      "epoch": 7.56,
      "learning_rate": 8.19831960903064e-05,
      "loss": 1.4571,
      "step": 1210
    },
    {
      "epoch": 7.59,
      "learning_rate": 8.851352885965611e-05,
      "loss": 1.4634,
      "step": 1215
    },
    {
      "epoch": 7.62,
      "learning_rate": 9.485208346024514e-05,
      "loss": 1.5054,
      "step": 1220
    },
    {
      "epoch": 7.66,
      "learning_rate": 0.00010093781614626339,
      "loss": 1.4162,
      "step": 1225
    },
    {
      "epoch": 7.69,
      "learning_rate": 0.00010671211798514465,
      "loss": 1.5479,
      "step": 1230
    },
    {
      "epoch": 7.72,
      "learning_rate": 0.00011211937929362601,
      "loss": 1.4107,
      "step": 1235
    },
    {
      "epoch": 7.75,
      "learning_rate": 0.00011710752518939706,
      "loss": 1.4738,
      "step": 1240
    },
    {
      "epoch": 7.78,
      "learning_rate": 0.00012162851710068366,
      "loss": 1.4738,
      "step": 1245
    },
    {
      "epoch": 7.81,
      "learning_rate": 0.0001256388154039545,
      "loss": 1.497,
      "step": 1250
    },
    {
      "epoch": 7.84,
      "learning_rate": 0.0001290997987342972,
      "loss": 1.5329,
      "step": 1255
    },
    {
      "epoch": 7.88,
      "learning_rate": 0.0001319781359302743,
      "loss": 1.4608,
      "step": 1260
    },
    {
      "epoch": 7.91,
      "learning_rate": 0.00013424610703122953,
      "loss": 1.5795,
      "step": 1265
    },
    {
      "epoch": 7.94,
      "learning_rate": 0.0001358818702356616,
      "loss": 1.5419,
      "step": 1270
    },
    {
      "epoch": 7.97,
      "learning_rate": 0.0001368696722497127,
      "loss": 1.4443,
      "step": 1275
    },
    {
      "epoch": 8.0,
      "learning_rate": 0.0001372,
      "loss": 1.4799,
      "step": 1280
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.6402026414871216,
      "eval_runtime": 11.1984,
      "eval_samples_per_second": 22.325,
      "eval_steps_per_second": 2.858,
      "step": 1280
    },
    {
      "epoch": 8.03,
      "learning_rate": 0.00013686967224971273,
      "loss": 1.4584,
      "step": 1285
    },
    {
      "epoch": 8.06,
      "learning_rate": 0.00013588187023566163,
      "loss": 1.4089,
      "step": 1290
    },
    {
      "epoch": 8.09,
      "learning_rate": 0.00013424610703122958,
      "loss": 1.55,
      "step": 1295
    },
    {
      "epoch": 8.12,
      "learning_rate": 0.00013197813593027432,
      "loss": 1.4449,
      "step": 1300
    },
    {
      "epoch": 8.16,
      "learning_rate": 0.00012909979873429724,
      "loss": 1.367,
      "step": 1305
    },
    {
      "epoch": 8.19,
      "learning_rate": 0.00012563881540395458,
      "loss": 1.5241,
      "step": 1310
    },
    {
      "epoch": 8.22,
      "learning_rate": 0.00012162851710068375,
      "loss": 1.5117,
      "step": 1315
    },
    {
      "epoch": 8.25,
      "learning_rate": 0.00011710752518939715,
      "loss": 1.4418,
      "step": 1320
    },
    {
      "epoch": 8.28,
      "learning_rate": 0.00011211937929362609,
      "loss": 1.5439,
      "step": 1325
    },
    {
      "epoch": 8.31,
      "learning_rate": 0.00010671211798514476,
      "loss": 1.3722,
      "step": 1330
    },
    {
      "epoch": 8.34,
      "learning_rate": 0.0001009378161462635,
      "loss": 1.4284,
      "step": 1335
    },
    {
      "epoch": 8.38,
      "learning_rate": 9.485208346024526e-05,
      "loss": 1.516,
      "step": 1340
    },
    {
      "epoch": 8.41,
      "learning_rate": 8.851352885965623e-05,
      "loss": 1.3386,
      "step": 1345
    },
    {
      "epoch": 8.44,
      "learning_rate": 8.198319609030655e-05,
      "loss": 1.4559,
      "step": 1350
    },
    {
      "epoch": 8.47,
      "learning_rate": 7.532397582660803e-05,
      "loss": 1.6084,
      "step": 1355
    },
    {
      "epoch": 8.5,
      "learning_rate": 6.859999999999995e-05,
      "loss": 1.4138,
      "step": 1360
    },
    {
      "epoch": 8.53,
      "learning_rate": 6.187602417339212e-05,
      "loss": 1.3939,
      "step": 1365
    },
    {
      "epoch": 8.56,
      "learning_rate": 5.5216803909693596e-05,
      "loss": 1.4058,
      "step": 1370
    },
    {
      "epoch": 8.59,
      "learning_rate": 4.86864711403439e-05,
      "loss": 1.4439,
      "step": 1375
    },
    {
      "epoch": 8.62,
      "learning_rate": 4.2347916539754885e-05,
      "loss": 1.345,
      "step": 1380
    },
    {
      "epoch": 8.66,
      "learning_rate": 3.626218385373662e-05,
      "loss": 1.4172,
      "step": 1385
    },
    {
      "epoch": 8.69,
      "learning_rate": 3.048788201485536e-05,
      "loss": 1.4024,
      "step": 1390
    },
    {
      "epoch": 8.72,
      "learning_rate": 2.5080620706374002e-05,
      "loss": 1.4443,
      "step": 1395
    },
    {
      "epoch": 8.75,
      "learning_rate": 2.0092474810602934e-05,
      "loss": 1.4758,
      "step": 1400
    },
    {
      "epoch": 8.78,
      "learning_rate": 1.5571482899316343e-05,
      "loss": 1.4565,
      "step": 1405
    },
    {
      "epoch": 8.81,
      "learning_rate": 1.1561184596045496e-05,
      "loss": 1.3571,
      "step": 1410
    },
    {
      "epoch": 8.84,
      "learning_rate": 8.100201265702828e-06,
      "loss": 1.478,
      "step": 1415
    },
    {
      "epoch": 8.88,
      "learning_rate": 5.2218640697257225e-06,
      "loss": 1.452,
      "step": 1420
    },
    {
      "epoch": 8.91,
      "learning_rate": 2.953892968770475e-06,
      "loss": 1.3222,
      "step": 1425
    },
    {
      "epoch": 8.94,
      "learning_rate": 1.3181297643384001e-06,
      "loss": 1.426,
      "step": 1430
    },
    {
      "epoch": 8.97,
      "learning_rate": 3.303277502872983e-07,
      "loss": 1.4787,
      "step": 1435
    },
    {
      "epoch": 9.0,
      "learning_rate": 0.0,
      "loss": 1.353,
      "step": 1440
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.6076909303665161,
      "eval_runtime": 11.1829,
      "eval_samples_per_second": 22.355,
      "eval_steps_per_second": 2.862,
      "step": 1440
    }
  ],
  "max_steps": 1600,
  "num_train_epochs": 10,
  "total_flos": 1505042104320000.0,
  "trial_name": null,
  "trial_params": null
}