| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9997382884061764, | |
| "eval_steps": 100, | |
| "global_step": 955, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0, | |
| "grad_norm": 0.59375, | |
| "learning_rate": 5.208333333333333e-08, | |
| "logits/chosen": -2.1666858196258545, | |
| "logits/rejected": -2.182244300842285, | |
| "logps/chosen": -12.368609428405762, | |
| "logps/rejected": -24.687644958496094, | |
| "loss": 0.6931, | |
| "pred_label": 0.0, | |
| "rewards/accuracies": 0.0, | |
| "rewards/chosen": 0.0, | |
| "rewards/margins": 0.0, | |
| "rewards/rejected": 0.0, | |
| "step": 1, | |
| "use_label": 10.0 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "grad_norm": 0.6796875, | |
| "learning_rate": 1.0416666666666667e-06, | |
| "logits/chosen": -2.227864980697632, | |
| "logits/rejected": -2.276106834411621, | |
| "logps/chosen": -57.02927780151367, | |
| "logps/rejected": -66.8729019165039, | |
| "loss": 0.6927, | |
| "pred_label": 0.0, | |
| "rewards/accuracies": 0.24671052396297455, | |
| "rewards/chosen": 0.003993770573288202, | |
| "rewards/margins": 0.0009077258291654289, | |
| "rewards/rejected": 0.003086044918745756, | |
| "step": 20, | |
| "use_label": 170.0 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 2.0833333333333334e-06, | |
| "logits/chosen": -2.2728817462921143, | |
| "logits/rejected": -2.261592388153076, | |
| "logps/chosen": -54.7827033996582, | |
| "logps/rejected": -67.2376708984375, | |
| "loss": 0.6915, | |
| "pred_label": 0.0, | |
| "rewards/accuracies": 0.24687500298023224, | |
| "rewards/chosen": 0.017464743927121162, | |
| "rewards/margins": 0.0016005486249923706, | |
| "rewards/rejected": 0.015864195302128792, | |
| "step": 40, | |
| "use_label": 482.0 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "grad_norm": 0.625, | |
| "learning_rate": 3.125e-06, | |
| "logits/chosen": -2.321017026901245, | |
| "logits/rejected": -2.318946123123169, | |
| "logps/chosen": -75.58020782470703, | |
| "logps/rejected": -87.66261291503906, | |
| "loss": 0.6905, | |
| "pred_label": 4.987500190734863, | |
| "rewards/accuracies": 0.3187499940395355, | |
| "rewards/chosen": 0.031644098460674286, | |
| "rewards/margins": 0.009459299966692924, | |
| "rewards/rejected": 0.02218480221927166, | |
| "step": 60, | |
| "use_label": 797.0125122070312 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "grad_norm": 0.57421875, | |
| "learning_rate": 4.166666666666667e-06, | |
| "logits/chosen": -2.2973294258117676, | |
| "logits/rejected": -2.2655692100524902, | |
| "logps/chosen": -77.97566223144531, | |
| "logps/rejected": -81.31121826171875, | |
| "loss": 0.6909, | |
| "pred_label": 29.850000381469727, | |
| "rewards/accuracies": 0.3343749940395355, | |
| "rewards/chosen": 0.02917659282684326, | |
| "rewards/margins": 0.014682939276099205, | |
| "rewards/rejected": 0.014493651688098907, | |
| "step": 80, | |
| "use_label": 1092.1500244140625 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "grad_norm": 0.625, | |
| "learning_rate": 4.9997324926814375e-06, | |
| "logits/chosen": -2.210732936859131, | |
| "logits/rejected": -2.2176434993743896, | |
| "logps/chosen": -66.4733657836914, | |
| "logps/rejected": -74.55338287353516, | |
| "loss": 0.6911, | |
| "pred_label": 68.07499694824219, | |
| "rewards/accuracies": 0.33125001192092896, | |
| "rewards/chosen": 0.013588580302894115, | |
| "rewards/margins": 0.026675995439291, | |
| "rewards/rejected": -0.01308741606771946, | |
| "step": 100, | |
| "use_label": 1373.925048828125 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_logits/chosen": -2.120985984802246, | |
| "eval_logits/rejected": -2.093513250350952, | |
| "eval_logps/chosen": -69.42622375488281, | |
| "eval_logps/rejected": -78.9540786743164, | |
| "eval_loss": 0.691917359828949, | |
| "eval_pred_label": 150.14285278320312, | |
| "eval_rewards/accuracies": 0.3392857015132904, | |
| "eval_rewards/chosen": -0.005256766453385353, | |
| "eval_rewards/margins": 0.030303288251161575, | |
| "eval_rewards/rejected": -0.03556005656719208, | |
| "eval_runtime": 247.9513, | |
| "eval_samples_per_second": 8.066, | |
| "eval_steps_per_second": 0.254, | |
| "eval_use_label": 1705.857177734375, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "grad_norm": 0.53125, | |
| "learning_rate": 4.9903757462135984e-06, | |
| "logits/chosen": -2.24790620803833, | |
| "logits/rejected": -2.1782658100128174, | |
| "logps/chosen": -67.23531341552734, | |
| "logps/rejected": -80.04717254638672, | |
| "loss": 0.6914, | |
| "pred_label": 243.0, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": 0.00682856747880578, | |
| "rewards/margins": 0.03467796370387077, | |
| "rewards/rejected": -0.02784939482808113, | |
| "step": 120, | |
| "use_label": 2023.0 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "grad_norm": 0.341796875, | |
| "learning_rate": 4.967700826904229e-06, | |
| "logits/chosen": -2.1205825805664062, | |
| "logits/rejected": -2.150360584259033, | |
| "logps/chosen": -58.376564025878906, | |
| "logps/rejected": -71.84730529785156, | |
| "loss": 0.6917, | |
| "pred_label": 304.9125061035156, | |
| "rewards/accuracies": 0.2874999940395355, | |
| "rewards/chosen": 0.0049698068760335445, | |
| "rewards/margins": 0.025117725133895874, | |
| "rewards/rejected": -0.02014791965484619, | |
| "step": 140, | |
| "use_label": 2281.08740234375 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 4.931828996974498e-06, | |
| "logits/chosen": -2.1872148513793945, | |
| "logits/rejected": -2.1555256843566895, | |
| "logps/chosen": -66.3367919921875, | |
| "logps/rejected": -69.24983215332031, | |
| "loss": 0.6918, | |
| "pred_label": 371.5375061035156, | |
| "rewards/accuracies": 0.3218750059604645, | |
| "rewards/chosen": -0.017877796664834023, | |
| "rewards/margins": 0.03909943252801895, | |
| "rewards/rejected": -0.05697723478078842, | |
| "step": 160, | |
| "use_label": 2534.46240234375 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "grad_norm": 0.4921875, | |
| "learning_rate": 4.882952093833628e-06, | |
| "logits/chosen": -2.1010584831237793, | |
| "logits/rejected": -2.112929582595825, | |
| "logps/chosen": -67.18075561523438, | |
| "logps/rejected": -77.23786163330078, | |
| "loss": 0.6925, | |
| "pred_label": 444.2124938964844, | |
| "rewards/accuracies": 0.3031249940395355, | |
| "rewards/chosen": -0.03951570764183998, | |
| "rewards/margins": 0.03356783464550972, | |
| "rewards/rejected": -0.0730835422873497, | |
| "step": 180, | |
| "use_label": 2781.78759765625 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "grad_norm": 0.546875, | |
| "learning_rate": 4.821331504159906e-06, | |
| "logits/chosen": -2.181281805038452, | |
| "logits/rejected": -2.155298948287964, | |
| "logps/chosen": -78.88096618652344, | |
| "logps/rejected": -77.27136993408203, | |
| "loss": 0.692, | |
| "pred_label": 513.2125244140625, | |
| "rewards/accuracies": 0.3812499940395355, | |
| "rewards/chosen": -0.019123973324894905, | |
| "rewards/margins": 0.040298379957675934, | |
| "rewards/rejected": -0.05942235141992569, | |
| "step": 200, | |
| "use_label": 3032.78759765625 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_logits/chosen": -2.1267549991607666, | |
| "eval_logits/rejected": -2.1057066917419434, | |
| "eval_logps/chosen": -71.54093170166016, | |
| "eval_logps/rejected": -82.35039520263672, | |
| "eval_loss": 0.6926834583282471, | |
| "eval_pred_label": 622.952392578125, | |
| "eval_rewards/accuracies": 0.3432539701461792, | |
| "eval_rewards/chosen": -0.026403911411762238, | |
| "eval_rewards/margins": 0.043119337409734726, | |
| "eval_rewards/rejected": -0.06952324509620667, | |
| "eval_runtime": 248.2687, | |
| "eval_samples_per_second": 8.056, | |
| "eval_steps_per_second": 0.254, | |
| "eval_use_label": 3337.047607421875, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "grad_norm": 0.609375, | |
| "learning_rate": 4.747296766042161e-06, | |
| "logits/chosen": -2.2548727989196777, | |
| "logits/rejected": -2.2427258491516113, | |
| "logps/chosen": -74.4991683959961, | |
| "logps/rejected": -75.8321762084961, | |
| "loss": 0.6924, | |
| "pred_label": 738.5, | |
| "rewards/accuracies": 0.3531250059604645, | |
| "rewards/chosen": -0.024670986458659172, | |
| "rewards/margins": 0.04779377579689026, | |
| "rewards/rejected": -0.07246476411819458, | |
| "step": 220, | |
| "use_label": 3631.5 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "grad_norm": 0.46875, | |
| "learning_rate": 4.661243806657256e-06, | |
| "logits/chosen": -2.2358717918395996, | |
| "logits/rejected": -2.216477870941162, | |
| "logps/chosen": -72.57451629638672, | |
| "logps/rejected": -79.20014953613281, | |
| "loss": 0.6921, | |
| "pred_label": 830.7750244140625, | |
| "rewards/accuracies": 0.3499999940395355, | |
| "rewards/chosen": -0.013481785543262959, | |
| "rewards/margins": 0.0440317802131176, | |
| "rewards/rejected": -0.05751357227563858, | |
| "step": 240, | |
| "use_label": 3859.22509765625 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "grad_norm": 0.5390625, | |
| "learning_rate": 4.563632824908252e-06, | |
| "logits/chosen": -2.204738140106201, | |
| "logits/rejected": -2.2045350074768066, | |
| "logps/chosen": -64.52825164794922, | |
| "logps/rejected": -74.71345520019531, | |
| "loss": 0.6919, | |
| "pred_label": 912.1624755859375, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -0.01493888907134533, | |
| "rewards/margins": 0.038629818707704544, | |
| "rewards/rejected": -0.05356870964169502, | |
| "step": 260, | |
| "use_label": 4097.83740234375 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "grad_norm": 0.431640625, | |
| "learning_rate": 4.454985830346574e-06, | |
| "logits/chosen": -2.224844455718994, | |
| "logits/rejected": -2.247999668121338, | |
| "logps/chosen": -72.3452377319336, | |
| "logps/rejected": -75.01800537109375, | |
| "loss": 0.6916, | |
| "pred_label": 993.7874755859375, | |
| "rewards/accuracies": 0.3125, | |
| "rewards/chosen": -0.04014473780989647, | |
| "rewards/margins": 0.030534306541085243, | |
| "rewards/rejected": -0.07067903876304626, | |
| "step": 280, | |
| "use_label": 4336.21240234375 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "grad_norm": 0.423828125, | |
| "learning_rate": 4.335883851539693e-06, | |
| "logits/chosen": -2.2155380249023438, | |
| "logits/rejected": -2.2151846885681152, | |
| "logps/chosen": -67.15587615966797, | |
| "logps/rejected": -74.2086181640625, | |
| "loss": 0.6924, | |
| "pred_label": 1083.4625244140625, | |
| "rewards/accuracies": 0.34687501192092896, | |
| "rewards/chosen": -0.020541679114103317, | |
| "rewards/margins": 0.06299655884504318, | |
| "rewards/rejected": -0.0835382491350174, | |
| "step": 300, | |
| "use_label": 4566.53759765625 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_logits/chosen": -2.2169294357299805, | |
| "eval_logits/rejected": -2.1932876110076904, | |
| "eval_logps/chosen": -72.5876693725586, | |
| "eval_logps/rejected": -84.35366821289062, | |
| "eval_loss": 0.6928625702857971, | |
| "eval_pred_label": 1200.2698974609375, | |
| "eval_rewards/accuracies": 0.3392857015132904, | |
| "eval_rewards/chosen": -0.03687124699354172, | |
| "eval_rewards/margins": 0.0526847243309021, | |
| "eval_rewards/rejected": -0.08955597132444382, | |
| "eval_runtime": 247.9119, | |
| "eval_samples_per_second": 8.067, | |
| "eval_steps_per_second": 0.254, | |
| "eval_use_label": 4863.72998046875, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 4.206963828813555e-06, | |
| "logits/chosen": -2.291391134262085, | |
| "logits/rejected": -2.3002986907958984, | |
| "logps/chosen": -68.5405502319336, | |
| "logps/rejected": -83.0180435180664, | |
| "loss": 0.6927, | |
| "pred_label": 1323.074951171875, | |
| "rewards/accuracies": 0.30000001192092896, | |
| "rewards/chosen": -0.07074997574090958, | |
| "rewards/margins": 0.04067195579409599, | |
| "rewards/rejected": -0.11142192780971527, | |
| "step": 320, | |
| "use_label": 5150.9248046875 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "grad_norm": 0.455078125, | |
| "learning_rate": 4.068915207986931e-06, | |
| "logits/chosen": -2.2867865562438965, | |
| "logits/rejected": -2.2617173194885254, | |
| "logps/chosen": -64.90373229980469, | |
| "logps/rejected": -74.42888641357422, | |
| "loss": 0.692, | |
| "pred_label": 1427.7750244140625, | |
| "rewards/accuracies": 0.3531250059604645, | |
| "rewards/chosen": -0.016644436866044998, | |
| "rewards/margins": 0.052551619708538055, | |
| "rewards/rejected": -0.06919606029987335, | |
| "step": 340, | |
| "use_label": 5366.22509765625 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "grad_norm": 0.458984375, | |
| "learning_rate": 3.922476253313921e-06, | |
| "logits/chosen": -2.249298572540283, | |
| "logits/rejected": -2.253566265106201, | |
| "logps/chosen": -68.57295989990234, | |
| "logps/rejected": -73.1113510131836, | |
| "loss": 0.693, | |
| "pred_label": 1522.0999755859375, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.037180084735155106, | |
| "rewards/margins": 0.045733559876680374, | |
| "rewards/rejected": -0.08291363716125488, | |
| "step": 360, | |
| "use_label": 5591.89990234375 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "grad_norm": 0.4453125, | |
| "learning_rate": 3.768430099352445e-06, | |
| "logits/chosen": -2.2458603382110596, | |
| "logits/rejected": -2.2051453590393066, | |
| "logps/chosen": -70.38607788085938, | |
| "logps/rejected": -78.15666198730469, | |
| "loss": 0.6923, | |
| "pred_label": 1625.5374755859375, | |
| "rewards/accuracies": 0.3531250059604645, | |
| "rewards/chosen": -0.03562153875827789, | |
| "rewards/margins": 0.054723359644412994, | |
| "rewards/rejected": -0.09034489840269089, | |
| "step": 380, | |
| "use_label": 5808.46240234375 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "grad_norm": 0.59765625, | |
| "learning_rate": 3.607600562872785e-06, | |
| "logits/chosen": -2.196977138519287, | |
| "logits/rejected": -2.197218656539917, | |
| "logps/chosen": -81.0395736694336, | |
| "logps/rejected": -81.44091033935547, | |
| "loss": 0.6927, | |
| "pred_label": 1725.362548828125, | |
| "rewards/accuracies": 0.32499998807907104, | |
| "rewards/chosen": -0.03092697635293007, | |
| "rewards/margins": 0.049932099878787994, | |
| "rewards/rejected": -0.08085907250642776, | |
| "step": 400, | |
| "use_label": 6028.6376953125 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_logits/chosen": -2.118962526321411, | |
| "eval_logits/rejected": -2.093430995941162, | |
| "eval_logps/chosen": -71.01036071777344, | |
| "eval_logps/rejected": -83.43638610839844, | |
| "eval_loss": 0.6925376653671265, | |
| "eval_pred_label": 1843.920654296875, | |
| "eval_rewards/accuracies": 0.341269850730896, | |
| "eval_rewards/chosen": -0.021098149940371513, | |
| "eval_rewards/margins": 0.05928494408726692, | |
| "eval_rewards/rejected": -0.08038310706615448, | |
| "eval_runtime": 248.0095, | |
| "eval_samples_per_second": 8.064, | |
| "eval_steps_per_second": 0.254, | |
| "eval_use_label": 6324.07958984375, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "grad_norm": 0.30078125, | |
| "learning_rate": 3.4408477372034743e-06, | |
| "logits/chosen": -2.146075487136841, | |
| "logits/rejected": -2.152238607406616, | |
| "logps/chosen": -65.8438720703125, | |
| "logps/rejected": -70.74162292480469, | |
| "loss": 0.692, | |
| "pred_label": 1975.637451171875, | |
| "rewards/accuracies": 0.3531250059604645, | |
| "rewards/chosen": -0.017682421952486038, | |
| "rewards/margins": 0.05984373763203621, | |
| "rewards/rejected": -0.07752615213394165, | |
| "step": 420, | |
| "use_label": 6602.3623046875 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "grad_norm": 0.9296875, | |
| "learning_rate": 3.269063392575352e-06, | |
| "logits/chosen": -2.2523856163024902, | |
| "logits/rejected": -2.2490224838256836, | |
| "logps/chosen": -74.74308013916016, | |
| "logps/rejected": -74.57176208496094, | |
| "loss": 0.6927, | |
| "pred_label": 2072.27490234375, | |
| "rewards/accuracies": 0.3218750059604645, | |
| "rewards/chosen": -0.027858540415763855, | |
| "rewards/margins": 0.05976608395576477, | |
| "rewards/rejected": -0.08762462437152863, | |
| "step": 440, | |
| "use_label": 6825.72509765625 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 0.34375, | |
| "learning_rate": 3.09316620706208e-06, | |
| "logits/chosen": -2.2484962940216064, | |
| "logits/rejected": -2.253873109817505, | |
| "logps/chosen": -68.02134704589844, | |
| "logps/rejected": -73.40286254882812, | |
| "loss": 0.6929, | |
| "pred_label": 2175.53759765625, | |
| "rewards/accuracies": 0.328125, | |
| "rewards/chosen": -0.03111925721168518, | |
| "rewards/margins": 0.06376632302999496, | |
| "rewards/rejected": -0.09488557279109955, | |
| "step": 460, | |
| "use_label": 7042.46240234375 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "grad_norm": 0.3984375, | |
| "learning_rate": 2.91409685362137e-06, | |
| "logits/chosen": -2.2359812259674072, | |
| "logits/rejected": -2.2330563068389893, | |
| "logps/chosen": -75.03883361816406, | |
| "logps/rejected": -84.55928039550781, | |
| "loss": 0.6922, | |
| "pred_label": 2276.949951171875, | |
| "rewards/accuracies": 0.3687500059604645, | |
| "rewards/chosen": -0.040116917341947556, | |
| "rewards/margins": 0.0741645023226738, | |
| "rewards/rejected": -0.11428143084049225, | |
| "step": 480, | |
| "use_label": 7261.0498046875 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "grad_norm": 0.5703125, | |
| "learning_rate": 2.7328129695107205e-06, | |
| "logits/chosen": -2.2053210735321045, | |
| "logits/rejected": -2.2094616889953613, | |
| "logps/chosen": -75.30181121826172, | |
| "logps/rejected": -77.61902618408203, | |
| "loss": 0.6924, | |
| "pred_label": 2379.137451171875, | |
| "rewards/accuracies": 0.3656249940395355, | |
| "rewards/chosen": -0.039206866174936295, | |
| "rewards/margins": 0.05418051406741142, | |
| "rewards/rejected": -0.09338738024234772, | |
| "step": 500, | |
| "use_label": 7478.8623046875 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_logits/chosen": -2.176236152648926, | |
| "eval_logits/rejected": -2.151799201965332, | |
| "eval_logps/chosen": -70.96183776855469, | |
| "eval_logps/rejected": -83.7112045288086, | |
| "eval_loss": 0.6929337382316589, | |
| "eval_pred_label": 2499.22216796875, | |
| "eval_rewards/accuracies": 0.3432539701461792, | |
| "eval_rewards/chosen": -0.02061287872493267, | |
| "eval_rewards/margins": 0.06251849234104156, | |
| "eval_rewards/rejected": -0.08313137292861938, | |
| "eval_runtime": 248.0888, | |
| "eval_samples_per_second": 8.062, | |
| "eval_steps_per_second": 0.254, | |
| "eval_use_label": 7772.77783203125, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "grad_norm": 0.61328125, | |
| "learning_rate": 2.5502840349805074e-06, | |
| "logits/chosen": -2.195094347000122, | |
| "logits/rejected": -2.237112045288086, | |
| "logps/chosen": -70.13484954833984, | |
| "logps/rejected": -79.53434753417969, | |
| "loss": 0.692, | |
| "pred_label": 2632.125, | |
| "rewards/accuracies": 0.3656249940395355, | |
| "rewards/chosen": -0.018158430233597755, | |
| "rewards/margins": 0.061979226768016815, | |
| "rewards/rejected": -0.08013766258955002, | |
| "step": 520, | |
| "use_label": 8049.875 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 2.367486188632446e-06, | |
| "logits/chosen": -2.1844329833984375, | |
| "logits/rejected": -2.1980721950531006, | |
| "logps/chosen": -78.40437316894531, | |
| "logps/rejected": -80.49110412597656, | |
| "loss": 0.6925, | |
| "pred_label": 2729.66259765625, | |
| "rewards/accuracies": 0.3656249940395355, | |
| "rewards/chosen": -0.028946753591299057, | |
| "rewards/margins": 0.0717843621969223, | |
| "rewards/rejected": -0.10073111951351166, | |
| "step": 540, | |
| "use_label": 8272.337890625 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "grad_norm": 0.48828125, | |
| "learning_rate": 2.1853970071701415e-06, | |
| "logits/chosen": -2.19417667388916, | |
| "logits/rejected": -2.1900599002838135, | |
| "logps/chosen": -73.69783020019531, | |
| "logps/rejected": -72.62937927246094, | |
| "loss": 0.6926, | |
| "pred_label": 2827.875, | |
| "rewards/accuracies": 0.2874999940395355, | |
| "rewards/chosen": -0.04889845848083496, | |
| "rewards/margins": 0.04425561800599098, | |
| "rewards/rejected": -0.09315408021211624, | |
| "step": 560, | |
| "use_label": 8494.125 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "grad_norm": 0.328125, | |
| "learning_rate": 2.00499027745888e-06, | |
| "logits/chosen": -2.224670171737671, | |
| "logits/rejected": -2.230435371398926, | |
| "logps/chosen": -76.27436065673828, | |
| "logps/rejected": -87.6956787109375, | |
| "loss": 0.6922, | |
| "pred_label": 2926.862548828125, | |
| "rewards/accuracies": 0.3687500059604645, | |
| "rewards/chosen": -0.055185507982969284, | |
| "rewards/margins": 0.05776001885533333, | |
| "rewards/rejected": -0.11294553428888321, | |
| "step": 580, | |
| "use_label": 8715.1376953125 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "grad_norm": 0.5546875, | |
| "learning_rate": 1.8272307888529276e-06, | |
| "logits/chosen": -2.231316089630127, | |
| "logits/rejected": -2.258852481842041, | |
| "logps/chosen": -84.25640106201172, | |
| "logps/rejected": -99.73040771484375, | |
| "loss": 0.6929, | |
| "pred_label": 3042.83740234375, | |
| "rewards/accuracies": 0.40312498807907104, | |
| "rewards/chosen": -0.07248945534229279, | |
| "rewards/margins": 0.06686891615390778, | |
| "rewards/rejected": -0.13935837149620056, | |
| "step": 600, | |
| "use_label": 8919.162109375 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_logits/chosen": -2.1407980918884277, | |
| "eval_logits/rejected": -2.1125032901763916, | |
| "eval_logps/chosen": -73.41705322265625, | |
| "eval_logps/rejected": -86.9944839477539, | |
| "eval_loss": 0.6927017569541931, | |
| "eval_pred_label": 3177.142822265625, | |
| "eval_rewards/accuracies": 0.3511904776096344, | |
| "eval_rewards/chosen": -0.04516514018177986, | |
| "eval_rewards/margins": 0.07079902291297913, | |
| "eval_rewards/rejected": -0.11596415936946869, | |
| "eval_runtime": 248.1359, | |
| "eval_samples_per_second": 8.06, | |
| "eval_steps_per_second": 0.254, | |
| "eval_use_label": 9198.857421875, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "grad_norm": 0.404296875, | |
| "learning_rate": 1.6530691736402317e-06, | |
| "logits/chosen": -2.1386609077453613, | |
| "logits/rejected": -2.1743404865264893, | |
| "logps/chosen": -65.55394744873047, | |
| "logps/rejected": -88.32081604003906, | |
| "loss": 0.6924, | |
| "pred_label": 3318.58740234375, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -0.05246468633413315, | |
| "rewards/margins": 0.059620797634124756, | |
| "rewards/rejected": -0.1120854839682579, | |
| "step": 620, | |
| "use_label": 9467.412109375 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "grad_norm": 0.51171875, | |
| "learning_rate": 1.4834368231970922e-06, | |
| "logits/chosen": -2.1956310272216797, | |
| "logits/rejected": -2.2024998664855957, | |
| "logps/chosen": -77.41986846923828, | |
| "logps/rejected": -82.58815002441406, | |
| "loss": 0.692, | |
| "pred_label": 3414.199951171875, | |
| "rewards/accuracies": 0.3656249940395355, | |
| "rewards/chosen": -0.05061299726366997, | |
| "rewards/margins": 0.07674984633922577, | |
| "rewards/rejected": -0.12736284732818604, | |
| "step": 640, | |
| "use_label": 9691.7998046875 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "grad_norm": 0.2890625, | |
| "learning_rate": 1.3192409070404582e-06, | |
| "logits/chosen": -2.1827545166015625, | |
| "logits/rejected": -2.1392319202423096, | |
| "logps/chosen": -71.07948303222656, | |
| "logps/rejected": -78.78751373291016, | |
| "loss": 0.6924, | |
| "pred_label": 3519.35009765625, | |
| "rewards/accuracies": 0.3375000059604645, | |
| "rewards/chosen": -0.0542152114212513, | |
| "rewards/margins": 0.06142013147473335, | |
| "rewards/rejected": -0.11563535034656525, | |
| "step": 660, | |
| "use_label": 9906.650390625 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "grad_norm": 0.435546875, | |
| "learning_rate": 1.1613595214152713e-06, | |
| "logits/chosen": -2.2185826301574707, | |
| "logits/rejected": -2.2344555854797363, | |
| "logps/chosen": -81.96281433105469, | |
| "logps/rejected": -87.13890838623047, | |
| "loss": 0.6923, | |
| "pred_label": 3610.012451171875, | |
| "rewards/accuracies": 0.34375, | |
| "rewards/chosen": -0.06083091348409653, | |
| "rewards/margins": 0.0633452981710434, | |
| "rewards/rejected": -0.12417621910572052, | |
| "step": 680, | |
| "use_label": 10135.9873046875 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "grad_norm": 0.5078125, | |
| "learning_rate": 1.0106369933615043e-06, | |
| "logits/chosen": -2.2393274307250977, | |
| "logits/rejected": -2.2085208892822266, | |
| "logps/chosen": -90.31179809570312, | |
| "logps/rejected": -96.00973510742188, | |
| "loss": 0.6928, | |
| "pred_label": 3716.97509765625, | |
| "rewards/accuracies": 0.3843750059604645, | |
| "rewards/chosen": -0.06285654008388519, | |
| "rewards/margins": 0.07485760748386383, | |
| "rewards/rejected": -0.13771414756774902, | |
| "step": 700, | |
| "use_label": 10349.025390625 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_logits/chosen": -2.1372170448303223, | |
| "eval_logits/rejected": -2.1086459159851074, | |
| "eval_logps/chosen": -73.96572875976562, | |
| "eval_logps/rejected": -87.70773315429688, | |
| "eval_loss": 0.6929500102996826, | |
| "eval_pred_label": 3852.730224609375, | |
| "eval_rewards/accuracies": 0.3511904776096344, | |
| "eval_rewards/chosen": -0.05065184459090233, | |
| "eval_rewards/margins": 0.07244490087032318, | |
| "eval_rewards/rejected": -0.12309674173593521, | |
| "eval_runtime": 248.0038, | |
| "eval_samples_per_second": 8.064, | |
| "eval_steps_per_second": 0.254, | |
| "eval_use_label": 10627.26953125, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "grad_norm": 0.55078125, | |
| "learning_rate": 8.678793653740633e-07, | |
| "logits/chosen": -2.1876041889190674, | |
| "logits/rejected": -2.1966712474823, | |
| "logps/chosen": -64.94602966308594, | |
| "logps/rejected": -77.46949005126953, | |
| "loss": 0.6927, | |
| "pred_label": 3992.16259765625, | |
| "rewards/accuracies": 0.31562501192092896, | |
| "rewards/chosen": -0.04975567013025284, | |
| "rewards/margins": 0.06240048259496689, | |
| "rewards/rejected": -0.11215615272521973, | |
| "step": 720, | |
| "use_label": 10897.837890625 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "grad_norm": 0.416015625, | |
| "learning_rate": 7.338500848029603e-07, | |
| "logits/chosen": -2.194794178009033, | |
| "logits/rejected": -2.2083091735839844, | |
| "logps/chosen": -69.16300201416016, | |
| "logps/rejected": -74.87442779541016, | |
| "loss": 0.6927, | |
| "pred_label": 4088.0625, | |
| "rewards/accuracies": 0.3187499940395355, | |
| "rewards/chosen": -0.03673207014799118, | |
| "rewards/margins": 0.07390830665826797, | |
| "rewards/rejected": -0.11064038425683975, | |
| "step": 740, | |
| "use_label": 11121.9375 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 0.47265625, | |
| "learning_rate": 6.092659210462232e-07, | |
| "logits/chosen": -2.2297511100769043, | |
| "logits/rejected": -2.232818841934204, | |
| "logps/chosen": -70.27059173583984, | |
| "logps/rejected": -88.61542510986328, | |
| "loss": 0.6927, | |
| "pred_label": 4190.375, | |
| "rewards/accuracies": 0.33125001192092896, | |
| "rewards/chosen": -0.04521505907177925, | |
| "rewards/margins": 0.05956338718533516, | |
| "rewards/rejected": -0.10477845370769501, | |
| "step": 760, | |
| "use_label": 11339.625 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "grad_norm": 0.515625, | |
| "learning_rate": 4.947931323697983e-07, | |
| "logits/chosen": -2.224112033843994, | |
| "logits/rejected": -2.241053581237793, | |
| "logps/chosen": -82.8070068359375, | |
| "logps/rejected": -85.62196350097656, | |
| "loss": 0.6927, | |
| "pred_label": 4299.97509765625, | |
| "rewards/accuracies": 0.375, | |
| "rewards/chosen": -0.04633576422929764, | |
| "rewards/margins": 0.0844966396689415, | |
| "rewards/rejected": -0.13083240389823914, | |
| "step": 780, | |
| "use_label": 11550.025390625 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "grad_norm": 0.498046875, | |
| "learning_rate": 3.910439028537638e-07, | |
| "logits/chosen": -2.201280117034912, | |
| "logits/rejected": -2.177452325820923, | |
| "logps/chosen": -65.0578842163086, | |
| "logps/rejected": -66.19197082519531, | |
| "loss": 0.6927, | |
| "pred_label": 4407.78759765625, | |
| "rewards/accuracies": 0.3499999940395355, | |
| "rewards/chosen": -0.026169428601861, | |
| "rewards/margins": 0.06455135345458984, | |
| "rewards/rejected": -0.0907207801938057, | |
| "step": 800, | |
| "use_label": 11762.212890625 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_logits/chosen": -2.1430623531341553, | |
| "eval_logits/rejected": -2.1141114234924316, | |
| "eval_logps/chosen": -71.62469482421875, | |
| "eval_logps/rejected": -85.3831787109375, | |
| "eval_loss": 0.6928467750549316, | |
| "eval_pred_label": 4538.47607421875, | |
| "eval_rewards/accuracies": 0.3551587164402008, | |
| "eval_rewards/chosen": -0.027241550385951996, | |
| "eval_rewards/margins": 0.072609543800354, | |
| "eval_rewards/rejected": -0.099851094186306, | |
| "eval_runtime": 247.951, | |
| "eval_samples_per_second": 8.066, | |
| "eval_steps_per_second": 0.254, | |
| "eval_use_label": 12045.5234375, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "grad_norm": 0.48046875, | |
| "learning_rate": 2.98573068519539e-07, | |
| "logits/chosen": -2.228102684020996, | |
| "logits/rejected": -2.2112691402435303, | |
| "logps/chosen": -68.63658142089844, | |
| "logps/rejected": -75.33064270019531, | |
| "loss": 0.6923, | |
| "pred_label": 4678.53759765625, | |
| "rewards/accuracies": 0.3218750059604645, | |
| "rewards/chosen": -0.03714119642972946, | |
| "rewards/margins": 0.05530167371034622, | |
| "rewards/rejected": -0.09244287014007568, | |
| "step": 820, | |
| "use_label": 12315.462890625 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "grad_norm": 0.56640625, | |
| "learning_rate": 2.178751501463036e-07, | |
| "logits/chosen": -2.204557418823242, | |
| "logits/rejected": -2.2018847465515137, | |
| "logps/chosen": -61.4800910949707, | |
| "logps/rejected": -63.1760139465332, | |
| "loss": 0.6929, | |
| "pred_label": 4777.375, | |
| "rewards/accuracies": 0.29374998807907104, | |
| "rewards/chosen": -0.02809613011777401, | |
| "rewards/margins": 0.05226613208651543, | |
| "rewards/rejected": -0.08036227524280548, | |
| "step": 840, | |
| "use_label": 12536.625 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "grad_norm": 0.8515625, | |
| "learning_rate": 1.4938170864468636e-07, | |
| "logits/chosen": -2.252244234085083, | |
| "logits/rejected": -2.242299795150757, | |
| "logps/chosen": -84.9459228515625, | |
| "logps/rejected": -90.69441223144531, | |
| "loss": 0.6922, | |
| "pred_label": 4874.3251953125, | |
| "rewards/accuracies": 0.4124999940395355, | |
| "rewards/chosen": -0.022363774478435516, | |
| "rewards/margins": 0.09057153016328812, | |
| "rewards/rejected": -0.11293530464172363, | |
| "step": 860, | |
| "use_label": 12759.6748046875 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "grad_norm": 0.4296875, | |
| "learning_rate": 9.345903713082305e-08, | |
| "logits/chosen": -2.2364704608917236, | |
| "logits/rejected": -2.224773406982422, | |
| "logps/chosen": -75.7426528930664, | |
| "logps/rejected": -91.20499420166016, | |
| "loss": 0.6925, | |
| "pred_label": 4988.4873046875, | |
| "rewards/accuracies": 0.3843750059604645, | |
| "rewards/chosen": -0.03711060434579849, | |
| "rewards/margins": 0.08993253856897354, | |
| "rewards/rejected": -0.12704312801361084, | |
| "step": 880, | |
| "use_label": 12965.5126953125 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "grad_norm": 0.6015625, | |
| "learning_rate": 5.0406202043228604e-08, | |
| "logits/chosen": -2.121796131134033, | |
| "logits/rejected": -2.15610671043396, | |
| "logps/chosen": -69.87088775634766, | |
| "logps/rejected": -90.85367584228516, | |
| "loss": 0.6929, | |
| "pred_label": 5089.85009765625, | |
| "rewards/accuracies": 0.3375000059604645, | |
| "rewards/chosen": -0.032511431723833084, | |
| "rewards/margins": 0.06634987145662308, | |
| "rewards/rejected": -0.09886129945516586, | |
| "step": 900, | |
| "use_label": 13184.150390625 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_logits/chosen": -2.1427581310272217, | |
| "eval_logits/rejected": -2.113929510116577, | |
| "eval_logps/chosen": -71.7841567993164, | |
| "eval_logps/rejected": -85.5160140991211, | |
| "eval_loss": 0.6928035020828247, | |
| "eval_pred_label": 5226.619140625, | |
| "eval_rewards/accuracies": 0.3492063581943512, | |
| "eval_rewards/chosen": -0.02883605659008026, | |
| "eval_rewards/margins": 0.0723433569073677, | |
| "eval_rewards/rejected": -0.10117942094802856, | |
| "eval_runtime": 246.4796, | |
| "eval_samples_per_second": 8.114, | |
| "eval_steps_per_second": 0.256, | |
| "eval_use_label": 13461.380859375, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 0.52734375, | |
| "learning_rate": 2.0453443778310766e-08, | |
| "logits/chosen": -2.1679275035858154, | |
| "logits/rejected": -2.1737468242645264, | |
| "logps/chosen": -59.419395446777344, | |
| "logps/rejected": -76.71382141113281, | |
| "loss": 0.6925, | |
| "pred_label": 5365.3876953125, | |
| "rewards/accuracies": 0.3187499940395355, | |
| "rewards/chosen": -0.021122563630342484, | |
| "rewards/margins": 0.0707126036286354, | |
| "rewards/rejected": -0.09183517098426819, | |
| "step": 920, | |
| "use_label": 13732.6123046875 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "grad_norm": 0.64453125, | |
| "learning_rate": 3.760945397705828e-09, | |
| "logits/chosen": -2.1522116661071777, | |
| "logits/rejected": -2.1893556118011475, | |
| "logps/chosen": -68.75323486328125, | |
| "logps/rejected": -82.70423889160156, | |
| "loss": 0.6926, | |
| "pred_label": 5459.0751953125, | |
| "rewards/accuracies": 0.32499998807907104, | |
| "rewards/chosen": -0.02701050415635109, | |
| "rewards/margins": 0.06467042118310928, | |
| "rewards/rejected": -0.09168092906475067, | |
| "step": 940, | |
| "use_label": 13958.9248046875 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 955, | |
| "total_flos": 0.0, | |
| "train_loss": 0.692275420283772, | |
| "train_runtime": 20019.5915, | |
| "train_samples_per_second": 3.054, | |
| "train_steps_per_second": 0.048 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 955, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 100, | |
| "total_flos": 0.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |