| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.998027613412229, | |
| "eval_steps": 50000, | |
| "global_step": 1824, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.00821827744904668, | |
| "grad_norm": 48.55606780690502, | |
| "learning_rate": 1.358695652173913e-08, | |
| "logits/chosen": 26.16689682006836, | |
| "logits/rejected": 25.511425018310547, | |
| "logps/chosen": -189.36741638183594, | |
| "logps/rejected": -78.73792266845703, | |
| "loss": 1.79, | |
| "rewards/accuracies": 0.2800000011920929, | |
| "rewards/chosen": -0.006983796134591103, | |
| "rewards/margins": 3.662884410005063e-05, | |
| "rewards/rejected": -0.007020425051450729, | |
| "sft_loss": 0.661233127117157, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 0.01643655489809336, | |
| "grad_norm": 50.84809399854242, | |
| "learning_rate": 2.717391304347826e-08, | |
| "logits/chosen": 25.634292602539062, | |
| "logits/rejected": 25.165508270263672, | |
| "logps/chosen": -175.30511474609375, | |
| "logps/rejected": -79.45011901855469, | |
| "loss": 1.7672, | |
| "rewards/accuracies": 0.6100000143051147, | |
| "rewards/chosen": -0.01493214163929224, | |
| "rewards/margins": 0.032123688608407974, | |
| "rewards/rejected": -0.04705582931637764, | |
| "sft_loss": 0.6432023644447327, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.02465483234714004, | |
| "grad_norm": 41.52221293409133, | |
| "learning_rate": 4.076086956521739e-08, | |
| "logits/chosen": 25.897306442260742, | |
| "logits/rejected": 25.234777450561523, | |
| "logps/chosen": -204.5565643310547, | |
| "logps/rejected": -85.37405395507812, | |
| "loss": 1.6603, | |
| "rewards/accuracies": 0.8799999952316284, | |
| "rewards/chosen": -0.028912657871842384, | |
| "rewards/margins": 0.18977542221546173, | |
| "rewards/rejected": -0.21868810057640076, | |
| "sft_loss": 0.7592554688453674, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 0.03287310979618672, | |
| "grad_norm": 30.024466917447533, | |
| "learning_rate": 5.434782608695652e-08, | |
| "logits/chosen": 26.472496032714844, | |
| "logits/rejected": 26.013669967651367, | |
| "logps/chosen": -178.9062042236328, | |
| "logps/rejected": -87.18224334716797, | |
| "loss": 1.5519, | |
| "rewards/accuracies": 0.9300000071525574, | |
| "rewards/chosen": -0.11060313880443573, | |
| "rewards/margins": 0.3851660490036011, | |
| "rewards/rejected": -0.495769202709198, | |
| "sft_loss": 0.6785654425621033, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.041091387245233396, | |
| "grad_norm": 23.574332052101575, | |
| "learning_rate": 6.793478260869565e-08, | |
| "logits/chosen": 26.571308135986328, | |
| "logits/rejected": 26.069765090942383, | |
| "logps/chosen": -204.71995544433594, | |
| "logps/rejected": -95.25181579589844, | |
| "loss": 1.4535, | |
| "rewards/accuracies": 0.9300000071525574, | |
| "rewards/chosen": -0.2731512486934662, | |
| "rewards/margins": 0.7024775743484497, | |
| "rewards/rejected": -0.9756287336349487, | |
| "sft_loss": 0.6605415344238281, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.04930966469428008, | |
| "grad_norm": 18.127113157576492, | |
| "learning_rate": 8.152173913043478e-08, | |
| "logits/chosen": 26.70085906982422, | |
| "logits/rejected": 26.199695587158203, | |
| "logps/chosen": -189.0041961669922, | |
| "logps/rejected": -95.67135620117188, | |
| "loss": 1.3598, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -0.4376958906650543, | |
| "rewards/margins": 0.9910183548927307, | |
| "rewards/rejected": -1.4287142753601074, | |
| "sft_loss": 0.6798427700996399, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.05752794214332676, | |
| "grad_norm": 16.856249874916603, | |
| "learning_rate": 9.510869565217392e-08, | |
| "logits/chosen": 27.086894989013672, | |
| "logits/rejected": 26.779054641723633, | |
| "logps/chosen": -202.5185546875, | |
| "logps/rejected": -98.5663070678711, | |
| "loss": 1.2944, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -0.5852899551391602, | |
| "rewards/margins": 1.2753018140792847, | |
| "rewards/rejected": -1.8605915307998657, | |
| "sft_loss": 0.6831802129745483, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 0.06574621959237344, | |
| "grad_norm": 15.222314216803584, | |
| "learning_rate": 1.0869565217391303e-07, | |
| "logits/chosen": 26.470937728881836, | |
| "logits/rejected": 26.266651153564453, | |
| "logps/chosen": -185.2868194580078, | |
| "logps/rejected": -96.5091781616211, | |
| "loss": 1.2027, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -0.6554566025733948, | |
| "rewards/margins": 1.4152508974075317, | |
| "rewards/rejected": -2.0707075595855713, | |
| "sft_loss": 0.6970738768577576, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07396449704142012, | |
| "grad_norm": 14.365159397400335, | |
| "learning_rate": 1.2228260869565216e-07, | |
| "logits/chosen": 25.881906509399414, | |
| "logits/rejected": 25.525175094604492, | |
| "logps/chosen": -202.46238708496094, | |
| "logps/rejected": -108.43726348876953, | |
| "loss": 1.1328, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -0.787525475025177, | |
| "rewards/margins": 1.8143333196640015, | |
| "rewards/rejected": -2.6018588542938232, | |
| "sft_loss": 0.6782786846160889, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 0.08218277449046679, | |
| "grad_norm": 13.924602084521048, | |
| "learning_rate": 1.358695652173913e-07, | |
| "logits/chosen": 24.610755920410156, | |
| "logits/rejected": 24.408979415893555, | |
| "logps/chosen": -206.8500213623047, | |
| "logps/rejected": -113.42557525634766, | |
| "loss": 1.0599, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -0.9198395609855652, | |
| "rewards/margins": 1.9545520544052124, | |
| "rewards/rejected": -2.874391555786133, | |
| "sft_loss": 0.7132790088653564, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09040105193951348, | |
| "grad_norm": 11.972485852637668, | |
| "learning_rate": 1.4945652173913042e-07, | |
| "logits/chosen": 23.996862411499023, | |
| "logits/rejected": 24.392988204956055, | |
| "logps/chosen": -176.3905487060547, | |
| "logps/rejected": -110.62020874023438, | |
| "loss": 1.0223, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -0.8744373321533203, | |
| "rewards/margins": 2.172375440597534, | |
| "rewards/rejected": -3.0468130111694336, | |
| "sft_loss": 0.7045189738273621, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 0.09861932938856016, | |
| "grad_norm": 13.242028156676367, | |
| "learning_rate": 1.6304347826086955e-07, | |
| "logits/chosen": 23.04694366455078, | |
| "logits/rejected": 23.079355239868164, | |
| "logps/chosen": -186.1154327392578, | |
| "logps/rejected": -107.23130798339844, | |
| "loss": 1.0046, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -1.0562888383865356, | |
| "rewards/margins": 2.0806047916412354, | |
| "rewards/rejected": -3.1368932723999023, | |
| "sft_loss": 0.6290792226791382, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.10683760683760683, | |
| "grad_norm": 11.030176141313747, | |
| "learning_rate": 1.766304347826087e-07, | |
| "logits/chosen": 21.996606826782227, | |
| "logits/rejected": 22.384113311767578, | |
| "logps/chosen": -199.38589477539062, | |
| "logps/rejected": -116.91275024414062, | |
| "loss": 0.9338, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -1.1412394046783447, | |
| "rewards/margins": 2.474609613418579, | |
| "rewards/rejected": -3.615849018096924, | |
| "sft_loss": 0.697711706161499, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 0.11505588428665352, | |
| "grad_norm": 11.76117705302215, | |
| "learning_rate": 1.9021739130434784e-07, | |
| "logits/chosen": 22.534835815429688, | |
| "logits/rejected": 23.107168197631836, | |
| "logps/chosen": -216.9481964111328, | |
| "logps/rejected": -129.04183959960938, | |
| "loss": 0.8671, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -1.3997070789337158, | |
| "rewards/margins": 2.9236786365509033, | |
| "rewards/rejected": -4.323385715484619, | |
| "sft_loss": 0.728801965713501, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.1232741617357002, | |
| "grad_norm": 32.386219318167385, | |
| "learning_rate": 2.0380434782608694e-07, | |
| "logits/chosen": 20.90481948852539, | |
| "logits/rejected": 21.215843200683594, | |
| "logps/chosen": -247.61224365234375, | |
| "logps/rejected": -138.62893676757812, | |
| "loss": 0.8076, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -1.5252928733825684, | |
| "rewards/margins": 3.3426883220672607, | |
| "rewards/rejected": -4.86798095703125, | |
| "sft_loss": 0.7596563696861267, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.13149243918474687, | |
| "grad_norm": 11.700521911598706, | |
| "learning_rate": 2.1739130434782607e-07, | |
| "logits/chosen": 20.761672973632812, | |
| "logits/rejected": 20.871828079223633, | |
| "logps/chosen": -236.5396728515625, | |
| "logps/rejected": -138.31297302246094, | |
| "loss": 0.842, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -1.9364999532699585, | |
| "rewards/margins": 3.281285047531128, | |
| "rewards/rejected": -5.217784881591797, | |
| "sft_loss": 0.7300873398780823, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.13971071663379356, | |
| "grad_norm": 11.895414317868761, | |
| "learning_rate": 2.309782608695652e-07, | |
| "logits/chosen": 21.150850296020508, | |
| "logits/rejected": 21.817951202392578, | |
| "logps/chosen": -223.0463104248047, | |
| "logps/rejected": -139.8596954345703, | |
| "loss": 0.7489, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -2.007277250289917, | |
| "rewards/margins": 3.5759541988372803, | |
| "rewards/rejected": -5.5832319259643555, | |
| "sft_loss": 0.7483465075492859, | |
| "step": 85 | |
| }, | |
| { | |
| "epoch": 0.14792899408284024, | |
| "grad_norm": 11.018586570679572, | |
| "learning_rate": 2.445652173913043e-07, | |
| "logits/chosen": 22.40447998046875, | |
| "logits/rejected": 22.448156356811523, | |
| "logps/chosen": -201.39810180664062, | |
| "logps/rejected": -126.50525665283203, | |
| "loss": 0.8269, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -2.1027634143829346, | |
| "rewards/margins": 3.118117332458496, | |
| "rewards/rejected": -5.220880508422852, | |
| "sft_loss": 0.7317149639129639, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.15614727153188693, | |
| "grad_norm": 9.026135528071627, | |
| "learning_rate": 2.499981493451693e-07, | |
| "logits/chosen": 20.40322494506836, | |
| "logits/rejected": 20.44278907775879, | |
| "logps/chosen": -203.20326232910156, | |
| "logps/rejected": -124.00860595703125, | |
| "loss": 0.8771, | |
| "rewards/accuracies": 0.9100000262260437, | |
| "rewards/chosen": -1.6680656671524048, | |
| "rewards/margins": 3.2214581966400146, | |
| "rewards/rejected": -4.889523983001709, | |
| "sft_loss": 0.7273903489112854, | |
| "step": 95 | |
| }, | |
| { | |
| "epoch": 0.16436554898093358, | |
| "grad_norm": 10.366938012622036, | |
| "learning_rate": 2.499868399863186e-07, | |
| "logits/chosen": 20.907590866088867, | |
| "logits/rejected": 21.92055892944336, | |
| "logps/chosen": -226.97225952148438, | |
| "logps/rejected": -144.5021514892578, | |
| "loss": 0.7676, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -2.1906163692474365, | |
| "rewards/margins": 3.6612253189086914, | |
| "rewards/rejected": -5.851841449737549, | |
| "sft_loss": 0.7680675983428955, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.17258382642998027, | |
| "grad_norm": 9.779078878164054, | |
| "learning_rate": 2.4996525033926786e-07, | |
| "logits/chosen": 19.350120544433594, | |
| "logits/rejected": 19.718740463256836, | |
| "logps/chosen": -209.20166015625, | |
| "logps/rejected": -136.57321166992188, | |
| "loss": 0.7133, | |
| "rewards/accuracies": 0.9300000071525574, | |
| "rewards/chosen": -2.251823663711548, | |
| "rewards/margins": 3.696510076522827, | |
| "rewards/rejected": -5.948334217071533, | |
| "sft_loss": 0.7179654836654663, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 0.18080210387902695, | |
| "grad_norm": 8.45489237540799, | |
| "learning_rate": 2.499333821797864e-07, | |
| "logits/chosen": 20.7148380279541, | |
| "logits/rejected": 20.950342178344727, | |
| "logps/chosen": -197.59976196289062, | |
| "logps/rejected": -124.13175964355469, | |
| "loss": 0.7642, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -2.359647750854492, | |
| "rewards/margins": 3.3463170528411865, | |
| "rewards/rejected": -5.705965042114258, | |
| "sft_loss": 0.7615786790847778, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.18902038132807364, | |
| "grad_norm": 10.762078567025862, | |
| "learning_rate": 2.4989123812906105e-07, | |
| "logits/chosen": 19.379554748535156, | |
| "logits/rejected": 20.651145935058594, | |
| "logps/chosen": -219.8887176513672, | |
| "logps/rejected": -148.8833770751953, | |
| "loss": 0.7483, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -2.958165168762207, | |
| "rewards/margins": 3.9372713565826416, | |
| "rewards/rejected": -6.895437240600586, | |
| "sft_loss": 0.7731737494468689, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 0.19723865877712032, | |
| "grad_norm": 10.354433872987686, | |
| "learning_rate": 2.498388216534807e-07, | |
| "logits/chosen": 19.773361206054688, | |
| "logits/rejected": 21.142953872680664, | |
| "logps/chosen": -238.31101989746094, | |
| "logps/rejected": -152.0144500732422, | |
| "loss": 0.7063, | |
| "rewards/accuracies": 0.8799999952316284, | |
| "rewards/chosen": -2.7792108058929443, | |
| "rewards/margins": 4.163509845733643, | |
| "rewards/rejected": -6.942720413208008, | |
| "sft_loss": 0.7693167328834534, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.205456936226167, | |
| "grad_norm": 11.490346482929228, | |
| "learning_rate": 2.49776137064351e-07, | |
| "logits/chosen": 19.508024215698242, | |
| "logits/rejected": 19.62151527404785, | |
| "logps/chosen": -232.81178283691406, | |
| "logps/rejected": -151.69398498535156, | |
| "loss": 0.7148, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -2.6999313831329346, | |
| "rewards/margins": 3.9598686695098877, | |
| "rewards/rejected": -6.659799575805664, | |
| "sft_loss": 0.8186704516410828, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.21367521367521367, | |
| "grad_norm": 13.390026452837366, | |
| "learning_rate": 2.4970318951754e-07, | |
| "logits/chosen": 19.62987518310547, | |
| "logits/rejected": 20.120250701904297, | |
| "logps/chosen": -247.29205322265625, | |
| "logps/rejected": -159.60348510742188, | |
| "loss": 0.6619, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -2.8834011554718018, | |
| "rewards/margins": 4.369426727294922, | |
| "rewards/rejected": -7.252828598022461, | |
| "sft_loss": 0.7933542728424072, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.22189349112426035, | |
| "grad_norm": 20.479502968540558, | |
| "learning_rate": 2.496199850130537e-07, | |
| "logits/chosen": 18.90142059326172, | |
| "logits/rejected": 19.151918411254883, | |
| "logps/chosen": -231.70069885253906, | |
| "logps/rejected": -141.99693298339844, | |
| "loss": 0.7109, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -2.819154977798462, | |
| "rewards/margins": 3.806306838989258, | |
| "rewards/rejected": -6.625460624694824, | |
| "sft_loss": 0.7920200228691101, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 0.23011176857330704, | |
| "grad_norm": 16.190350556337812, | |
| "learning_rate": 2.4952653039454297e-07, | |
| "logits/chosen": 18.546707153320312, | |
| "logits/rejected": 18.616119384765625, | |
| "logps/chosen": -251.7685089111328, | |
| "logps/rejected": -160.7568817138672, | |
| "loss": 0.703, | |
| "rewards/accuracies": 0.9300000071525574, | |
| "rewards/chosen": -3.2368268966674805, | |
| "rewards/margins": 4.385184288024902, | |
| "rewards/rejected": -7.622011184692383, | |
| "sft_loss": 0.8116011023521423, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.23833004602235372, | |
| "grad_norm": 14.348906773180857, | |
| "learning_rate": 2.494228333487403e-07, | |
| "logits/chosen": 18.956235885620117, | |
| "logits/rejected": 19.919641494750977, | |
| "logps/chosen": -210.7549591064453, | |
| "logps/rejected": -144.51132202148438, | |
| "loss": 0.6182, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -3.001668691635132, | |
| "rewards/margins": 3.866687536239624, | |
| "rewards/rejected": -6.868356227874756, | |
| "sft_loss": 0.7950787544250488, | |
| "step": 145 | |
| }, | |
| { | |
| "epoch": 0.2465483234714004, | |
| "grad_norm": 11.009157695890236, | |
| "learning_rate": 2.4930890240482784e-07, | |
| "logits/chosen": 18.876365661621094, | |
| "logits/rejected": 19.30438804626465, | |
| "logps/chosen": -229.18504333496094, | |
| "logps/rejected": -150.90707397460938, | |
| "loss": 0.671, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -3.3249759674072266, | |
| "rewards/margins": 4.095080375671387, | |
| "rewards/rejected": -7.4200568199157715, | |
| "sft_loss": 0.7879451513290405, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.25476660092044706, | |
| "grad_norm": 11.264576367918604, | |
| "learning_rate": 2.491847469337356e-07, | |
| "logits/chosen": 18.14313316345215, | |
| "logits/rejected": 18.77975082397461, | |
| "logps/chosen": -219.8468780517578, | |
| "logps/rejected": -150.99098205566406, | |
| "loss": 0.6461, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -3.250223159790039, | |
| "rewards/margins": 4.443104267120361, | |
| "rewards/rejected": -7.6933274269104, | |
| "sft_loss": 0.8351505994796753, | |
| "step": 155 | |
| }, | |
| { | |
| "epoch": 0.26298487836949375, | |
| "grad_norm": 17.15390685304222, | |
| "learning_rate": 2.4905037714737094e-07, | |
| "logits/chosen": 19.779348373413086, | |
| "logits/rejected": 19.593463897705078, | |
| "logps/chosen": -259.2501220703125, | |
| "logps/rejected": -162.26368713378906, | |
| "loss": 0.7398, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -3.7065176963806152, | |
| "rewards/margins": 4.470663070678711, | |
| "rewards/rejected": -8.177180290222168, | |
| "sft_loss": 0.8221470713615417, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.27120315581854043, | |
| "grad_norm": 10.266952014618042, | |
| "learning_rate": 2.489058040977784e-07, | |
| "logits/chosen": 19.731273651123047, | |
| "logits/rejected": 19.947425842285156, | |
| "logps/chosen": -222.83753967285156, | |
| "logps/rejected": -142.3966522216797, | |
| "loss": 0.6633, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -2.9937241077423096, | |
| "rewards/margins": 4.092346668243408, | |
| "rewards/rejected": -7.086071968078613, | |
| "sft_loss": 0.8631803393363953, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 0.2794214332675871, | |
| "grad_norm": 13.183734224346434, | |
| "learning_rate": 2.487510396762309e-07, | |
| "logits/chosen": 18.506755828857422, | |
| "logits/rejected": 19.725309371948242, | |
| "logps/chosen": -246.2398223876953, | |
| "logps/rejected": -171.14974975585938, | |
| "loss": 0.6512, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -3.316751480102539, | |
| "rewards/margins": 4.549408912658691, | |
| "rewards/rejected": -7.8661603927612305, | |
| "sft_loss": 0.9392525553703308, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.2876397107166338, | |
| "grad_norm": 12.820383998338311, | |
| "learning_rate": 2.485860966122514e-07, | |
| "logits/chosen": 18.673315048217773, | |
| "logits/rejected": 19.47124671936035, | |
| "logps/chosen": -239.1477508544922, | |
| "logps/rejected": -168.49923706054688, | |
| "loss": 0.6218, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -3.3230719566345215, | |
| "rewards/margins": 4.699094295501709, | |
| "rewards/rejected": -8.022165298461914, | |
| "sft_loss": 0.8536433577537537, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.2958579881656805, | |
| "grad_norm": 10.336252791103886, | |
| "learning_rate": 2.484109884725661e-07, | |
| "logits/chosen": 17.68476104736328, | |
| "logits/rejected": 18.92132568359375, | |
| "logps/chosen": -248.71087646484375, | |
| "logps/rejected": -164.80517578125, | |
| "loss": 0.6908, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -3.7376978397369385, | |
| "rewards/margins": 4.469425678253174, | |
| "rewards/rejected": -8.207123756408691, | |
| "sft_loss": 0.7900984883308411, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.30407626561472717, | |
| "grad_norm": 9.07674205143479, | |
| "learning_rate": 2.4822572965998844e-07, | |
| "logits/chosen": 17.927953720092773, | |
| "logits/rejected": 18.744905471801758, | |
| "logps/chosen": -256.3652038574219, | |
| "logps/rejected": -169.36451721191406, | |
| "loss": 0.6008, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -3.603369951248169, | |
| "rewards/margins": 4.865907192230225, | |
| "rewards/rejected": -8.469277381896973, | |
| "sft_loss": 0.8645619750022888, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 0.31229454306377386, | |
| "grad_norm": 11.293965527732967, | |
| "learning_rate": 2.4803033541223455e-07, | |
| "logits/chosen": 19.39400863647461, | |
| "logits/rejected": 19.796106338500977, | |
| "logps/chosen": -245.06739807128906, | |
| "logps/rejected": -164.10296630859375, | |
| "loss": 0.6281, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -3.5845892429351807, | |
| "rewards/margins": 4.6414408683776855, | |
| "rewards/rejected": -8.226030349731445, | |
| "sft_loss": 0.8358697295188904, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.32051282051282054, | |
| "grad_norm": 11.390930360072153, | |
| "learning_rate": 2.478248218006699e-07, | |
| "logits/chosen": 17.902259826660156, | |
| "logits/rejected": 18.019027709960938, | |
| "logps/chosen": -265.0622253417969, | |
| "logps/rejected": -175.5810546875, | |
| "loss": 0.6158, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -3.9043285846710205, | |
| "rewards/margins": 5.115177154541016, | |
| "rewards/rejected": -9.019506454467773, | |
| "sft_loss": 0.8782904148101807, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 0.32873109796186717, | |
| "grad_norm": 52.895489458940915, | |
| "learning_rate": 2.476092057289873e-07, | |
| "logits/chosen": 17.241554260253906, | |
| "logits/rejected": 18.226573944091797, | |
| "logps/chosen": -249.59454345703125, | |
| "logps/rejected": -181.9971923828125, | |
| "loss": 0.6044, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -4.305534362792969, | |
| "rewards/margins": 5.0905351638793945, | |
| "rewards/rejected": -9.396068572998047, | |
| "sft_loss": 0.9349213242530823, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.33694937541091385, | |
| "grad_norm": 13.12464260474008, | |
| "learning_rate": 2.473835049318167e-07, | |
| "logits/chosen": 18.299766540527344, | |
| "logits/rejected": 19.57137107849121, | |
| "logps/chosen": -248.37832641601562, | |
| "logps/rejected": -171.3523406982422, | |
| "loss": 0.6532, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -4.206078052520752, | |
| "rewards/margins": 4.699835300445557, | |
| "rewards/rejected": -8.905913352966309, | |
| "sft_loss": 0.9326413869857788, | |
| "step": 205 | |
| }, | |
| { | |
| "epoch": 0.34516765285996054, | |
| "grad_norm": 8.71116895518069, | |
| "learning_rate": 2.4714773797326657e-07, | |
| "logits/chosen": 18.58841896057129, | |
| "logits/rejected": 19.255895614624023, | |
| "logps/chosen": -247.08616638183594, | |
| "logps/rejected": -165.45547485351562, | |
| "loss": 0.6183, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -3.8454854488372803, | |
| "rewards/margins": 4.812742710113525, | |
| "rewards/rejected": -8.658228874206543, | |
| "sft_loss": 0.853776752948761, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.3533859303090072, | |
| "grad_norm": 17.852596870413777, | |
| "learning_rate": 2.4690192424539663e-07, | |
| "logits/chosen": 18.283300399780273, | |
| "logits/rejected": 19.169416427612305, | |
| "logps/chosen": -241.07122802734375, | |
| "logps/rejected": -173.18699645996094, | |
| "loss": 0.6071, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -4.187161445617676, | |
| "rewards/margins": 5.0552144050598145, | |
| "rewards/rejected": -9.242376327514648, | |
| "sft_loss": 0.8952550292015076, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 0.3616042077580539, | |
| "grad_norm": 12.136136465528743, | |
| "learning_rate": 2.466460839666233e-07, | |
| "logits/chosen": 17.772991180419922, | |
| "logits/rejected": 18.684547424316406, | |
| "logps/chosen": -255.16156005859375, | |
| "logps/rejected": -183.1548614501953, | |
| "loss": 0.562, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -4.293615341186523, | |
| "rewards/margins": 5.553874969482422, | |
| "rewards/rejected": -9.847491264343262, | |
| "sft_loss": 0.8942830562591553, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.3698224852071006, | |
| "grad_norm": 13.249996024918259, | |
| "learning_rate": 2.463802381800563e-07, | |
| "logits/chosen": 17.9425106048584, | |
| "logits/rejected": 18.508359909057617, | |
| "logps/chosen": -260.12322998046875, | |
| "logps/rejected": -176.5136260986328, | |
| "loss": 0.6343, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -3.9985711574554443, | |
| "rewards/margins": 5.279909133911133, | |
| "rewards/rejected": -9.278480529785156, | |
| "sft_loss": 0.890729546546936, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.3780407626561473, | |
| "grad_norm": 13.483286780837357, | |
| "learning_rate": 2.461044087517682e-07, | |
| "logits/chosen": 19.322052001953125, | |
| "logits/rejected": 19.914690017700195, | |
| "logps/chosen": -267.1094970703125, | |
| "logps/rejected": -181.53118896484375, | |
| "loss": 0.59, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -4.28004264831543, | |
| "rewards/margins": 5.2816243171691895, | |
| "rewards/rejected": -9.561667442321777, | |
| "sft_loss": 0.8358654975891113, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.38625904010519396, | |
| "grad_norm": 10.134479758320998, | |
| "learning_rate": 2.458186183689957e-07, | |
| "logits/chosen": 18.751750946044922, | |
| "logits/rejected": 18.550024032592773, | |
| "logps/chosen": -237.7452392578125, | |
| "logps/rejected": -155.38726806640625, | |
| "loss": 0.6427, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -3.9234371185302734, | |
| "rewards/margins": 4.515294075012207, | |
| "rewards/rejected": -8.438732147216797, | |
| "sft_loss": 0.9805070757865906, | |
| "step": 235 | |
| }, | |
| { | |
| "epoch": 0.39447731755424065, | |
| "grad_norm": 13.771161444519256, | |
| "learning_rate": 2.4552289053827344e-07, | |
| "logits/chosen": 18.025060653686523, | |
| "logits/rejected": 18.463733673095703, | |
| "logps/chosen": -252.61175537109375, | |
| "logps/rejected": -171.77259826660156, | |
| "loss": 0.5599, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -4.3357720375061035, | |
| "rewards/margins": 5.04067325592041, | |
| "rewards/rejected": -9.376445770263672, | |
| "sft_loss": 0.7902787923812866, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.40269559500328733, | |
| "grad_norm": 9.886456705994728, | |
| "learning_rate": 2.4521724958350093e-07, | |
| "logits/chosen": 18.645158767700195, | |
| "logits/rejected": 19.603240966796875, | |
| "logps/chosen": -239.74526977539062, | |
| "logps/rejected": -162.94131469726562, | |
| "loss": 0.6344, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -4.351040840148926, | |
| "rewards/margins": 4.734447002410889, | |
| "rewards/rejected": -9.085487365722656, | |
| "sft_loss": 0.8848291635513306, | |
| "step": 245 | |
| }, | |
| { | |
| "epoch": 0.410913872452334, | |
| "grad_norm": 16.574947299413026, | |
| "learning_rate": 2.449017206439417e-07, | |
| "logits/chosen": 18.770355224609375, | |
| "logits/rejected": 19.167869567871094, | |
| "logps/chosen": -257.2867431640625, | |
| "logps/rejected": -180.79721069335938, | |
| "loss": 0.5475, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -4.755511283874512, | |
| "rewards/margins": 5.377356052398682, | |
| "rewards/rejected": -10.132868766784668, | |
| "sft_loss": 0.9855692982673645, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.41913214990138065, | |
| "grad_norm": 15.729142249690554, | |
| "learning_rate": 2.445763296721554e-07, | |
| "logits/chosen": 18.016155242919922, | |
| "logits/rejected": 18.655664443969727, | |
| "logps/chosen": -243.2661590576172, | |
| "logps/rejected": -178.59429931640625, | |
| "loss": 0.6424, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.371219635009766, | |
| "rewards/margins": 5.091875076293945, | |
| "rewards/rejected": -10.463094711303711, | |
| "sft_loss": 1.0052944421768188, | |
| "step": 255 | |
| }, | |
| { | |
| "epoch": 0.42735042735042733, | |
| "grad_norm": 14.846371154809418, | |
| "learning_rate": 2.4424110343186345e-07, | |
| "logits/chosen": 18.64227867126465, | |
| "logits/rejected": 19.062152862548828, | |
| "logps/chosen": -241.11070251464844, | |
| "logps/rejected": -167.0811767578125, | |
| "loss": 0.6183, | |
| "rewards/accuracies": 0.9300000071525574, | |
| "rewards/chosen": -3.9312877655029297, | |
| "rewards/margins": 4.8627119064331055, | |
| "rewards/rejected": -8.793999671936035, | |
| "sft_loss": 0.8778759837150574, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.435568704799474, | |
| "grad_norm": 16.788820590336183, | |
| "learning_rate": 2.4389606949574767e-07, | |
| "logits/chosen": 18.801990509033203, | |
| "logits/rejected": 20.348352432250977, | |
| "logps/chosen": -266.7105407714844, | |
| "logps/rejected": -190.86622619628906, | |
| "loss": 0.5961, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -4.232571601867676, | |
| "rewards/margins": 5.312459945678711, | |
| "rewards/rejected": -9.545029640197754, | |
| "sft_loss": 0.8269821405410767, | |
| "step": 265 | |
| }, | |
| { | |
| "epoch": 0.4437869822485207, | |
| "grad_norm": 9.660029588751273, | |
| "learning_rate": 2.435412562431823e-07, | |
| "logits/chosen": 18.019432067871094, | |
| "logits/rejected": 18.232667922973633, | |
| "logps/chosen": -254.80136108398438, | |
| "logps/rejected": -172.0924835205078, | |
| "loss": 0.547, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -4.478307723999023, | |
| "rewards/margins": 5.105349540710449, | |
| "rewards/rejected": -9.583656311035156, | |
| "sft_loss": 0.8911004662513733, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.4520052596975674, | |
| "grad_norm": 8.447767610497143, | |
| "learning_rate": 2.4317669285789964e-07, | |
| "logits/chosen": 18.408342361450195, | |
| "logits/rejected": 18.87084197998047, | |
| "logps/chosen": -296.8369445800781, | |
| "logps/rejected": -195.3644561767578, | |
| "loss": 0.5759, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -4.8854217529296875, | |
| "rewards/margins": 5.9024529457092285, | |
| "rewards/rejected": -10.787875175476074, | |
| "sft_loss": 0.8718220591545105, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.46022353714661407, | |
| "grad_norm": 14.077509009393875, | |
| "learning_rate": 2.428024093255901e-07, | |
| "logits/chosen": 17.676301956176758, | |
| "logits/rejected": 19.232654571533203, | |
| "logps/chosen": -261.8072509765625, | |
| "logps/rejected": -193.81626892089844, | |
| "loss": 0.6028, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -4.590798854827881, | |
| "rewards/margins": 5.75556755065918, | |
| "rewards/rejected": -10.346365928649902, | |
| "sft_loss": 0.8692941069602966, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.46844181459566075, | |
| "grad_norm": 12.255103077032402, | |
| "learning_rate": 2.424184364314352e-07, | |
| "logits/chosen": 19.874698638916016, | |
| "logits/rejected": 19.855077743530273, | |
| "logps/chosen": -263.8525085449219, | |
| "logps/rejected": -174.5958251953125, | |
| "loss": 0.5687, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -4.0808610916137695, | |
| "rewards/margins": 5.203913688659668, | |
| "rewards/rejected": -9.284773826599121, | |
| "sft_loss": 0.8956073522567749, | |
| "step": 285 | |
| }, | |
| { | |
| "epoch": 0.47666009204470744, | |
| "grad_norm": 15.082062203409798, | |
| "learning_rate": 2.420248057575761e-07, | |
| "logits/chosen": 17.83322525024414, | |
| "logits/rejected": 17.633359909057617, | |
| "logps/chosen": -278.74298095703125, | |
| "logps/rejected": -181.1900634765625, | |
| "loss": 0.5783, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -4.548935890197754, | |
| "rewards/margins": 5.899779796600342, | |
| "rewards/rejected": -10.448714256286621, | |
| "sft_loss": 0.8952395915985107, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.4848783694937541, | |
| "grad_norm": 11.834958728287821, | |
| "learning_rate": 2.416215496805156e-07, | |
| "logits/chosen": 18.121597290039062, | |
| "logits/rejected": 19.50238037109375, | |
| "logps/chosen": -252.4333038330078, | |
| "logps/rejected": -197.94659423828125, | |
| "loss": 0.5665, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.617161273956299, | |
| "rewards/margins": 5.908203125, | |
| "rewards/rejected": -11.52536392211914, | |
| "sft_loss": 0.9183645844459534, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 0.4930966469428008, | |
| "grad_norm": 13.662146621659161, | |
| "learning_rate": 2.412087013684552e-07, | |
| "logits/chosen": 16.815900802612305, | |
| "logits/rejected": 17.304187774658203, | |
| "logps/chosen": -276.7563781738281, | |
| "logps/rejected": -191.68553161621094, | |
| "loss": 0.6409, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -5.5067291259765625, | |
| "rewards/margins": 5.485719680786133, | |
| "rewards/rejected": -10.992449760437012, | |
| "sft_loss": 0.9233679175376892, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5013149243918474, | |
| "grad_norm": 12.176993675847571, | |
| "learning_rate": 2.407862947785669e-07, | |
| "logits/chosen": 18.833539962768555, | |
| "logits/rejected": 18.9912109375, | |
| "logps/chosen": -301.635498046875, | |
| "logps/rejected": -204.53671264648438, | |
| "loss": 0.487, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -5.389955997467041, | |
| "rewards/margins": 6.232929706573486, | |
| "rewards/rejected": -11.622885704040527, | |
| "sft_loss": 0.92539381980896, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 0.5095332018408941, | |
| "grad_norm": 8.075422505238562, | |
| "learning_rate": 2.403543646542003e-07, | |
| "logits/chosen": 18.5779972076416, | |
| "logits/rejected": 19.133594512939453, | |
| "logps/chosen": -267.43695068359375, | |
| "logps/rejected": -186.43345642089844, | |
| "loss": 0.6388, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -4.979398727416992, | |
| "rewards/margins": 5.5010504722595215, | |
| "rewards/rejected": -10.480450630187988, | |
| "sft_loss": 0.9564525485038757, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.5177514792899408, | |
| "grad_norm": 8.97962168945258, | |
| "learning_rate": 2.39912946522025e-07, | |
| "logits/chosen": 19.53040313720703, | |
| "logits/rejected": 20.46470069885254, | |
| "logps/chosen": -244.89207458496094, | |
| "logps/rejected": -172.9203643798828, | |
| "loss": 0.5741, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -4.482312202453613, | |
| "rewards/margins": 5.123040676116943, | |
| "rewards/rejected": -9.605354309082031, | |
| "sft_loss": 0.9498026371002197, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 0.5259697567389875, | |
| "grad_norm": 12.054702965132526, | |
| "learning_rate": 2.3946207668910833e-07, | |
| "logits/chosen": 18.005373001098633, | |
| "logits/rejected": 18.470924377441406, | |
| "logps/chosen": -231.72732543945312, | |
| "logps/rejected": -168.2989044189453, | |
| "loss": 0.5869, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -4.519069671630859, | |
| "rewards/margins": 4.713679313659668, | |
| "rewards/rejected": -9.232749938964844, | |
| "sft_loss": 0.8408420085906982, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.5341880341880342, | |
| "grad_norm": 25.950655473924865, | |
| "learning_rate": 2.390017922399292e-07, | |
| "logits/chosen": 18.79814910888672, | |
| "logits/rejected": 19.250444412231445, | |
| "logps/chosen": -247.69647216796875, | |
| "logps/rejected": -174.35218811035156, | |
| "loss": 0.6145, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -4.683900833129883, | |
| "rewards/margins": 5.248979568481445, | |
| "rewards/rejected": -9.932881355285645, | |
| "sft_loss": 0.9410896301269531, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.5424063116370809, | |
| "grad_norm": 10.907505413471052, | |
| "learning_rate": 2.385321310333276e-07, | |
| "logits/chosen": 17.780803680419922, | |
| "logits/rejected": 18.34245491027832, | |
| "logps/chosen": -248.3139190673828, | |
| "logps/rejected": -172.43350219726562, | |
| "loss": 0.6284, | |
| "rewards/accuracies": 0.9100000262260437, | |
| "rewards/chosen": -5.367508411407471, | |
| "rewards/margins": 4.742012977600098, | |
| "rewards/rejected": -10.109521865844727, | |
| "sft_loss": 0.9266583323478699, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5506245890861275, | |
| "grad_norm": 29.199966853282145, | |
| "learning_rate": 2.38053131699391e-07, | |
| "logits/chosen": 18.024690628051758, | |
| "logits/rejected": 18.614425659179688, | |
| "logps/chosen": -290.8337707519531, | |
| "logps/rejected": -203.31809997558594, | |
| "loss": 0.5688, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -5.330504894256592, | |
| "rewards/margins": 6.163724422454834, | |
| "rewards/rejected": -11.49422836303711, | |
| "sft_loss": 0.9595879316329956, | |
| "step": 335 | |
| }, | |
| { | |
| "epoch": 0.5588428665351742, | |
| "grad_norm": 11.416242977585302, | |
| "learning_rate": 2.3756483363627694e-07, | |
| "logits/chosen": 17.60715103149414, | |
| "logits/rejected": 18.161012649536133, | |
| "logps/chosen": -250.91665649414062, | |
| "logps/rejected": -184.4646453857422, | |
| "loss": 0.5981, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.246757984161377, | |
| "rewards/margins": 5.438488006591797, | |
| "rewards/rejected": -10.685246467590332, | |
| "sft_loss": 0.9181762933731079, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.5670611439842209, | |
| "grad_norm": 9.90321260332983, | |
| "learning_rate": 2.3706727700697226e-07, | |
| "logits/chosen": 17.566362380981445, | |
| "logits/rejected": 18.253488540649414, | |
| "logps/chosen": -284.3514404296875, | |
| "logps/rejected": -193.24594116210938, | |
| "loss": 0.5567, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -4.994836807250977, | |
| "rewards/margins": 5.735879421234131, | |
| "rewards/rejected": -10.73071575164795, | |
| "sft_loss": 1.0169059038162231, | |
| "step": 345 | |
| }, | |
| { | |
| "epoch": 0.5752794214332676, | |
| "grad_norm": 15.546918377467371, | |
| "learning_rate": 2.3656050273598986e-07, | |
| "logits/chosen": 17.2511043548584, | |
| "logits/rejected": 18.237810134887695, | |
| "logps/chosen": -258.5328369140625, | |
| "logps/rejected": -191.0077362060547, | |
| "loss": 0.5363, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -5.097340106964111, | |
| "rewards/margins": 5.559810638427734, | |
| "rewards/rejected": -10.657149314880371, | |
| "sft_loss": 0.8693541884422302, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.5834976988823143, | |
| "grad_norm": 10.563639895115125, | |
| "learning_rate": 2.3604455250600256e-07, | |
| "logits/chosen": 18.051647186279297, | |
| "logits/rejected": 18.685161590576172, | |
| "logps/chosen": -273.46368408203125, | |
| "logps/rejected": -202.36537170410156, | |
| "loss": 0.516, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -5.360798358917236, | |
| "rewards/margins": 5.977966785430908, | |
| "rewards/rejected": -11.338766098022461, | |
| "sft_loss": 0.9063312411308289, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 0.591715976331361, | |
| "grad_norm": 11.326441657016302, | |
| "learning_rate": 2.3551946875441467e-07, | |
| "logits/chosen": 19.21741485595703, | |
| "logits/rejected": 19.171350479125977, | |
| "logps/chosen": -265.16619873046875, | |
| "logps/rejected": -185.63027954101562, | |
| "loss": 0.586, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -4.824009895324707, | |
| "rewards/margins": 5.92770528793335, | |
| "rewards/rejected": -10.751714706420898, | |
| "sft_loss": 0.967497706413269, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.5999342537804077, | |
| "grad_norm": 16.154882276044376, | |
| "learning_rate": 2.3498529466987147e-07, | |
| "logits/chosen": 18.083656311035156, | |
| "logits/rejected": 19.166841506958008, | |
| "logps/chosen": -275.3788146972656, | |
| "logps/rejected": -196.90736389160156, | |
| "loss": 0.6121, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.075117111206055, | |
| "rewards/margins": 6.357577323913574, | |
| "rewards/rejected": -11.432694435119629, | |
| "sft_loss": 0.9689314961433411, | |
| "step": 365 | |
| }, | |
| { | |
| "epoch": 0.6081525312294543, | |
| "grad_norm": 12.069410065037287, | |
| "learning_rate": 2.3444207418870688e-07, | |
| "logits/chosen": 17.682310104370117, | |
| "logits/rejected": 18.865554809570312, | |
| "logps/chosen": -277.48114013671875, | |
| "logps/rejected": -195.44508361816406, | |
| "loss": 0.5471, | |
| "rewards/accuracies": 0.9100000262260437, | |
| "rewards/chosen": -4.911283493041992, | |
| "rewards/margins": 5.8411865234375, | |
| "rewards/rejected": -10.75246810913086, | |
| "sft_loss": 0.8908612728118896, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.616370808678501, | |
| "grad_norm": 17.941774722560346, | |
| "learning_rate": 2.3388985199132962e-07, | |
| "logits/chosen": 17.635793685913086, | |
| "logits/rejected": 18.530078887939453, | |
| "logps/chosen": -265.6659240722656, | |
| "logps/rejected": -185.41099548339844, | |
| "loss": 0.5578, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -4.487802982330322, | |
| "rewards/margins": 5.8236083984375, | |
| "rewards/rejected": -10.311410903930664, | |
| "sft_loss": 0.8852910399436951, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.6245890861275477, | |
| "grad_norm": 16.222798143855407, | |
| "learning_rate": 2.3332867349854844e-07, | |
| "logits/chosen": 18.22924041748047, | |
| "logits/rejected": 19.445384979248047, | |
| "logps/chosen": -267.8589172363281, | |
| "logps/rejected": -200.61328125, | |
| "loss": 0.6283, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -5.089979648590088, | |
| "rewards/margins": 6.0606160163879395, | |
| "rewards/rejected": -11.150596618652344, | |
| "sft_loss": 0.85948646068573, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.6328073635765944, | |
| "grad_norm": 58.78518201844404, | |
| "learning_rate": 2.3275858486783578e-07, | |
| "logits/chosen": 17.743967056274414, | |
| "logits/rejected": 19.073143005371094, | |
| "logps/chosen": -229.31361389160156, | |
| "logps/rejected": -178.3441162109375, | |
| "loss": 0.5824, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -4.969345569610596, | |
| "rewards/margins": 5.37393045425415, | |
| "rewards/rejected": -10.343276023864746, | |
| "sft_loss": 0.9465056657791138, | |
| "step": 385 | |
| }, | |
| { | |
| "epoch": 0.6410256410256411, | |
| "grad_norm": 15.400545086822072, | |
| "learning_rate": 2.321796329895317e-07, | |
| "logits/chosen": 16.995241165161133, | |
| "logits/rejected": 18.397994995117188, | |
| "logps/chosen": -266.69647216796875, | |
| "logps/rejected": -193.65902709960938, | |
| "loss": 0.5813, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.233003616333008, | |
| "rewards/margins": 6.00741720199585, | |
| "rewards/rejected": -11.240421295166016, | |
| "sft_loss": 0.9756826758384705, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.6492439184746877, | |
| "grad_norm": 11.604457345989609, | |
| "learning_rate": 2.3159186548298688e-07, | |
| "logits/chosen": 16.9737606048584, | |
| "logits/rejected": 18.478750228881836, | |
| "logps/chosen": -257.61419677734375, | |
| "logps/rejected": -194.60252380371094, | |
| "loss": 0.5278, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -5.19744873046875, | |
| "rewards/margins": 6.024503707885742, | |
| "rewards/rejected": -11.221953392028809, | |
| "sft_loss": 0.972574770450592, | |
| "step": 395 | |
| }, | |
| { | |
| "epoch": 0.6574621959237343, | |
| "grad_norm": 14.695134059357779, | |
| "learning_rate": 2.3099533069264594e-07, | |
| "logits/chosen": 17.685321807861328, | |
| "logits/rejected": 18.130495071411133, | |
| "logps/chosen": -257.6887512207031, | |
| "logps/rejected": -180.2339324951172, | |
| "loss": 0.5419, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -5.080874919891357, | |
| "rewards/margins": 5.387575626373291, | |
| "rewards/rejected": -10.468450546264648, | |
| "sft_loss": 1.00028657913208, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.665680473372781, | |
| "grad_norm": 14.2588021174925, | |
| "learning_rate": 2.3039007768407098e-07, | |
| "logits/chosen": 17.992835998535156, | |
| "logits/rejected": 18.434703826904297, | |
| "logps/chosen": -278.3475341796875, | |
| "logps/rejected": -196.46011352539062, | |
| "loss": 0.581, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.298067092895508, | |
| "rewards/margins": 6.079626560211182, | |
| "rewards/rejected": -11.377694129943848, | |
| "sft_loss": 0.9695589542388916, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 0.6738987508218277, | |
| "grad_norm": 14.653004208659825, | |
| "learning_rate": 2.2977615623990603e-07, | |
| "logits/chosen": 18.65854263305664, | |
| "logits/rejected": 19.244489669799805, | |
| "logps/chosen": -263.1656188964844, | |
| "logps/rejected": -193.50169372558594, | |
| "loss": 0.555, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.245527267456055, | |
| "rewards/margins": 5.687096118927002, | |
| "rewards/rejected": -10.932621955871582, | |
| "sft_loss": 0.9538100957870483, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.6821170282708744, | |
| "grad_norm": 16.632773914957095, | |
| "learning_rate": 2.2915361685578235e-07, | |
| "logits/chosen": 18.390525817871094, | |
| "logits/rejected": 19.31244468688965, | |
| "logps/chosen": -259.29205322265625, | |
| "logps/rejected": -189.3291015625, | |
| "loss": 0.5501, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.290169715881348, | |
| "rewards/margins": 5.542262077331543, | |
| "rewards/rejected": -10.83243179321289, | |
| "sft_loss": 0.9607923030853271, | |
| "step": 415 | |
| }, | |
| { | |
| "epoch": 0.6903353057199211, | |
| "grad_norm": 14.010413486772263, | |
| "learning_rate": 2.2852251073616503e-07, | |
| "logits/chosen": 17.323869705200195, | |
| "logits/rejected": 18.94650650024414, | |
| "logps/chosen": -282.4395751953125, | |
| "logps/rejected": -215.9941864013672, | |
| "loss": 0.4948, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.772212505340576, | |
| "rewards/margins": 6.878769397735596, | |
| "rewards/rejected": -12.650981903076172, | |
| "sft_loss": 0.993140697479248, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.6985535831689678, | |
| "grad_norm": 14.508340310090572, | |
| "learning_rate": 2.2788288979014132e-07, | |
| "logits/chosen": 18.25994300842285, | |
| "logits/rejected": 19.41350555419922, | |
| "logps/chosen": -279.428955078125, | |
| "logps/rejected": -197.93687438964844, | |
| "loss": 0.5473, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -5.4432454109191895, | |
| "rewards/margins": 5.909384250640869, | |
| "rewards/rejected": -11.352629661560059, | |
| "sft_loss": 0.9294517040252686, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.7067718606180144, | |
| "grad_norm": 15.828121421000128, | |
| "learning_rate": 2.2723480662715134e-07, | |
| "logits/chosen": 17.447628021240234, | |
| "logits/rejected": 18.819887161254883, | |
| "logps/chosen": -253.06153869628906, | |
| "logps/rejected": -190.72598266601562, | |
| "loss": 0.5712, | |
| "rewards/accuracies": 0.8899999856948853, | |
| "rewards/chosen": -5.495950698852539, | |
| "rewards/margins": 5.677833080291748, | |
| "rewards/rejected": -11.173783302307129, | |
| "sft_loss": 1.0165560245513916, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.7149901380670611, | |
| "grad_norm": 21.070659832772854, | |
| "learning_rate": 2.2657831455266063e-07, | |
| "logits/chosen": 19.03611946105957, | |
| "logits/rejected": 19.757238388061523, | |
| "logps/chosen": -281.93084716796875, | |
| "logps/rejected": -194.18865966796875, | |
| "loss": 0.6137, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -5.303485870361328, | |
| "rewards/margins": 5.8611884117126465, | |
| "rewards/rejected": -11.164673805236816, | |
| "sft_loss": 1.0157676935195923, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 0.7232084155161078, | |
| "grad_norm": 10.044668338093802, | |
| "learning_rate": 2.2591346756377588e-07, | |
| "logits/chosen": 18.8349666595459, | |
| "logits/rejected": 19.587926864624023, | |
| "logps/chosen": -245.26052856445312, | |
| "logps/rejected": -174.76011657714844, | |
| "loss": 0.5325, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -4.77711820602417, | |
| "rewards/margins": 5.245749473571777, | |
| "rewards/rejected": -10.022867202758789, | |
| "sft_loss": 0.9105268120765686, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.7314266929651545, | |
| "grad_norm": 13.114453854538773, | |
| "learning_rate": 2.252403203448034e-07, | |
| "logits/chosen": 19.10161781311035, | |
| "logits/rejected": 20.04970932006836, | |
| "logps/chosen": -325.4466552734375, | |
| "logps/rejected": -227.55043029785156, | |
| "loss": 0.5582, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -5.897343635559082, | |
| "rewards/margins": 6.912624359130859, | |
| "rewards/rejected": -12.809967994689941, | |
| "sft_loss": 0.9535994529724121, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 0.7396449704142012, | |
| "grad_norm": 12.5969825666755, | |
| "learning_rate": 2.2455892826275155e-07, | |
| "logits/chosen": 18.5415096282959, | |
| "logits/rejected": 19.55573844909668, | |
| "logps/chosen": -302.2394714355469, | |
| "logps/rejected": -217.98895263671875, | |
| "loss": 0.5556, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -6.171204090118408, | |
| "rewards/margins": 6.812131881713867, | |
| "rewards/rejected": -12.9833345413208, | |
| "sft_loss": 0.9671850800514221, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7478632478632479, | |
| "grad_norm": 11.483896112432117, | |
| "learning_rate": 2.2386934736277666e-07, | |
| "logits/chosen": 18.071735382080078, | |
| "logits/rejected": 19.025733947753906, | |
| "logps/chosen": -237.59962463378906, | |
| "logps/rejected": -185.32635498046875, | |
| "loss": 0.577, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -5.7157416343688965, | |
| "rewards/margins": 5.618371963500977, | |
| "rewards/rejected": -11.334112167358398, | |
| "sft_loss": 0.9591123461723328, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 0.7560815253122946, | |
| "grad_norm": 13.120210730356671, | |
| "learning_rate": 2.2317163436357317e-07, | |
| "logits/chosen": 16.842187881469727, | |
| "logits/rejected": 18.437271118164062, | |
| "logps/chosen": -282.98541259765625, | |
| "logps/rejected": -213.07257080078125, | |
| "loss": 0.5363, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.7529802322387695, | |
| "rewards/margins": 6.836727142333984, | |
| "rewards/rejected": -12.589707374572754, | |
| "sft_loss": 0.9440767168998718, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.7642998027613412, | |
| "grad_norm": 12.516354265498741, | |
| "learning_rate": 2.2246584665270855e-07, | |
| "logits/chosen": 18.161880493164062, | |
| "logits/rejected": 19.371177673339844, | |
| "logps/chosen": -298.9051513671875, | |
| "logps/rejected": -213.79953002929688, | |
| "loss": 0.4837, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.515788555145264, | |
| "rewards/margins": 6.570387363433838, | |
| "rewards/rejected": -12.086176872253418, | |
| "sft_loss": 0.9586593508720398, | |
| "step": 465 | |
| }, | |
| { | |
| "epoch": 0.7725180802103879, | |
| "grad_norm": 11.048153129151439, | |
| "learning_rate": 2.2175204228190308e-07, | |
| "logits/chosen": 18.859655380249023, | |
| "logits/rejected": 20.116731643676758, | |
| "logps/chosen": -261.10186767578125, | |
| "logps/rejected": -194.5068817138672, | |
| "loss": 0.6008, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.460696220397949, | |
| "rewards/margins": 5.922670841217041, | |
| "rewards/rejected": -11.383367538452148, | |
| "sft_loss": 0.9851782321929932, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.7807363576594346, | |
| "grad_norm": 34.036831132798504, | |
| "learning_rate": 2.2103027996225512e-07, | |
| "logits/chosen": 17.431440353393555, | |
| "logits/rejected": 18.033245086669922, | |
| "logps/chosen": -278.5311584472656, | |
| "logps/rejected": -198.3171844482422, | |
| "loss": 0.5997, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -5.360807418823242, | |
| "rewards/margins": 6.381589412689209, | |
| "rewards/rejected": -11.74239730834961, | |
| "sft_loss": 1.0034022331237793, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.7889546351084813, | |
| "grad_norm": 14.859702493359293, | |
| "learning_rate": 2.2030061905941193e-07, | |
| "logits/chosen": 18.73612403869629, | |
| "logits/rejected": 18.83433723449707, | |
| "logps/chosen": -264.3339538574219, | |
| "logps/rejected": -190.15017700195312, | |
| "loss": 0.5072, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -5.439321517944336, | |
| "rewards/margins": 5.989686489105225, | |
| "rewards/rejected": -11.429006576538086, | |
| "sft_loss": 0.9705156087875366, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.797172912557528, | |
| "grad_norm": 10.75919165569494, | |
| "learning_rate": 2.1956311958868684e-07, | |
| "logits/chosen": 19.243186950683594, | |
| "logits/rejected": 19.267446517944336, | |
| "logps/chosen": -267.3321228027344, | |
| "logps/rejected": -196.00926208496094, | |
| "loss": 0.4832, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -5.953473091125488, | |
| "rewards/margins": 5.860842227935791, | |
| "rewards/rejected": -11.814314842224121, | |
| "sft_loss": 0.9466427564620972, | |
| "step": 485 | |
| }, | |
| { | |
| "epoch": 0.8053911900065747, | |
| "grad_norm": 17.23206010012729, | |
| "learning_rate": 2.1881784221012307e-07, | |
| "logits/chosen": 17.544191360473633, | |
| "logits/rejected": 18.491127014160156, | |
| "logps/chosen": -250.6893768310547, | |
| "logps/rejected": -189.68630981445312, | |
| "loss": 0.5522, | |
| "rewards/accuracies": 0.9100000262260437, | |
| "rewards/chosen": -6.418759822845459, | |
| "rewards/margins": 5.677851676940918, | |
| "rewards/rejected": -12.096611022949219, | |
| "sft_loss": 1.0340924263000488, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.8136094674556213, | |
| "grad_norm": 14.38672703795697, | |
| "learning_rate": 2.1806484822350417e-07, | |
| "logits/chosen": 17.07558250427246, | |
| "logits/rejected": 17.701539993286133, | |
| "logps/chosen": -301.8546142578125, | |
| "logps/rejected": -211.86402893066406, | |
| "loss": 0.511, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.485326766967773, | |
| "rewards/margins": 6.297828197479248, | |
| "rewards/rejected": -12.78315544128418, | |
| "sft_loss": 1.0085182189941406, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 0.821827744904668, | |
| "grad_norm": 11.220505543423183, | |
| "learning_rate": 2.1730419956331215e-07, | |
| "logits/chosen": 17.45648956298828, | |
| "logits/rejected": 18.378616333007812, | |
| "logps/chosen": -281.8039245605469, | |
| "logps/rejected": -211.0707550048828, | |
| "loss": 0.4967, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -5.824225425720215, | |
| "rewards/margins": 6.649372577667236, | |
| "rewards/rejected": -12.47359848022461, | |
| "sft_loss": 0.9624088406562805, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8300460223537146, | |
| "grad_norm": 19.974838378014, | |
| "learning_rate": 2.1653595879363335e-07, | |
| "logits/chosen": 18.410470962524414, | |
| "logits/rejected": 18.558494567871094, | |
| "logps/chosen": -267.88653564453125, | |
| "logps/rejected": -197.4770050048828, | |
| "loss": 0.5762, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.227014541625977, | |
| "rewards/margins": 6.174468517303467, | |
| "rewards/rejected": -12.401481628417969, | |
| "sft_loss": 0.9929137229919434, | |
| "step": 505 | |
| }, | |
| { | |
| "epoch": 0.8382642998027613, | |
| "grad_norm": 20.289642932843638, | |
| "learning_rate": 2.1576018910301238e-07, | |
| "logits/chosen": 18.445819854736328, | |
| "logits/rejected": 18.456052780151367, | |
| "logps/chosen": -268.7127990722656, | |
| "logps/rejected": -191.65673828125, | |
| "loss": 0.5308, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -6.160595893859863, | |
| "rewards/margins": 5.674745559692383, | |
| "rewards/rejected": -11.835343360900879, | |
| "sft_loss": 0.9606292843818665, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.846482577251808, | |
| "grad_norm": 12.060714182430129, | |
| "learning_rate": 2.1497695429925497e-07, | |
| "logits/chosen": 17.933076858520508, | |
| "logits/rejected": 18.939220428466797, | |
| "logps/chosen": -267.7327575683594, | |
| "logps/rejected": -197.41754150390625, | |
| "loss": 0.5127, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -5.445801258087158, | |
| "rewards/margins": 6.1840291023254395, | |
| "rewards/rejected": -11.629830360412598, | |
| "sft_loss": 0.8621335029602051, | |
| "step": 515 | |
| }, | |
| { | |
| "epoch": 0.8547008547008547, | |
| "grad_norm": 10.501846825508975, | |
| "learning_rate": 2.1418631880417954e-07, | |
| "logits/chosen": 17.952999114990234, | |
| "logits/rejected": 19.42998504638672, | |
| "logps/chosen": -270.5357360839844, | |
| "logps/rejected": -212.4191436767578, | |
| "loss": 0.5705, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -6.491232872009277, | |
| "rewards/margins": 6.157339096069336, | |
| "rewards/rejected": -12.648571968078613, | |
| "sft_loss": 1.0165194272994995, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.8629191321499013, | |
| "grad_norm": 38.938347224135214, | |
| "learning_rate": 2.1338834764831843e-07, | |
| "logits/chosen": 18.03480339050293, | |
| "logits/rejected": 18.895524978637695, | |
| "logps/chosen": -288.3295593261719, | |
| "logps/rejected": -212.9174041748047, | |
| "loss": 0.5076, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.212762355804443, | |
| "rewards/margins": 6.556905746459961, | |
| "rewards/rejected": -12.769665718078613, | |
| "sft_loss": 1.0657466650009155, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.871137409598948, | |
| "grad_norm": 23.662606552485556, | |
| "learning_rate": 2.125831064655693e-07, | |
| "logits/chosen": 18.570951461791992, | |
| "logits/rejected": 19.01372528076172, | |
| "logps/chosen": -299.0896911621094, | |
| "logps/rejected": -218.2689666748047, | |
| "loss": 0.4869, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.196591377258301, | |
| "rewards/margins": 6.7210693359375, | |
| "rewards/rejected": -12.9176607131958, | |
| "sft_loss": 1.0185062885284424, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.8793556870479947, | |
| "grad_norm": 19.788570154737137, | |
| "learning_rate": 2.1177066148779655e-07, | |
| "logits/chosen": 18.860197067260742, | |
| "logits/rejected": 19.767044067382812, | |
| "logps/chosen": -318.2361755371094, | |
| "logps/rejected": -226.54783630371094, | |
| "loss": 0.5328, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.095911979675293, | |
| "rewards/margins": 7.498478412628174, | |
| "rewards/rejected": -13.594389915466309, | |
| "sft_loss": 0.9245139360427856, | |
| "step": 535 | |
| }, | |
| { | |
| "epoch": 0.8875739644970414, | |
| "grad_norm": 9.861201904757298, | |
| "learning_rate": 2.1095107953938348e-07, | |
| "logits/chosen": 18.201683044433594, | |
| "logits/rejected": 18.54186248779297, | |
| "logps/chosen": -252.76708984375, | |
| "logps/rejected": -189.79519653320312, | |
| "loss": 0.491, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.304187774658203, | |
| "rewards/margins": 5.595078945159912, | |
| "rewards/rejected": -11.899266242980957, | |
| "sft_loss": 1.0021482706069946, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.8957922419460881, | |
| "grad_norm": 12.854026542061266, | |
| "learning_rate": 2.1012442803173634e-07, | |
| "logits/chosen": 16.392040252685547, | |
| "logits/rejected": 18.43426513671875, | |
| "logps/chosen": -268.9873962402344, | |
| "logps/rejected": -213.36622619628906, | |
| "loss": 0.452, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -6.529672622680664, | |
| "rewards/margins": 6.670236110687256, | |
| "rewards/rejected": -13.199908256530762, | |
| "sft_loss": 1.0502568483352661, | |
| "step": 545 | |
| }, | |
| { | |
| "epoch": 0.9040105193951348, | |
| "grad_norm": 14.317934082382363, | |
| "learning_rate": 2.0929077495773927e-07, | |
| "logits/chosen": 17.196094512939453, | |
| "logits/rejected": 18.512819290161133, | |
| "logps/chosen": -301.5859375, | |
| "logps/rejected": -215.9300994873047, | |
| "loss": 0.5177, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.289539813995361, | |
| "rewards/margins": 7.147468090057373, | |
| "rewards/rejected": -13.43700885772705, | |
| "sft_loss": 1.052231788635254, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9122287968441815, | |
| "grad_norm": 13.793660373919764, | |
| "learning_rate": 2.0845018888616212e-07, | |
| "logits/chosen": 17.761926651000977, | |
| "logits/rejected": 18.349868774414062, | |
| "logps/chosen": -275.8336486816406, | |
| "logps/rejected": -202.1535186767578, | |
| "loss": 0.4794, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -5.62368106842041, | |
| "rewards/margins": 6.281108856201172, | |
| "rewards/rejected": -11.904790878295898, | |
| "sft_loss": 0.9447892904281616, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 0.9204470742932281, | |
| "grad_norm": 13.501353742225147, | |
| "learning_rate": 2.0760273895602037e-07, | |
| "logits/chosen": 17.632814407348633, | |
| "logits/rejected": 17.65854263305664, | |
| "logps/chosen": -254.25704956054688, | |
| "logps/rejected": -177.63784790039062, | |
| "loss": 0.5335, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -5.114619255065918, | |
| "rewards/margins": 5.592235565185547, | |
| "rewards/rejected": -10.706855773925781, | |
| "sft_loss": 0.9995157718658447, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.9286653517422748, | |
| "grad_norm": 19.535542998103256, | |
| "learning_rate": 2.0674849487088864e-07, | |
| "logits/chosen": 18.379846572875977, | |
| "logits/rejected": 19.475313186645508, | |
| "logps/chosen": -249.86785888671875, | |
| "logps/rejected": -187.93824768066406, | |
| "loss": 0.5958, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -5.827848434448242, | |
| "rewards/margins": 5.467617034912109, | |
| "rewards/rejected": -11.295466423034668, | |
| "sft_loss": 0.9322109222412109, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 0.9368836291913215, | |
| "grad_norm": 25.195757238729385, | |
| "learning_rate": 2.0588752689316723e-07, | |
| "logits/chosen": 18.46122169494629, | |
| "logits/rejected": 18.586881637573242, | |
| "logps/chosen": -286.5140075683594, | |
| "logps/rejected": -202.23248291015625, | |
| "loss": 0.5319, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -6.130897045135498, | |
| "rewards/margins": 6.1991753578186035, | |
| "rewards/rejected": -12.330072402954102, | |
| "sft_loss": 0.924500048160553, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.9451019066403682, | |
| "grad_norm": 14.694663908634908, | |
| "learning_rate": 2.0501990583830315e-07, | |
| "logits/chosen": 17.5371036529541, | |
| "logits/rejected": 18.469070434570312, | |
| "logps/chosen": -274.0564270019531, | |
| "logps/rejected": -211.01268005371094, | |
| "loss": 0.4981, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.664008617401123, | |
| "rewards/margins": 6.217647552490234, | |
| "rewards/rejected": -12.8816556930542, | |
| "sft_loss": 1.0239460468292236, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.9533201840894149, | |
| "grad_norm": 8.507356630817076, | |
| "learning_rate": 2.0414570306896536e-07, | |
| "logits/chosen": 17.411376953125, | |
| "logits/rejected": 18.47208023071289, | |
| "logps/chosen": -295.3019714355469, | |
| "logps/rejected": -213.13792419433594, | |
| "loss": 0.5512, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -6.6735124588012695, | |
| "rewards/margins": 6.6261305809021, | |
| "rewards/rejected": -13.299642562866211, | |
| "sft_loss": 1.529820442199707, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.9615384615384616, | |
| "grad_norm": 25.681414018757476, | |
| "learning_rate": 2.0326499048917527e-07, | |
| "logits/chosen": 17.31963348388672, | |
| "logits/rejected": 18.280134201049805, | |
| "logps/chosen": -282.2524108886719, | |
| "logps/rejected": -218.47996520996094, | |
| "loss": 0.5755, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.157464981079102, | |
| "rewards/margins": 6.622015953063965, | |
| "rewards/rejected": -13.779480934143066, | |
| "sft_loss": 0.9510271549224854, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 0.9697567389875082, | |
| "grad_norm": 13.878204470039535, | |
| "learning_rate": 2.023778405383925e-07, | |
| "logits/chosen": 18.141050338745117, | |
| "logits/rejected": 18.204177856445312, | |
| "logps/chosen": -273.6821594238281, | |
| "logps/rejected": -200.89984130859375, | |
| "loss": 0.4418, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -5.963834762573242, | |
| "rewards/margins": 6.579600811004639, | |
| "rewards/rejected": -12.543435096740723, | |
| "sft_loss": 0.9940951466560364, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.9779750164365549, | |
| "grad_norm": 11.452199407752436, | |
| "learning_rate": 2.0148432618555651e-07, | |
| "logits/chosen": 18.627866744995117, | |
| "logits/rejected": 18.42972755432129, | |
| "logps/chosen": -258.9418029785156, | |
| "logps/rejected": -185.6231231689453, | |
| "loss": 0.5262, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -5.471505165100098, | |
| "rewards/margins": 5.772936820983887, | |
| "rewards/rejected": -11.244441032409668, | |
| "sft_loss": 0.9383735060691833, | |
| "step": 595 | |
| }, | |
| { | |
| "epoch": 0.9861932938856016, | |
| "grad_norm": 11.942794396918284, | |
| "learning_rate": 2.005845209230851e-07, | |
| "logits/chosen": 18.03531265258789, | |
| "logits/rejected": 18.720346450805664, | |
| "logps/chosen": -292.6284484863281, | |
| "logps/rejected": -217.44017028808594, | |
| "loss": 0.5167, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.853020191192627, | |
| "rewards/margins": 6.340816497802734, | |
| "rewards/rejected": -13.193839073181152, | |
| "sft_loss": 1.0825438499450684, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9944115713346483, | |
| "grad_norm": 11.995957867465538, | |
| "learning_rate": 1.9967849876082937e-07, | |
| "logits/chosen": 16.612958908081055, | |
| "logits/rejected": 17.676807403564453, | |
| "logps/chosen": -290.99993896484375, | |
| "logps/rejected": -217.08941650390625, | |
| "loss": 0.5367, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -6.961750030517578, | |
| "rewards/margins": 6.5437798500061035, | |
| "rewards/rejected": -13.505529403686523, | |
| "sft_loss": 1.0639195442199707, | |
| "step": 605 | |
| }, | |
| { | |
| "epoch": 1.0026298487836949, | |
| "grad_norm": 10.297644271924568, | |
| "learning_rate": 1.9876633421998652e-07, | |
| "logits/chosen": 17.37873649597168, | |
| "logits/rejected": 18.0369815826416, | |
| "logps/chosen": -277.8174133300781, | |
| "logps/rejected": -203.3291473388672, | |
| "loss": 0.4734, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.353253364562988, | |
| "rewards/margins": 6.258001804351807, | |
| "rewards/rejected": -12.611254692077637, | |
| "sft_loss": 0.9542250037193298, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.0108481262327416, | |
| "grad_norm": 11.471429971847657, | |
| "learning_rate": 1.9784810232697024e-07, | |
| "logits/chosen": 17.6014461517334, | |
| "logits/rejected": 18.502716064453125, | |
| "logps/chosen": -295.8468017578125, | |
| "logps/rejected": -225.82949829101562, | |
| "loss": 0.4473, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -6.305618762969971, | |
| "rewards/margins": 7.557163238525391, | |
| "rewards/rejected": -13.862781524658203, | |
| "sft_loss": 0.9756129384040833, | |
| "step": 615 | |
| }, | |
| { | |
| "epoch": 1.0190664036817882, | |
| "grad_norm": 14.22423049629626, | |
| "learning_rate": 1.969238786072398e-07, | |
| "logits/chosen": 17.072832107543945, | |
| "logits/rejected": 17.857742309570312, | |
| "logps/chosen": -318.9200134277344, | |
| "logps/rejected": -236.0108184814453, | |
| "loss": 0.423, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.207548141479492, | |
| "rewards/margins": 7.39101505279541, | |
| "rewards/rejected": -14.598563194274902, | |
| "sft_loss": 0.9570875763893127, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.027284681130835, | |
| "grad_norm": 14.863752308544749, | |
| "learning_rate": 1.9599373907908803e-07, | |
| "logits/chosen": 18.62479591369629, | |
| "logits/rejected": 19.332067489624023, | |
| "logps/chosen": -311.5079650878906, | |
| "logps/rejected": -230.38861083984375, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.520875453948975, | |
| "rewards/margins": 7.087317943572998, | |
| "rewards/rejected": -14.608192443847656, | |
| "sft_loss": 1.0305228233337402, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 1.0355029585798816, | |
| "grad_norm": 11.389098298703924, | |
| "learning_rate": 1.9505776024738873e-07, | |
| "logits/chosen": 17.646556854248047, | |
| "logits/rejected": 18.52758026123047, | |
| "logps/chosen": -267.45611572265625, | |
| "logps/rejected": -202.84034729003906, | |
| "loss": 0.494, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.995048999786377, | |
| "rewards/margins": 5.844033241271973, | |
| "rewards/rejected": -12.839081764221191, | |
| "sft_loss": 1.0837846994400024, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.0437212360289283, | |
| "grad_norm": 17.383619355827555, | |
| "learning_rate": 1.9411601909730397e-07, | |
| "logits/chosen": 16.90384292602539, | |
| "logits/rejected": 17.69657325744629, | |
| "logps/chosen": -276.2812805175781, | |
| "logps/rejected": -210.5614471435547, | |
| "loss": 0.5568, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -6.192663669586182, | |
| "rewards/margins": 6.900697231292725, | |
| "rewards/rejected": -13.093358993530273, | |
| "sft_loss": 1.2382417917251587, | |
| "step": 635 | |
| }, | |
| { | |
| "epoch": 1.051939513477975, | |
| "grad_norm": 15.094044445712935, | |
| "learning_rate": 1.9316859308795215e-07, | |
| "logits/chosen": 16.81202507019043, | |
| "logits/rejected": 18.695880889892578, | |
| "logps/chosen": -257.9354553222656, | |
| "logps/rejected": -203.78866577148438, | |
| "loss": 0.5268, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -6.167855262756348, | |
| "rewards/margins": 6.644321441650391, | |
| "rewards/rejected": -12.812177658081055, | |
| "sft_loss": 1.173020839691162, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.0601577909270217, | |
| "grad_norm": 15.863163074258626, | |
| "learning_rate": 1.9221556014603674e-07, | |
| "logits/chosen": 16.538555145263672, | |
| "logits/rejected": 18.44594955444336, | |
| "logps/chosen": -299.3294982910156, | |
| "logps/rejected": -236.79315185546875, | |
| "loss": 0.4933, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.601771354675293, | |
| "rewards/margins": 7.276884078979492, | |
| "rewards/rejected": -14.878654479980469, | |
| "sft_loss": 1.1147685050964355, | |
| "step": 645 | |
| }, | |
| { | |
| "epoch": 1.0683760683760684, | |
| "grad_norm": 12.95009158653796, | |
| "learning_rate": 1.9125699865943696e-07, | |
| "logits/chosen": 17.819013595581055, | |
| "logits/rejected": 18.056425094604492, | |
| "logps/chosen": -280.44134521484375, | |
| "logps/rejected": -211.0347900390625, | |
| "loss": 0.4992, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -6.4677534103393555, | |
| "rewards/margins": 6.797198295593262, | |
| "rewards/rejected": -13.26495361328125, | |
| "sft_loss": 1.0369815826416016, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.076594345825115, | |
| "grad_norm": 9.53030890727526, | |
| "learning_rate": 1.9029298747076e-07, | |
| "logits/chosen": 18.56303596496582, | |
| "logits/rejected": 19.128713607788086, | |
| "logps/chosen": -301.52069091796875, | |
| "logps/rejected": -222.11752319335938, | |
| "loss": 0.4653, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.511043071746826, | |
| "rewards/margins": 7.3326520919799805, | |
| "rewards/rejected": -13.843696594238281, | |
| "sft_loss": 1.039981722831726, | |
| "step": 655 | |
| }, | |
| { | |
| "epoch": 1.0848126232741617, | |
| "grad_norm": 12.49460951335956, | |
| "learning_rate": 1.893236058708565e-07, | |
| "logits/chosen": 17.331298828125, | |
| "logits/rejected": 18.1816463470459, | |
| "logps/chosen": -290.297607421875, | |
| "logps/rejected": -212.6442413330078, | |
| "loss": 0.4897, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.593270301818848, | |
| "rewards/margins": 6.5445356369018555, | |
| "rewards/rejected": -13.137805938720703, | |
| "sft_loss": 1.0305876731872559, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.0930309007232084, | |
| "grad_norm": 10.084660494140396, | |
| "learning_rate": 1.8834893359229839e-07, | |
| "logits/chosen": 17.249683380126953, | |
| "logits/rejected": 18.377492904663086, | |
| "logps/chosen": -317.7668151855469, | |
| "logps/rejected": -234.8712158203125, | |
| "loss": 0.4925, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.917696952819824, | |
| "rewards/margins": 7.316926956176758, | |
| "rewards/rejected": -14.234623908996582, | |
| "sft_loss": 1.0477817058563232, | |
| "step": 665 | |
| }, | |
| { | |
| "epoch": 1.101249178172255, | |
| "grad_norm": 11.370135962731284, | |
| "learning_rate": 1.8736905080282117e-07, | |
| "logits/chosen": 17.393232345581055, | |
| "logits/rejected": 18.21647071838379, | |
| "logps/chosen": -291.6396789550781, | |
| "logps/rejected": -215.71307373046875, | |
| "loss": 0.5118, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.400353908538818, | |
| "rewards/margins": 6.503895282745361, | |
| "rewards/rejected": -12.904250144958496, | |
| "sft_loss": 1.0789752006530762, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.1094674556213018, | |
| "grad_norm": 14.128398069389478, | |
| "learning_rate": 1.8638403809872988e-07, | |
| "logits/chosen": 18.000486373901367, | |
| "logits/rejected": 19.02123260498047, | |
| "logps/chosen": -238.9346923828125, | |
| "logps/rejected": -187.83901977539062, | |
| "loss": 0.4881, | |
| "rewards/accuracies": 0.9100000262260437, | |
| "rewards/chosen": -5.991827011108398, | |
| "rewards/margins": 6.166553974151611, | |
| "rewards/rejected": -12.158380508422852, | |
| "sft_loss": 1.0633037090301514, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 1.1176857330703485, | |
| "grad_norm": 10.039232848979895, | |
| "learning_rate": 1.8539397649826993e-07, | |
| "logits/chosen": 17.416231155395508, | |
| "logits/rejected": 18.53554344177246, | |
| "logps/chosen": -271.6786193847656, | |
| "logps/rejected": -208.55459594726562, | |
| "loss": 0.4408, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.946457386016846, | |
| "rewards/margins": 6.493756294250488, | |
| "rewards/rejected": -13.440213203430176, | |
| "sft_loss": 1.0465832948684692, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.1259040105193951, | |
| "grad_norm": 17.7290983481912, | |
| "learning_rate": 1.8439894743496336e-07, | |
| "logits/chosen": 17.006452560424805, | |
| "logits/rejected": 17.804595947265625, | |
| "logps/chosen": -289.0384826660156, | |
| "logps/rejected": -228.98916625976562, | |
| "loss": 0.464, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.2547478675842285, | |
| "rewards/margins": 7.524634838104248, | |
| "rewards/rejected": -14.779382705688477, | |
| "sft_loss": 1.0623209476470947, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 1.1341222879684418, | |
| "grad_norm": 15.995020113178853, | |
| "learning_rate": 1.8339903275091085e-07, | |
| "logits/chosen": 17.363964080810547, | |
| "logits/rejected": 18.096250534057617, | |
| "logps/chosen": -313.4389343261719, | |
| "logps/rejected": -239.9541015625, | |
| "loss": 0.4292, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.249270439147949, | |
| "rewards/margins": 7.737963676452637, | |
| "rewards/rejected": -14.987234115600586, | |
| "sft_loss": 1.1172467470169067, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.1423405654174885, | |
| "grad_norm": 10.290356468777885, | |
| "learning_rate": 1.8239431469006e-07, | |
| "logits/chosen": 16.6265811920166, | |
| "logits/rejected": 18.333799362182617, | |
| "logps/chosen": -268.6365966796875, | |
| "logps/rejected": -221.0557098388672, | |
| "loss": 0.4627, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -6.95206356048584, | |
| "rewards/margins": 7.242475986480713, | |
| "rewards/rejected": -14.194538116455078, | |
| "sft_loss": 1.2080581188201904, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 1.1505588428665352, | |
| "grad_norm": 12.079608347733119, | |
| "learning_rate": 1.8138487589144093e-07, | |
| "logits/chosen": 16.631559371948242, | |
| "logits/rejected": 16.87362289428711, | |
| "logps/chosen": -273.40997314453125, | |
| "logps/rejected": -210.4160614013672, | |
| "loss": 0.5063, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.272107124328613, | |
| "rewards/margins": 6.501527786254883, | |
| "rewards/rejected": -13.77363395690918, | |
| "sft_loss": 1.0478310585021973, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.1587771203155819, | |
| "grad_norm": 17.778097749378432, | |
| "learning_rate": 1.8037079938236894e-07, | |
| "logits/chosen": 17.234224319458008, | |
| "logits/rejected": 18.432863235473633, | |
| "logps/chosen": -281.38458251953125, | |
| "logps/rejected": -223.9882049560547, | |
| "loss": 0.4823, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.636561870574951, | |
| "rewards/margins": 7.072784423828125, | |
| "rewards/rejected": -14.709345817565918, | |
| "sft_loss": 0.9729472398757935, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 1.1669953977646286, | |
| "grad_norm": 13.760102505142987, | |
| "learning_rate": 1.793521685716154e-07, | |
| "logits/chosen": 17.158409118652344, | |
| "logits/rejected": 18.147829055786133, | |
| "logps/chosen": -339.1050720214844, | |
| "logps/rejected": -257.5541687011719, | |
| "loss": 0.4268, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.170562744140625, | |
| "rewards/margins": 8.136800765991211, | |
| "rewards/rejected": -16.307363510131836, | |
| "sft_loss": 1.087196946144104, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.1752136752136753, | |
| "grad_norm": 12.543576537196508, | |
| "learning_rate": 1.7832906724254747e-07, | |
| "logits/chosen": 16.710582733154297, | |
| "logits/rejected": 17.746997833251953, | |
| "logps/chosen": -279.0878601074219, | |
| "logps/rejected": -217.86927795410156, | |
| "loss": 0.4347, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.324019908905029, | |
| "rewards/margins": 6.887091636657715, | |
| "rewards/rejected": -14.211112022399902, | |
| "sft_loss": 1.0954669713974, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 1.183431952662722, | |
| "grad_norm": 14.156394204679035, | |
| "learning_rate": 1.7730157954623685e-07, | |
| "logits/chosen": 17.9290828704834, | |
| "logits/rejected": 17.706289291381836, | |
| "logps/chosen": -284.99176025390625, | |
| "logps/rejected": -210.2812957763672, | |
| "loss": 0.5001, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.146309852600098, | |
| "rewards/margins": 6.681734085083008, | |
| "rewards/rejected": -13.828044891357422, | |
| "sft_loss": 1.0680426359176636, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.1916502301117686, | |
| "grad_norm": 12.575179703681824, | |
| "learning_rate": 1.7626978999453794e-07, | |
| "logits/chosen": 17.4116268157959, | |
| "logits/rejected": 17.362062454223633, | |
| "logps/chosen": -319.6551818847656, | |
| "logps/rejected": -242.6376495361328, | |
| "loss": 0.3929, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.572165012359619, | |
| "rewards/margins": 7.830206871032715, | |
| "rewards/rejected": -15.402371406555176, | |
| "sft_loss": 1.0497316122055054, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 1.1998685075608153, | |
| "grad_norm": 9.969097695004054, | |
| "learning_rate": 1.7523378345313714e-07, | |
| "logits/chosen": 17.700010299682617, | |
| "logits/rejected": 18.3839168548584, | |
| "logps/chosen": -291.83917236328125, | |
| "logps/rejected": -215.37081909179688, | |
| "loss": 0.5242, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.1273322105407715, | |
| "rewards/margins": 6.290266036987305, | |
| "rewards/rejected": -13.417597770690918, | |
| "sft_loss": 1.382573127746582, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.208086785009862, | |
| "grad_norm": 17.17576749860381, | |
| "learning_rate": 1.741936451345722e-07, | |
| "logits/chosen": 18.578615188598633, | |
| "logits/rejected": 19.108678817749023, | |
| "logps/chosen": -271.18505859375, | |
| "logps/rejected": -205.25746154785156, | |
| "loss": 0.4562, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -6.272554397583008, | |
| "rewards/margins": 6.781675815582275, | |
| "rewards/rejected": -13.054230690002441, | |
| "sft_loss": 1.151402473449707, | |
| "step": 735 | |
| }, | |
| { | |
| "epoch": 1.2163050624589087, | |
| "grad_norm": 17.314304500732653, | |
| "learning_rate": 1.731494605912235e-07, | |
| "logits/chosen": 17.34149932861328, | |
| "logits/rejected": 18.757190704345703, | |
| "logps/chosen": -262.0509948730469, | |
| "logps/rejected": -208.38226318359375, | |
| "loss": 0.4598, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -6.2556867599487305, | |
| "rewards/margins": 6.655214309692383, | |
| "rewards/rejected": -12.910900115966797, | |
| "sft_loss": 1.0516655445098877, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.2245233399079554, | |
| "grad_norm": 15.379389005940164, | |
| "learning_rate": 1.721013157082774e-07, | |
| "logits/chosen": 16.926176071166992, | |
| "logits/rejected": 18.068889617919922, | |
| "logps/chosen": -276.72833251953125, | |
| "logps/rejected": -224.33856201171875, | |
| "loss": 0.4921, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.5205397605896, | |
| "rewards/margins": 6.801075458526611, | |
| "rewards/rejected": -14.321615219116211, | |
| "sft_loss": 1.0424396991729736, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 1.232741617357002, | |
| "grad_norm": 16.009052812361457, | |
| "learning_rate": 1.7104929669666194e-07, | |
| "logits/chosen": 16.49311065673828, | |
| "logits/rejected": 17.206867218017578, | |
| "logps/chosen": -299.70855712890625, | |
| "logps/rejected": -234.7362060546875, | |
| "loss": 0.5132, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.55043888092041, | |
| "rewards/margins": 7.260469436645508, | |
| "rewards/rejected": -14.810908317565918, | |
| "sft_loss": 1.148091197013855, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.2409598948060487, | |
| "grad_norm": 12.479892072042215, | |
| "learning_rate": 1.69993490085956e-07, | |
| "logits/chosen": 16.645790100097656, | |
| "logits/rejected": 18.348690032958984, | |
| "logps/chosen": -289.54217529296875, | |
| "logps/rejected": -232.9552001953125, | |
| "loss": 0.4746, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.2228593826293945, | |
| "rewards/margins": 7.266669273376465, | |
| "rewards/rejected": -14.48952865600586, | |
| "sft_loss": 1.0830727815628052, | |
| "step": 755 | |
| }, | |
| { | |
| "epoch": 1.2491781722550954, | |
| "grad_norm": 13.701336630947893, | |
| "learning_rate": 1.6893398271727222e-07, | |
| "logits/chosen": 17.36661148071289, | |
| "logits/rejected": 18.305465698242188, | |
| "logps/chosen": -300.6762390136719, | |
| "logps/rejected": -228.61175537109375, | |
| "loss": 0.4574, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.493809223175049, | |
| "rewards/margins": 7.260177135467529, | |
| "rewards/rejected": -14.753986358642578, | |
| "sft_loss": 1.016793966293335, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.2573964497041419, | |
| "grad_norm": 10.12301776047569, | |
| "learning_rate": 1.6787086173611407e-07, | |
| "logits/chosen": 17.593551635742188, | |
| "logits/rejected": 18.34381675720215, | |
| "logps/chosen": -280.0817565917969, | |
| "logps/rejected": -211.71542358398438, | |
| "loss": 0.4631, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.457971096038818, | |
| "rewards/margins": 6.6875996589660645, | |
| "rewards/rejected": -14.1455717086792, | |
| "sft_loss": 1.0228469371795654, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 1.2656147271531886, | |
| "grad_norm": 7.684067785358655, | |
| "learning_rate": 1.6680421458520813e-07, | |
| "logits/chosen": 18.189321517944336, | |
| "logits/rejected": 18.308818817138672, | |
| "logps/chosen": -280.6365966796875, | |
| "logps/rejected": -212.9956817626953, | |
| "loss": 0.4905, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.9928879737854, | |
| "rewards/margins": 6.62729024887085, | |
| "rewards/rejected": -13.62017822265625, | |
| "sft_loss": 1.4820358753204346, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.2738330046022353, | |
| "grad_norm": 12.91245370337745, | |
| "learning_rate": 1.6573412899731187e-07, | |
| "logits/chosen": 17.40738868713379, | |
| "logits/rejected": 18.874313354492188, | |
| "logps/chosen": -299.2168884277344, | |
| "logps/rejected": -221.5058135986328, | |
| "loss": 0.4091, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.799927234649658, | |
| "rewards/margins": 6.812719821929932, | |
| "rewards/rejected": -13.612646102905273, | |
| "sft_loss": 1.1041682958602905, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 1.282051282051282, | |
| "grad_norm": 10.002770129869452, | |
| "learning_rate": 1.646606929879975e-07, | |
| "logits/chosen": 18.40058135986328, | |
| "logits/rejected": 19.07294273376465, | |
| "logps/chosen": -323.3199157714844, | |
| "logps/rejected": -239.97935485839844, | |
| "loss": 0.4266, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.476480484008789, | |
| "rewards/margins": 8.036779403686523, | |
| "rewards/rejected": -15.513258934020996, | |
| "sft_loss": 1.0359128713607788, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.2902695595003286, | |
| "grad_norm": 13.874094233494837, | |
| "learning_rate": 1.6358399484841268e-07, | |
| "logits/chosen": 16.465330123901367, | |
| "logits/rejected": 17.001684188842773, | |
| "logps/chosen": -302.719482421875, | |
| "logps/rejected": -224.98745727539062, | |
| "loss": 0.5129, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.293752670288086, | |
| "rewards/margins": 7.167456150054932, | |
| "rewards/rejected": -14.46120834350586, | |
| "sft_loss": 1.1338067054748535, | |
| "step": 785 | |
| }, | |
| { | |
| "epoch": 1.2984878369493753, | |
| "grad_norm": 16.794137790287348, | |
| "learning_rate": 1.625041231380184e-07, | |
| "logits/chosen": 16.809955596923828, | |
| "logits/rejected": 18.395627975463867, | |
| "logps/chosen": -310.674560546875, | |
| "logps/rejected": -239.32200622558594, | |
| "loss": 0.4581, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.000899791717529, | |
| "rewards/margins": 7.625972747802734, | |
| "rewards/rejected": -14.626873016357422, | |
| "sft_loss": 0.9849548935890198, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.306706114398422, | |
| "grad_norm": 12.439364730991043, | |
| "learning_rate": 1.6142116667730482e-07, | |
| "logits/chosen": 19.75507164001465, | |
| "logits/rejected": 20.32160758972168, | |
| "logps/chosen": -293.4500732421875, | |
| "logps/rejected": -214.4062042236328, | |
| "loss": 0.4713, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.081357955932617, | |
| "rewards/margins": 7.148606777191162, | |
| "rewards/rejected": -13.229966163635254, | |
| "sft_loss": 0.9287933111190796, | |
| "step": 795 | |
| }, | |
| { | |
| "epoch": 1.3149243918474687, | |
| "grad_norm": 11.945683940407063, | |
| "learning_rate": 1.6033521454048597e-07, | |
| "logits/chosen": 18.249954223632812, | |
| "logits/rejected": 19.019634246826172, | |
| "logps/chosen": -271.8877258300781, | |
| "logps/rejected": -217.09132385253906, | |
| "loss": 0.4673, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -6.703191757202148, | |
| "rewards/margins": 7.068259239196777, | |
| "rewards/rejected": -13.77145004272461, | |
| "sft_loss": 1.0365476608276367, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.3231426692965154, | |
| "grad_norm": 10.191092591520466, | |
| "learning_rate": 1.5924635604817306e-07, | |
| "logits/chosen": 17.222694396972656, | |
| "logits/rejected": 18.468660354614258, | |
| "logps/chosen": -288.8092041015625, | |
| "logps/rejected": -236.29319763183594, | |
| "loss": 0.4065, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.580431938171387, | |
| "rewards/margins": 7.9504780769348145, | |
| "rewards/rejected": -15.530909538269043, | |
| "sft_loss": 1.162276268005371, | |
| "step": 805 | |
| }, | |
| { | |
| "epoch": 1.331360946745562, | |
| "grad_norm": 9.751260919138856, | |
| "learning_rate": 1.5815468076002771e-07, | |
| "logits/chosen": 16.873342514038086, | |
| "logits/rejected": 18.183860778808594, | |
| "logps/chosen": -312.6845397949219, | |
| "logps/rejected": -240.49859619140625, | |
| "loss": 0.429, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.772741794586182, | |
| "rewards/margins": 8.080373764038086, | |
| "rewards/rejected": -15.853116035461426, | |
| "sft_loss": 0.9787502288818359, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.3395792241946087, | |
| "grad_norm": 13.966159704549986, | |
| "learning_rate": 1.5706027846739588e-07, | |
| "logits/chosen": 17.78404426574707, | |
| "logits/rejected": 18.716482162475586, | |
| "logps/chosen": -265.793701171875, | |
| "logps/rejected": -212.49057006835938, | |
| "loss": 0.4521, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -6.772706985473633, | |
| "rewards/margins": 6.92323112487793, | |
| "rewards/rejected": -13.695940017700195, | |
| "sft_loss": 1.0237793922424316, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 1.3477975016436554, | |
| "grad_norm": 32.697820524211366, | |
| "learning_rate": 1.5596323918592227e-07, | |
| "logits/chosen": 18.034412384033203, | |
| "logits/rejected": 18.671672821044922, | |
| "logps/chosen": -253.35609436035156, | |
| "logps/rejected": -206.98895263671875, | |
| "loss": 0.4833, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -7.246993541717529, | |
| "rewards/margins": 6.500965595245361, | |
| "rewards/rejected": -13.74795913696289, | |
| "sft_loss": 1.0642235279083252, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.356015779092702, | |
| "grad_norm": 12.398186085004639, | |
| "learning_rate": 1.5486365314814637e-07, | |
| "logits/chosen": 17.62421226501465, | |
| "logits/rejected": 18.33708953857422, | |
| "logps/chosen": -292.3586120605469, | |
| "logps/rejected": -230.61155700683594, | |
| "loss": 0.4084, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.588433742523193, | |
| "rewards/margins": 7.831187725067139, | |
| "rewards/rejected": -15.4196195602417, | |
| "sft_loss": 1.0407756567001343, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 1.3642340565417488, | |
| "grad_norm": 12.166605913363364, | |
| "learning_rate": 1.5376161079608088e-07, | |
| "logits/chosen": 17.150541305541992, | |
| "logits/rejected": 18.62920379638672, | |
| "logps/chosen": -296.70465087890625, | |
| "logps/rejected": -242.9381866455078, | |
| "loss": 0.46, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.590549945831299, | |
| "rewards/margins": 8.183311462402344, | |
| "rewards/rejected": -15.773859977722168, | |
| "sft_loss": 1.191388487815857, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.3724523339907955, | |
| "grad_norm": 10.880603493347238, | |
| "learning_rate": 1.5265720277377273e-07, | |
| "logits/chosen": 17.14630889892578, | |
| "logits/rejected": 19.08263397216797, | |
| "logps/chosen": -288.0076904296875, | |
| "logps/rejected": -237.15341186523438, | |
| "loss": 0.4435, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.4387054443359375, | |
| "rewards/margins": 7.707547664642334, | |
| "rewards/rejected": -15.146254539489746, | |
| "sft_loss": 1.0695911645889282, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 1.3806706114398422, | |
| "grad_norm": 50.18720477246092, | |
| "learning_rate": 1.5155051991984745e-07, | |
| "logits/chosen": 18.334110260009766, | |
| "logits/rejected": 18.69322967529297, | |
| "logps/chosen": -315.9974365234375, | |
| "logps/rejected": -228.48602294921875, | |
| "loss": 0.4849, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.224093437194824, | |
| "rewards/margins": 7.033995151519775, | |
| "rewards/rejected": -14.258088111877441, | |
| "sft_loss": 0.9990159869194031, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.3888888888888888, | |
| "grad_norm": 8.437783211213006, | |
| "learning_rate": 1.504416532600378e-07, | |
| "logits/chosen": 17.403743743896484, | |
| "logits/rejected": 18.235454559326172, | |
| "logps/chosen": -242.6099853515625, | |
| "logps/rejected": -199.91429138183594, | |
| "loss": 0.4367, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.768044948577881, | |
| "rewards/margins": 6.265518665313721, | |
| "rewards/rejected": -13.033564567565918, | |
| "sft_loss": 1.0013427734375, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 1.3971071663379355, | |
| "grad_norm": 14.969642809049821, | |
| "learning_rate": 1.4933069399969653e-07, | |
| "logits/chosen": 17.80324935913086, | |
| "logits/rejected": 18.639148712158203, | |
| "logps/chosen": -272.4168395996094, | |
| "logps/rejected": -217.99310302734375, | |
| "loss": 0.4617, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.3702874183654785, | |
| "rewards/margins": 6.988955020904541, | |
| "rewards/rejected": -14.359243392944336, | |
| "sft_loss": 1.1217681169509888, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.4053254437869822, | |
| "grad_norm": 14.289009158482923, | |
| "learning_rate": 1.4821773351629487e-07, | |
| "logits/chosen": 18.467451095581055, | |
| "logits/rejected": 19.347543716430664, | |
| "logps/chosen": -302.4975280761719, | |
| "logps/rejected": -243.9453125, | |
| "loss": 0.4132, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.117691040039062, | |
| "rewards/margins": 8.244772911071777, | |
| "rewards/rejected": -16.362462997436523, | |
| "sft_loss": 1.1255364418029785, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 1.413543721236029, | |
| "grad_norm": 10.706706272611981, | |
| "learning_rate": 1.4710286335190664e-07, | |
| "logits/chosen": 18.262802124023438, | |
| "logits/rejected": 18.210296630859375, | |
| "logps/chosen": -306.64691162109375, | |
| "logps/rejected": -234.53460693359375, | |
| "loss": 0.4363, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.4043498039245605, | |
| "rewards/margins": 7.886282920837402, | |
| "rewards/rejected": -15.290633201599121, | |
| "sft_loss": 1.080936074256897, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.4217619986850756, | |
| "grad_norm": 13.539503399960063, | |
| "learning_rate": 1.4598617520567863e-07, | |
| "logits/chosen": 18.688413619995117, | |
| "logits/rejected": 19.166378021240234, | |
| "logps/chosen": -295.90008544921875, | |
| "logps/rejected": -231.57505798339844, | |
| "loss": 0.4445, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.257371425628662, | |
| "rewards/margins": 7.788801193237305, | |
| "rewards/rejected": -15.046174049377441, | |
| "sft_loss": 1.04954195022583, | |
| "step": 865 | |
| }, | |
| { | |
| "epoch": 1.4299802761341223, | |
| "grad_norm": 20.41588952283392, | |
| "learning_rate": 1.448677609262885e-07, | |
| "logits/chosen": 17.124914169311523, | |
| "logits/rejected": 18.068174362182617, | |
| "logps/chosen": -291.83245849609375, | |
| "logps/rejected": -229.2489776611328, | |
| "loss": 0.4916, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -7.751894474029541, | |
| "rewards/margins": 7.248422145843506, | |
| "rewards/rejected": -15.000316619873047, | |
| "sft_loss": 1.1058861017227173, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.438198553583169, | |
| "grad_norm": 10.416378982514427, | |
| "learning_rate": 1.4374771250438997e-07, | |
| "logits/chosen": 17.683748245239258, | |
| "logits/rejected": 18.105945587158203, | |
| "logps/chosen": -338.9434814453125, | |
| "logps/rejected": -252.90367126464844, | |
| "loss": 0.353, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.734278678894043, | |
| "rewards/margins": 8.11069107055664, | |
| "rewards/rejected": -16.844970703125, | |
| "sft_loss": 1.1128793954849243, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 1.4464168310322156, | |
| "grad_norm": 15.631489594193368, | |
| "learning_rate": 1.4262612206504653e-07, | |
| "logits/chosen": 19.22788429260254, | |
| "logits/rejected": 18.560340881347656, | |
| "logps/chosen": -288.2774658203125, | |
| "logps/rejected": -221.1851806640625, | |
| "loss": 0.4398, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.204787254333496, | |
| "rewards/margins": 6.889291763305664, | |
| "rewards/rejected": -15.094079971313477, | |
| "sft_loss": 1.0347801446914673, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.4546351084812623, | |
| "grad_norm": 22.470025016143673, | |
| "learning_rate": 1.4150308186015428e-07, | |
| "logits/chosen": 18.78541374206543, | |
| "logits/rejected": 19.072355270385742, | |
| "logps/chosen": -266.7073669433594, | |
| "logps/rejected": -214.3734130859375, | |
| "loss": 0.4864, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.12351131439209, | |
| "rewards/margins": 7.012777328491211, | |
| "rewards/rejected": -14.1362886428833, | |
| "sft_loss": 1.0819884538650513, | |
| "step": 885 | |
| }, | |
| { | |
| "epoch": 1.462853385930309, | |
| "grad_norm": 11.047306179137715, | |
| "learning_rate": 1.4037868426085368e-07, | |
| "logits/chosen": 17.600828170776367, | |
| "logits/rejected": 17.870738983154297, | |
| "logps/chosen": -321.2472229003906, | |
| "logps/rejected": -237.96395874023438, | |
| "loss": 0.4823, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.274439811706543, | |
| "rewards/margins": 8.21683120727539, | |
| "rewards/rejected": -15.49127197265625, | |
| "sft_loss": 1.1358665227890015, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.4710716633793557, | |
| "grad_norm": 9.894309836137355, | |
| "learning_rate": 1.3925302174993233e-07, | |
| "logits/chosen": 16.768348693847656, | |
| "logits/rejected": 18.076475143432617, | |
| "logps/chosen": -295.2914123535156, | |
| "logps/rejected": -222.6123504638672, | |
| "loss": 0.4288, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -6.861530780792236, | |
| "rewards/margins": 7.223613262176514, | |
| "rewards/rejected": -14.085144996643066, | |
| "sft_loss": 0.9808722734451294, | |
| "step": 895 | |
| }, | |
| { | |
| "epoch": 1.4792899408284024, | |
| "grad_norm": 15.122256978486702, | |
| "learning_rate": 1.3812618691421803e-07, | |
| "logits/chosen": 17.618257522583008, | |
| "logits/rejected": 18.547971725463867, | |
| "logps/chosen": -307.7926025390625, | |
| "logps/rejected": -228.6370849609375, | |
| "loss": 0.4755, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.855221748352051, | |
| "rewards/margins": 7.493732929229736, | |
| "rewards/rejected": -14.348955154418945, | |
| "sft_loss": 0.975628137588501, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.487508218277449, | |
| "grad_norm": 14.990640701163656, | |
| "learning_rate": 1.3699827243696336e-07, | |
| "logits/chosen": 17.19367027282715, | |
| "logits/rejected": 18.374305725097656, | |
| "logps/chosen": -286.5935363769531, | |
| "logps/rejected": -236.76593017578125, | |
| "loss": 0.4732, | |
| "rewards/accuracies": 0.9300000071525574, | |
| "rewards/chosen": -7.718534469604492, | |
| "rewards/margins": 7.860580921173096, | |
| "rewards/rejected": -15.57911491394043, | |
| "sft_loss": 1.1146594285964966, | |
| "step": 905 | |
| }, | |
| { | |
| "epoch": 1.4957264957264957, | |
| "grad_norm": 10.50314444472379, | |
| "learning_rate": 1.3586937109022251e-07, | |
| "logits/chosen": 16.421382904052734, | |
| "logits/rejected": 17.77210235595703, | |
| "logps/chosen": -324.25927734375, | |
| "logps/rejected": -260.9275207519531, | |
| "loss": 0.4663, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.703363418579102, | |
| "rewards/margins": 8.462730407714844, | |
| "rewards/rejected": -17.166095733642578, | |
| "sft_loss": 1.0979522466659546, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.5039447731755424, | |
| "grad_norm": 16.690789592498312, | |
| "learning_rate": 1.347395757272207e-07, | |
| "logits/chosen": 19.563251495361328, | |
| "logits/rejected": 19.970426559448242, | |
| "logps/chosen": -271.6186218261719, | |
| "logps/rejected": -212.50277709960938, | |
| "loss": 0.4515, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -6.6580634117126465, | |
| "rewards/margins": 7.265621185302734, | |
| "rewards/rejected": -13.923684120178223, | |
| "sft_loss": 1.0007566213607788, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 1.5121630506245891, | |
| "grad_norm": 21.799881591539336, | |
| "learning_rate": 1.3360897927471668e-07, | |
| "logits/chosen": 18.252246856689453, | |
| "logits/rejected": 18.873050689697266, | |
| "logps/chosen": -278.3526611328125, | |
| "logps/rejected": -221.5440216064453, | |
| "loss": 0.4632, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -7.180948257446289, | |
| "rewards/margins": 7.29295539855957, | |
| "rewards/rejected": -14.473901748657227, | |
| "sft_loss": 1.0442688465118408, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.5203813280736358, | |
| "grad_norm": 10.712033452260947, | |
| "learning_rate": 1.3247767472535972e-07, | |
| "logits/chosen": 18.07443618774414, | |
| "logits/rejected": 19.142240524291992, | |
| "logps/chosen": -294.86700439453125, | |
| "logps/rejected": -238.5161895751953, | |
| "loss": 0.4686, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.611084461212158, | |
| "rewards/margins": 8.040576934814453, | |
| "rewards/rejected": -15.651662826538086, | |
| "sft_loss": 1.0576171875, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 1.5285996055226825, | |
| "grad_norm": 7.019511894014553, | |
| "learning_rate": 1.3134575513004073e-07, | |
| "logits/chosen": 18.114564895629883, | |
| "logits/rejected": 18.515487670898438, | |
| "logps/chosen": -303.06329345703125, | |
| "logps/rejected": -237.0087432861328, | |
| "loss": 0.3908, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.551575183868408, | |
| "rewards/margins": 7.9892473220825195, | |
| "rewards/rejected": -15.540822982788086, | |
| "sft_loss": 1.048262119293213, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.5368178829717292, | |
| "grad_norm": 14.009760349607332, | |
| "learning_rate": 1.3021331359023874e-07, | |
| "logits/chosen": 17.101354598999023, | |
| "logits/rejected": 18.246139526367188, | |
| "logps/chosen": -310.4385070800781, | |
| "logps/rejected": -244.6991424560547, | |
| "loss": 0.4262, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.569284439086914, | |
| "rewards/margins": 8.347086906433105, | |
| "rewards/rejected": -15.916370391845703, | |
| "sft_loss": 1.0606290102005005, | |
| "step": 935 | |
| }, | |
| { | |
| "epoch": 1.5450361604207759, | |
| "grad_norm": 15.650861724973655, | |
| "learning_rate": 1.2908044325036312e-07, | |
| "logits/chosen": 17.97089195251465, | |
| "logits/rejected": 18.223573684692383, | |
| "logps/chosen": -296.1282958984375, | |
| "logps/rejected": -233.69146728515625, | |
| "loss": 0.4616, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.757159233093262, | |
| "rewards/margins": 7.639113903045654, | |
| "rewards/rejected": -15.396271705627441, | |
| "sft_loss": 1.138619065284729, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.5532544378698225, | |
| "grad_norm": 17.515447400155715, | |
| "learning_rate": 1.2794723729009255e-07, | |
| "logits/chosen": 16.958641052246094, | |
| "logits/rejected": 18.472318649291992, | |
| "logps/chosen": -298.9012756347656, | |
| "logps/rejected": -239.90469360351562, | |
| "loss": 0.4502, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.437976837158203, | |
| "rewards/margins": 8.138365745544434, | |
| "rewards/rejected": -15.576342582702637, | |
| "sft_loss": 1.0626742839813232, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 1.5614727153188692, | |
| "grad_norm": 45.641039135520685, | |
| "learning_rate": 1.2681378891671082e-07, | |
| "logits/chosen": 17.490928649902344, | |
| "logits/rejected": 17.976585388183594, | |
| "logps/chosen": -306.0874328613281, | |
| "logps/rejected": -237.03607177734375, | |
| "loss": 0.4737, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.880832672119141, | |
| "rewards/margins": 7.584968090057373, | |
| "rewards/rejected": -15.465802192687988, | |
| "sft_loss": 1.0900439023971558, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.569690992767916, | |
| "grad_norm": 19.898061737121086, | |
| "learning_rate": 1.2568019135744044e-07, | |
| "logits/chosen": 16.957841873168945, | |
| "logits/rejected": 17.985727310180664, | |
| "logps/chosen": -291.70135498046875, | |
| "logps/rejected": -229.38314819335938, | |
| "loss": 0.4349, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.644362926483154, | |
| "rewards/margins": 7.429901123046875, | |
| "rewards/rejected": -15.074263572692871, | |
| "sft_loss": 1.0944395065307617, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 1.5779092702169626, | |
| "grad_norm": 12.39680434949017, | |
| "learning_rate": 1.2454653785177445e-07, | |
| "logits/chosen": 17.493330001831055, | |
| "logits/rejected": 18.42995834350586, | |
| "logps/chosen": -278.9170837402344, | |
| "logps/rejected": -230.72608947753906, | |
| "loss": 0.4231, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.324814319610596, | |
| "rewards/margins": 7.593767166137695, | |
| "rewards/rejected": -14.918582916259766, | |
| "sft_loss": 1.0732117891311646, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.5861275476660093, | |
| "grad_norm": 21.306042868258853, | |
| "learning_rate": 1.2341292164380783e-07, | |
| "logits/chosen": 18.833568572998047, | |
| "logits/rejected": 18.869935989379883, | |
| "logps/chosen": -286.1907653808594, | |
| "logps/rejected": -224.49281311035156, | |
| "loss": 0.4817, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -7.71510124206543, | |
| "rewards/margins": 7.221285820007324, | |
| "rewards/rejected": -14.936385154724121, | |
| "sft_loss": 1.3040668964385986, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 1.594345825115056, | |
| "grad_norm": 14.69263028616145, | |
| "learning_rate": 1.222794359745675e-07, | |
| "logits/chosen": 16.27896499633789, | |
| "logits/rejected": 18.376323699951172, | |
| "logps/chosen": -300.5797424316406, | |
| "logps/rejected": -242.6514129638672, | |
| "loss": 0.4114, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -7.52255392074585, | |
| "rewards/margins": 8.169685363769531, | |
| "rewards/rejected": -15.692238807678223, | |
| "sft_loss": 1.0308858156204224, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.6025641025641026, | |
| "grad_norm": 13.802476438483277, | |
| "learning_rate": 1.2114617407434354e-07, | |
| "logits/chosen": 18.055139541625977, | |
| "logits/rejected": 19.250368118286133, | |
| "logps/chosen": -309.2381286621094, | |
| "logps/rejected": -245.81809997558594, | |
| "loss": 0.4326, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.345053672790527, | |
| "rewards/margins": 8.126486778259277, | |
| "rewards/rejected": -15.471541404724121, | |
| "sft_loss": 1.123140811920166, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 1.6107823800131493, | |
| "grad_norm": 10.423391619330996, | |
| "learning_rate": 1.2001322915502091e-07, | |
| "logits/chosen": 16.897199630737305, | |
| "logits/rejected": 18.748310089111328, | |
| "logps/chosen": -292.1817932128906, | |
| "logps/rejected": -235.8812255859375, | |
| "loss": 0.3942, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.173976898193359, | |
| "rewards/margins": 7.872208118438721, | |
| "rewards/rejected": -15.046185493469238, | |
| "sft_loss": 1.1811002492904663, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.619000657462196, | |
| "grad_norm": 26.973905524007105, | |
| "learning_rate": 1.1888069440241243e-07, | |
| "logits/chosen": 18.107698440551758, | |
| "logits/rejected": 19.736108779907227, | |
| "logps/chosen": -317.0016174316406, | |
| "logps/rejected": -252.54832458496094, | |
| "loss": 0.4222, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.532571792602539, | |
| "rewards/margins": 9.049071311950684, | |
| "rewards/rejected": -16.581642150878906, | |
| "sft_loss": 1.075319766998291, | |
| "step": 985 | |
| }, | |
| { | |
| "epoch": 1.6272189349112427, | |
| "grad_norm": 16.255178289646476, | |
| "learning_rate": 1.1774866296859448e-07, | |
| "logits/chosen": 17.9573917388916, | |
| "logits/rejected": 19.03142738342285, | |
| "logps/chosen": -301.56561279296875, | |
| "logps/rejected": -243.9299774169922, | |
| "loss": 0.4749, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.284952163696289, | |
| "rewards/margins": 8.675047874450684, | |
| "rewards/rejected": -15.960000038146973, | |
| "sft_loss": 1.1328290700912476, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.6354372123602894, | |
| "grad_norm": 10.065426351498546, | |
| "learning_rate": 1.1661722796424478e-07, | |
| "logits/chosen": 17.292905807495117, | |
| "logits/rejected": 18.3796443939209, | |
| "logps/chosen": -309.9263000488281, | |
| "logps/rejected": -241.42181396484375, | |
| "loss": 0.4268, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.671374320983887, | |
| "rewards/margins": 8.211640357971191, | |
| "rewards/rejected": -15.883017539978027, | |
| "sft_loss": 1.0408843755722046, | |
| "step": 995 | |
| }, | |
| { | |
| "epoch": 1.643655489809336, | |
| "grad_norm": 12.50718545323396, | |
| "learning_rate": 1.1548648245098432e-07, | |
| "logits/chosen": 17.582983016967773, | |
| "logits/rejected": 18.472742080688477, | |
| "logps/chosen": -319.5430908203125, | |
| "logps/rejected": -253.3585968017578, | |
| "loss": 0.4368, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.135196685791016, | |
| "rewards/margins": 8.56678295135498, | |
| "rewards/rejected": -16.701980590820312, | |
| "sft_loss": 1.121424674987793, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.6518737672583828, | |
| "grad_norm": 9.456497156444888, | |
| "learning_rate": 1.1435651943372278e-07, | |
| "logits/chosen": 16.574844360351562, | |
| "logits/rejected": 17.709199905395508, | |
| "logps/chosen": -286.1977844238281, | |
| "logps/rejected": -229.33741760253906, | |
| "loss": 0.4208, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.042440414428711, | |
| "rewards/margins": 7.619970798492432, | |
| "rewards/rejected": -15.662409782409668, | |
| "sft_loss": 1.1242254972457886, | |
| "step": 1005 | |
| }, | |
| { | |
| "epoch": 1.6600920447074294, | |
| "grad_norm": 12.581807587635986, | |
| "learning_rate": 1.1322743185300865e-07, | |
| "logits/chosen": 17.700603485107422, | |
| "logits/rejected": 19.024187088012695, | |
| "logps/chosen": -296.780029296875, | |
| "logps/rejected": -233.88160705566406, | |
| "loss": 0.4889, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -7.796105861663818, | |
| "rewards/margins": 7.478055953979492, | |
| "rewards/rejected": -15.274161338806152, | |
| "sft_loss": 1.075081467628479, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.6683103221564761, | |
| "grad_norm": 14.09597654178517, | |
| "learning_rate": 1.1209931257738503e-07, | |
| "logits/chosen": 17.260271072387695, | |
| "logits/rejected": 18.022357940673828, | |
| "logps/chosen": -306.3436584472656, | |
| "logps/rejected": -227.7841339111328, | |
| "loss": 0.4487, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -6.75, | |
| "rewards/margins": 7.679973602294922, | |
| "rewards/rejected": -14.429974555969238, | |
| "sft_loss": 1.1023831367492676, | |
| "step": 1015 | |
| }, | |
| { | |
| "epoch": 1.6765285996055228, | |
| "grad_norm": 10.14530298124155, | |
| "learning_rate": 1.1097225439575096e-07, | |
| "logits/chosen": 16.790157318115234, | |
| "logits/rejected": 17.936586380004883, | |
| "logps/chosen": -274.2288818359375, | |
| "logps/rejected": -220.5703125, | |
| "loss": 0.4648, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -6.9578022956848145, | |
| "rewards/margins": 7.266170501708984, | |
| "rewards/rejected": -14.22397232055664, | |
| "sft_loss": 1.0298852920532227, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.6847468770545695, | |
| "grad_norm": 14.64734935061402, | |
| "learning_rate": 1.0984635000972946e-07, | |
| "logits/chosen": 16.42229461669922, | |
| "logits/rejected": 17.54804229736328, | |
| "logps/chosen": -277.86077880859375, | |
| "logps/rejected": -223.43917846679688, | |
| "loss": 0.5101, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.510883808135986, | |
| "rewards/margins": 7.296814441680908, | |
| "rewards/rejected": -14.807699203491211, | |
| "sft_loss": 1.089572548866272, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 1.6929651545036162, | |
| "grad_norm": 14.998745686830942, | |
| "learning_rate": 1.0872169202604284e-07, | |
| "logits/chosen": 17.45005226135254, | |
| "logits/rejected": 18.329872131347656, | |
| "logps/chosen": -335.4214782714844, | |
| "logps/rejected": -264.5696105957031, | |
| "loss": 0.4259, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -8.625652313232422, | |
| "rewards/margins": 8.4821138381958, | |
| "rewards/rejected": -17.107765197753906, | |
| "sft_loss": 1.1337147951126099, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.7011834319526629, | |
| "grad_norm": 15.126502195785678, | |
| "learning_rate": 1.0759837294889546e-07, | |
| "logits/chosen": 15.89870834350586, | |
| "logits/rejected": 17.66954803466797, | |
| "logps/chosen": -324.4315185546875, | |
| "logps/rejected": -251.8769073486328, | |
| "loss": 0.4365, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.891256332397461, | |
| "rewards/margins": 8.40850830078125, | |
| "rewards/rejected": -16.299766540527344, | |
| "sft_loss": 1.0551294088363647, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 1.7094017094017095, | |
| "grad_norm": 11.887438634341896, | |
| "learning_rate": 1.0647648517236547e-07, | |
| "logits/chosen": 17.808908462524414, | |
| "logits/rejected": 17.868276596069336, | |
| "logps/chosen": -318.5857849121094, | |
| "logps/rejected": -237.06268310546875, | |
| "loss": 0.4077, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.441680431365967, | |
| "rewards/margins": 7.951440811157227, | |
| "rewards/rejected": -15.393121719360352, | |
| "sft_loss": 1.0577045679092407, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.7176199868507562, | |
| "grad_norm": 13.592964221155555, | |
| "learning_rate": 1.0535612097280505e-07, | |
| "logits/chosen": 17.357389450073242, | |
| "logits/rejected": 18.236921310424805, | |
| "logps/chosen": -309.05316162109375, | |
| "logps/rejected": -234.39718627929688, | |
| "loss": 0.4578, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.402204513549805, | |
| "rewards/margins": 7.640995025634766, | |
| "rewards/rejected": -15.043200492858887, | |
| "sft_loss": 1.1290278434753418, | |
| "step": 1045 | |
| }, | |
| { | |
| "epoch": 1.725838264299803, | |
| "grad_norm": 17.516227986033588, | |
| "learning_rate": 1.042373725012508e-07, | |
| "logits/chosen": 15.968868255615234, | |
| "logits/rejected": 17.182361602783203, | |
| "logps/chosen": -277.1082763671875, | |
| "logps/rejected": -217.5791778564453, | |
| "loss": 0.4706, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.2921223640441895, | |
| "rewards/margins": 7.116176605224609, | |
| "rewards/rejected": -14.40829849243164, | |
| "sft_loss": 1.1019597053527832, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.7340565417488496, | |
| "grad_norm": 14.545988790543376, | |
| "learning_rate": 1.0312033177584409e-07, | |
| "logits/chosen": 18.982242584228516, | |
| "logits/rejected": 18.7514705657959, | |
| "logps/chosen": -293.9178466796875, | |
| "logps/rejected": -226.5133819580078, | |
| "loss": 0.3922, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.807718276977539, | |
| "rewards/margins": 7.406096935272217, | |
| "rewards/rejected": -15.213815689086914, | |
| "sft_loss": 1.0929393768310547, | |
| "step": 1055 | |
| }, | |
| { | |
| "epoch": 1.7422748191978963, | |
| "grad_norm": 10.680737229216966, | |
| "learning_rate": 1.0200509067426243e-07, | |
| "logits/chosen": 16.079814910888672, | |
| "logits/rejected": 17.51044273376465, | |
| "logps/chosen": -302.1490173339844, | |
| "logps/rejected": -233.8198699951172, | |
| "loss": 0.444, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.101183891296387, | |
| "rewards/margins": 7.351180553436279, | |
| "rewards/rejected": -15.452364921569824, | |
| "sft_loss": 1.2096168994903564, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.7504930966469427, | |
| "grad_norm": 9.891781648367795, | |
| "learning_rate": 1.0089174092616271e-07, | |
| "logits/chosen": 17.791248321533203, | |
| "logits/rejected": 18.2585506439209, | |
| "logps/chosen": -280.9420166015625, | |
| "logps/rejected": -224.9687957763672, | |
| "loss": 0.4607, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -7.537823677062988, | |
| "rewards/margins": 7.212753772735596, | |
| "rewards/rejected": -14.750576972961426, | |
| "sft_loss": 1.0387908220291138, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 1.7587113740959894, | |
| "grad_norm": 18.289134457763506, | |
| "learning_rate": 9.97803741056361e-08, | |
| "logits/chosen": 16.976699829101562, | |
| "logits/rejected": 17.30523109436035, | |
| "logps/chosen": -275.5840148925781, | |
| "logps/rejected": -215.13279724121094, | |
| "loss": 0.3879, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.284540176391602, | |
| "rewards/margins": 6.921156406402588, | |
| "rewards/rejected": -14.205697059631348, | |
| "sft_loss": 1.0973351001739502, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.7669296515450361, | |
| "grad_norm": 17.72039206697929, | |
| "learning_rate": 9.867108162367594e-08, | |
| "logits/chosen": 16.939437866210938, | |
| "logits/rejected": 18.218585968017578, | |
| "logps/chosen": -294.5352478027344, | |
| "logps/rejected": -230.98623657226562, | |
| "loss": 0.3974, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.325733661651611, | |
| "rewards/margins": 7.582549571990967, | |
| "rewards/rejected": -14.908282279968262, | |
| "sft_loss": 1.034481406211853, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 1.7751479289940828, | |
| "grad_norm": 13.466593004835952, | |
| "learning_rate": 9.756395472065947e-08, | |
| "logits/chosen": 17.363365173339844, | |
| "logits/rejected": 18.14643669128418, | |
| "logps/chosen": -275.0605163574219, | |
| "logps/rejected": -223.0447998046875, | |
| "loss": 0.4368, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.5301194190979, | |
| "rewards/margins": 7.630979537963867, | |
| "rewards/rejected": -15.161099433898926, | |
| "sft_loss": 1.191418170928955, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.7833662064431295, | |
| "grad_norm": 17.525060893448625, | |
| "learning_rate": 9.645908445884271e-08, | |
| "logits/chosen": 17.93121910095215, | |
| "logits/rejected": 19.609464645385742, | |
| "logps/chosen": -313.574951171875, | |
| "logps/rejected": -255.39015197753906, | |
| "loss": 0.392, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.089523315429688, | |
| "rewards/margins": 8.564504623413086, | |
| "rewards/rejected": -16.654027938842773, | |
| "sft_loss": 1.0859136581420898, | |
| "step": 1085 | |
| }, | |
| { | |
| "epoch": 1.7915844838921762, | |
| "grad_norm": 14.818652238656334, | |
| "learning_rate": 9.535656171487096e-08, | |
| "logits/chosen": 17.432899475097656, | |
| "logits/rejected": 18.06930160522461, | |
| "logps/chosen": -306.2559814453125, | |
| "logps/rejected": -247.05564880371094, | |
| "loss": 0.4113, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.375761985778809, | |
| "rewards/margins": 8.475983619689941, | |
| "rewards/rejected": -16.85174560546875, | |
| "sft_loss": 1.2146451473236084, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.7998027613412229, | |
| "grad_norm": 64.12698029544616, | |
| "learning_rate": 9.425647717230382e-08, | |
| "logits/chosen": 17.3497257232666, | |
| "logits/rejected": 18.322324752807617, | |
| "logps/chosen": -314.32830810546875, | |
| "logps/rejected": -253.83473205566406, | |
| "loss": 0.4062, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -8.878050804138184, | |
| "rewards/margins": 8.278247833251953, | |
| "rewards/rejected": -17.15629768371582, | |
| "sft_loss": 1.077860713005066, | |
| "step": 1095 | |
| }, | |
| { | |
| "epoch": 1.8080210387902695, | |
| "grad_norm": 13.052337358867197, | |
| "learning_rate": 9.315892131415642e-08, | |
| "logits/chosen": 16.90951919555664, | |
| "logits/rejected": 18.101472854614258, | |
| "logps/chosen": -344.9137878417969, | |
| "logps/rejected": -264.2882080078125, | |
| "loss": 0.3948, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.652148246765137, | |
| "rewards/margins": 9.170465469360352, | |
| "rewards/rejected": -17.822612762451172, | |
| "sft_loss": 1.2117801904678345, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.8162393162393162, | |
| "grad_norm": 11.613352799050077, | |
| "learning_rate": 9.206398441545729e-08, | |
| "logits/chosen": 17.647083282470703, | |
| "logits/rejected": 18.84397315979004, | |
| "logps/chosen": -312.7010498046875, | |
| "logps/rejected": -254.3484344482422, | |
| "loss": 0.3759, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.489236831665039, | |
| "rewards/margins": 8.119637489318848, | |
| "rewards/rejected": -16.608875274658203, | |
| "sft_loss": 1.01621675491333, | |
| "step": 1105 | |
| }, | |
| { | |
| "epoch": 1.824457593688363, | |
| "grad_norm": 11.15254994077485, | |
| "learning_rate": 9.097175653582299e-08, | |
| "logits/chosen": 17.26348114013672, | |
| "logits/rejected": 18.160728454589844, | |
| "logps/chosen": -284.86114501953125, | |
| "logps/rejected": -232.5272979736328, | |
| "loss": 0.41, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.840343475341797, | |
| "rewards/margins": 7.370659351348877, | |
| "rewards/rejected": -15.211003303527832, | |
| "sft_loss": 1.1511608362197876, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.8326758711374096, | |
| "grad_norm": 14.803907963552794, | |
| "learning_rate": 8.988232751205051e-08, | |
| "logits/chosen": 17.386255264282227, | |
| "logits/rejected": 17.55118751525879, | |
| "logps/chosen": -271.7340087890625, | |
| "logps/rejected": -208.06320190429688, | |
| "loss": 0.4401, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.613986968994141, | |
| "rewards/margins": 6.274531841278076, | |
| "rewards/rejected": -13.888518333435059, | |
| "sft_loss": 1.144532322883606, | |
| "step": 1115 | |
| }, | |
| { | |
| "epoch": 1.8408941485864563, | |
| "grad_norm": 14.423568520659874, | |
| "learning_rate": 8.879578695072846e-08, | |
| "logits/chosen": 17.274259567260742, | |
| "logits/rejected": 18.399911880493164, | |
| "logps/chosen": -289.1215515136719, | |
| "logps/rejected": -230.22369384765625, | |
| "loss": 0.4135, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.828088760375977, | |
| "rewards/margins": 7.673010349273682, | |
| "rewards/rejected": -15.5010986328125, | |
| "sft_loss": 1.1277306079864502, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.849112426035503, | |
| "grad_norm": 11.37404702454821, | |
| "learning_rate": 8.771222422086639e-08, | |
| "logits/chosen": 16.860265731811523, | |
| "logits/rejected": 17.736581802368164, | |
| "logps/chosen": -297.23956298828125, | |
| "logps/rejected": -233.06109619140625, | |
| "loss": 0.3998, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.860833168029785, | |
| "rewards/margins": 8.063416481018066, | |
| "rewards/rejected": -15.924250602722168, | |
| "sft_loss": 1.2870830297470093, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 1.8573307034845496, | |
| "grad_norm": 12.257681191538563, | |
| "learning_rate": 8.663172844654452e-08, | |
| "logits/chosen": 17.366941452026367, | |
| "logits/rejected": 17.93768882751465, | |
| "logps/chosen": -300.5145263671875, | |
| "logps/rejected": -230.68685913085938, | |
| "loss": 0.4455, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.420682430267334, | |
| "rewards/margins": 7.759568691253662, | |
| "rewards/rejected": -15.180251121520996, | |
| "sft_loss": 1.0831838846206665, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.8655489809335963, | |
| "grad_norm": 11.344131200773928, | |
| "learning_rate": 8.555438849958296e-08, | |
| "logits/chosen": 17.97229766845703, | |
| "logits/rejected": 18.921049118041992, | |
| "logps/chosen": -319.6356201171875, | |
| "logps/rejected": -246.49024963378906, | |
| "loss": 0.3864, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.522003650665283, | |
| "rewards/margins": 8.551565170288086, | |
| "rewards/rejected": -16.07356834411621, | |
| "sft_loss": 1.150990605354309, | |
| "step": 1135 | |
| }, | |
| { | |
| "epoch": 1.873767258382643, | |
| "grad_norm": 20.985079338983198, | |
| "learning_rate": 8.448029299223194e-08, | |
| "logits/chosen": 17.783571243286133, | |
| "logits/rejected": 18.174728393554688, | |
| "logps/chosen": -312.2618713378906, | |
| "logps/rejected": -233.99496459960938, | |
| "loss": 0.4933, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -7.624851226806641, | |
| "rewards/margins": 7.475332260131836, | |
| "rewards/rejected": -15.100183486938477, | |
| "sft_loss": 1.1498528718948364, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.8819855358316897, | |
| "grad_norm": 14.844798746234286, | |
| "learning_rate": 8.340953026988351e-08, | |
| "logits/chosen": 17.779254913330078, | |
| "logits/rejected": 19.071887969970703, | |
| "logps/chosen": -311.01190185546875, | |
| "logps/rejected": -248.10272216796875, | |
| "loss": 0.4615, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.804770469665527, | |
| "rewards/margins": 8.161953926086426, | |
| "rewards/rejected": -15.966724395751953, | |
| "sft_loss": 1.1634888648986816, | |
| "step": 1145 | |
| }, | |
| { | |
| "epoch": 1.8902038132807364, | |
| "grad_norm": 11.515222849514643, | |
| "learning_rate": 8.234218840380475e-08, | |
| "logits/chosen": 16.18383026123047, | |
| "logits/rejected": 17.827003479003906, | |
| "logps/chosen": -301.19659423828125, | |
| "logps/rejected": -245.50054931640625, | |
| "loss": 0.4341, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.15174388885498, | |
| "rewards/margins": 7.812210559844971, | |
| "rewards/rejected": -15.963953971862793, | |
| "sft_loss": 1.0311837196350098, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.898422090729783, | |
| "grad_norm": 14.564597779855657, | |
| "learning_rate": 8.127835518389417e-08, | |
| "logits/chosen": 16.831256866455078, | |
| "logits/rejected": 18.508529663085938, | |
| "logps/chosen": -311.1943054199219, | |
| "logps/rejected": -245.4080047607422, | |
| "loss": 0.4095, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.765483856201172, | |
| "rewards/margins": 8.15777587890625, | |
| "rewards/rejected": -15.923259735107422, | |
| "sft_loss": 1.114915132522583, | |
| "step": 1155 | |
| }, | |
| { | |
| "epoch": 1.9066403681788298, | |
| "grad_norm": 26.10926811927184, | |
| "learning_rate": 8.021811811146075e-08, | |
| "logits/chosen": 16.842208862304688, | |
| "logits/rejected": 17.959400177001953, | |
| "logps/chosen": -291.0676574707031, | |
| "logps/rejected": -237.74246215820312, | |
| "loss": 0.4551, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.678957939147949, | |
| "rewards/margins": 8.211709022521973, | |
| "rewards/rejected": -15.890668869018555, | |
| "sft_loss": 1.1757006645202637, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.9148586456278764, | |
| "grad_norm": 12.813401775007092, | |
| "learning_rate": 7.916156439202672e-08, | |
| "logits/chosen": 17.37171173095703, | |
| "logits/rejected": 18.593181610107422, | |
| "logps/chosen": -289.34759521484375, | |
| "logps/rejected": -234.8267059326172, | |
| "loss": 0.4289, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.662449836730957, | |
| "rewards/margins": 7.566576957702637, | |
| "rewards/rejected": -15.229025840759277, | |
| "sft_loss": 1.1354382038116455, | |
| "step": 1165 | |
| }, | |
| { | |
| "epoch": 1.9230769230769231, | |
| "grad_norm": 6.596137423450017, | |
| "learning_rate": 7.810878092815512e-08, | |
| "logits/chosen": 17.296720504760742, | |
| "logits/rejected": 17.11487579345703, | |
| "logps/chosen": -307.8653869628906, | |
| "logps/rejected": -237.65505981445312, | |
| "loss": 0.3663, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.926757335662842, | |
| "rewards/margins": 7.959318161010742, | |
| "rewards/rejected": -15.886076927185059, | |
| "sft_loss": 1.1921048164367676, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.9312952005259696, | |
| "grad_norm": 14.579022955412034, | |
| "learning_rate": 7.705985431230183e-08, | |
| "logits/chosen": 15.675207138061523, | |
| "logits/rejected": 16.91021156311035, | |
| "logps/chosen": -322.23992919921875, | |
| "logps/rejected": -266.904296875, | |
| "loss": 0.391, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.967777252197266, | |
| "rewards/margins": 8.5900297164917, | |
| "rewards/rejected": -17.557802200317383, | |
| "sft_loss": 1.228776454925537, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 1.9395134779750163, | |
| "grad_norm": 12.828599154800472, | |
| "learning_rate": 7.601487081969307e-08, | |
| "logits/chosen": 18.340225219726562, | |
| "logits/rejected": 19.142946243286133, | |
| "logps/chosen": -350.186279296875, | |
| "logps/rejected": -269.3705749511719, | |
| "loss": 0.3851, | |
| "rewards/accuracies": 0.9300000071525574, | |
| "rewards/chosen": -8.475415229797363, | |
| "rewards/margins": 9.2521390914917, | |
| "rewards/rejected": -17.727554321289062, | |
| "sft_loss": 1.1213669776916504, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.947731755424063, | |
| "grad_norm": 13.15248193805534, | |
| "learning_rate": 7.497391640122967e-08, | |
| "logits/chosen": 18.557586669921875, | |
| "logits/rejected": 19.259462356567383, | |
| "logps/chosen": -311.15838623046875, | |
| "logps/rejected": -252.96751403808594, | |
| "loss": 0.4041, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.537500381469727, | |
| "rewards/margins": 8.597896575927734, | |
| "rewards/rejected": -17.13539695739746, | |
| "sft_loss": 1.1180825233459473, | |
| "step": 1185 | |
| }, | |
| { | |
| "epoch": 1.9559500328731096, | |
| "grad_norm": 20.887376048027924, | |
| "learning_rate": 7.393707667641691e-08, | |
| "logits/chosen": 16.45261573791504, | |
| "logits/rejected": 17.498512268066406, | |
| "logps/chosen": -310.4942626953125, | |
| "logps/rejected": -250.18203735351562, | |
| "loss": 0.4276, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.239749908447266, | |
| "rewards/margins": 8.2033109664917, | |
| "rewards/rejected": -16.44305992126465, | |
| "sft_loss": 1.188431739807129, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.9641683103221563, | |
| "grad_norm": 32.140189305396625, | |
| "learning_rate": 7.290443692632281e-08, | |
| "logits/chosen": 19.094688415527344, | |
| "logits/rejected": 19.616283416748047, | |
| "logps/chosen": -291.1233825683594, | |
| "logps/rejected": -234.5458526611328, | |
| "loss": 0.4942, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.3053131103515625, | |
| "rewards/margins": 7.835725784301758, | |
| "rewards/rejected": -15.141037940979004, | |
| "sft_loss": 1.075373888015747, | |
| "step": 1195 | |
| }, | |
| { | |
| "epoch": 1.972386587771203, | |
| "grad_norm": 13.526795062615003, | |
| "learning_rate": 7.187608208656328e-08, | |
| "logits/chosen": 16.982704162597656, | |
| "logits/rejected": 17.547874450683594, | |
| "logps/chosen": -293.3042297363281, | |
| "logps/rejected": -233.2967987060547, | |
| "loss": 0.3964, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.24399185180664, | |
| "rewards/margins": 7.097829818725586, | |
| "rewards/rejected": -15.341819763183594, | |
| "sft_loss": 1.063591718673706, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.9806048652202497, | |
| "grad_norm": 12.330320612053741, | |
| "learning_rate": 7.085209674031618e-08, | |
| "logits/chosen": 18.508739471435547, | |
| "logits/rejected": 19.527912139892578, | |
| "logps/chosen": -318.8953857421875, | |
| "logps/rejected": -255.2642822265625, | |
| "loss": 0.3766, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.808796405792236, | |
| "rewards/margins": 8.834001541137695, | |
| "rewards/rejected": -16.642797470092773, | |
| "sft_loss": 1.0131335258483887, | |
| "step": 1205 | |
| }, | |
| { | |
| "epoch": 1.9888231426692964, | |
| "grad_norm": 19.628735128907458, | |
| "learning_rate": 6.983256511136442e-08, | |
| "logits/chosen": 17.349624633789062, | |
| "logits/rejected": 18.25617218017578, | |
| "logps/chosen": -315.596923828125, | |
| "logps/rejected": -252.95460510253906, | |
| "loss": 0.3878, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -8.501006126403809, | |
| "rewards/margins": 8.494573593139648, | |
| "rewards/rejected": -16.995580673217773, | |
| "sft_loss": 1.0632458925247192, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.997041420118343, | |
| "grad_norm": 14.674802699510677, | |
| "learning_rate": 6.881757105716831e-08, | |
| "logits/chosen": 17.45104217529297, | |
| "logits/rejected": 18.316680908203125, | |
| "logps/chosen": -330.3178405761719, | |
| "logps/rejected": -251.63551330566406, | |
| "loss": 0.4009, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.423145294189453, | |
| "rewards/margins": 8.154979705810547, | |
| "rewards/rejected": -16.578125, | |
| "sft_loss": 1.0945472717285156, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 2.0052596975673898, | |
| "grad_norm": 12.624994593347873, | |
| "learning_rate": 6.780719806196828e-08, | |
| "logits/chosen": 17.815471649169922, | |
| "logits/rejected": 19.435829162597656, | |
| "logps/chosen": -326.4144287109375, | |
| "logps/rejected": -260.8008117675781, | |
| "loss": 0.4449, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.356893539428711, | |
| "rewards/margins": 8.929654121398926, | |
| "rewards/rejected": -17.286548614501953, | |
| "sft_loss": 1.1082605123519897, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 2.0134779750164364, | |
| "grad_norm": 12.725913199026877, | |
| "learning_rate": 6.680152922991822e-08, | |
| "logits/chosen": 16.1939754486084, | |
| "logits/rejected": 17.380538940429688, | |
| "logps/chosen": -282.47589111328125, | |
| "logps/rejected": -237.52879333496094, | |
| "loss": 0.3868, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.493667602539062, | |
| "rewards/margins": 7.595485687255859, | |
| "rewards/rejected": -16.089153289794922, | |
| "sft_loss": 1.1127554178237915, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 2.021696252465483, | |
| "grad_norm": 16.704211079520014, | |
| "learning_rate": 6.580064727824994e-08, | |
| "logits/chosen": 17.634016036987305, | |
| "logits/rejected": 18.210420608520508, | |
| "logps/chosen": -294.94793701171875, | |
| "logps/rejected": -239.2569122314453, | |
| "loss": 0.4093, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -8.08222770690918, | |
| "rewards/margins": 7.951410293579102, | |
| "rewards/rejected": -16.03363609313965, | |
| "sft_loss": 1.0821824073791504, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 2.02991452991453, | |
| "grad_norm": 11.063546418547208, | |
| "learning_rate": 6.480463453046985e-08, | |
| "logits/chosen": 18.466581344604492, | |
| "logits/rejected": 18.895183563232422, | |
| "logps/chosen": -304.6612243652344, | |
| "logps/rejected": -241.4573974609375, | |
| "loss": 0.4202, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.562366485595703, | |
| "rewards/margins": 7.818039417266846, | |
| "rewards/rejected": -16.38040542602539, | |
| "sft_loss": 1.1190707683563232, | |
| "step": 1235 | |
| }, | |
| { | |
| "epoch": 2.0381328073635765, | |
| "grad_norm": 14.946770498466886, | |
| "learning_rate": 6.381357290958767e-08, | |
| "logits/chosen": 16.804920196533203, | |
| "logits/rejected": 17.867015838623047, | |
| "logps/chosen": -296.5435485839844, | |
| "logps/rejected": -246.7471466064453, | |
| "loss": 0.3722, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.007586479187012, | |
| "rewards/margins": 8.32363224029541, | |
| "rewards/rejected": -16.33121681213379, | |
| "sft_loss": 1.104773759841919, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 2.046351084812623, | |
| "grad_norm": 11.140569334845633, | |
| "learning_rate": 6.282754393137796e-08, | |
| "logits/chosen": 17.95855140686035, | |
| "logits/rejected": 18.640541076660156, | |
| "logps/chosen": -310.16778564453125, | |
| "logps/rejected": -239.66641235351562, | |
| "loss": 0.4065, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.086520195007324, | |
| "rewards/margins": 8.207837104797363, | |
| "rewards/rejected": -16.294357299804688, | |
| "sft_loss": 1.023207187652588, | |
| "step": 1245 | |
| }, | |
| { | |
| "epoch": 2.05456936226167, | |
| "grad_norm": 15.822685116826385, | |
| "learning_rate": 6.184662869767577e-08, | |
| "logits/chosen": 17.26742172241211, | |
| "logits/rejected": 17.335512161254883, | |
| "logps/chosen": -328.2395324707031, | |
| "logps/rejected": -263.0542297363281, | |
| "loss": 0.4175, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -8.798872947692871, | |
| "rewards/margins": 8.849559783935547, | |
| "rewards/rejected": -17.648433685302734, | |
| "sft_loss": 1.1304852962493896, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 2.0627876397107165, | |
| "grad_norm": 17.368549612926913, | |
| "learning_rate": 6.08709078897056e-08, | |
| "logits/chosen": 17.57396125793457, | |
| "logits/rejected": 17.95652198791504, | |
| "logps/chosen": -302.7294006347656, | |
| "logps/rejected": -251.41261291503906, | |
| "loss": 0.4021, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.686173439025879, | |
| "rewards/margins": 8.318988800048828, | |
| "rewards/rejected": -17.005163192749023, | |
| "sft_loss": 1.119976282119751, | |
| "step": 1255 | |
| }, | |
| { | |
| "epoch": 2.0710059171597632, | |
| "grad_norm": 13.875960320644882, | |
| "learning_rate": 5.990046176144551e-08, | |
| "logits/chosen": 16.934846878051758, | |
| "logits/rejected": 17.557884216308594, | |
| "logps/chosen": -274.9892578125, | |
| "logps/rejected": -239.31570434570312, | |
| "loss": 0.4283, | |
| "rewards/accuracies": 0.9100000262260437, | |
| "rewards/chosen": -8.879440307617188, | |
| "rewards/margins": 7.452706336975098, | |
| "rewards/rejected": -16.3321475982666, | |
| "sft_loss": 1.162746787071228, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 2.07922419460881, | |
| "grad_norm": 12.354544231223421, | |
| "learning_rate": 5.893537013302602e-08, | |
| "logits/chosen": 17.52082061767578, | |
| "logits/rejected": 18.2637939453125, | |
| "logps/chosen": -304.33441162109375, | |
| "logps/rejected": -243.52101135253906, | |
| "loss": 0.4253, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.936227321624756, | |
| "rewards/margins": 8.085640907287598, | |
| "rewards/rejected": -16.021867752075195, | |
| "sft_loss": 1.0547149181365967, | |
| "step": 1265 | |
| }, | |
| { | |
| "epoch": 2.0874424720578566, | |
| "grad_norm": 14.221340160175023, | |
| "learning_rate": 5.7975712384164795e-08, | |
| "logits/chosen": 17.841602325439453, | |
| "logits/rejected": 17.95541000366211, | |
| "logps/chosen": -295.451416015625, | |
| "logps/rejected": -230.86936950683594, | |
| "loss": 0.4009, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -7.948279857635498, | |
| "rewards/margins": 7.590776443481445, | |
| "rewards/rejected": -15.539057731628418, | |
| "sft_loss": 1.1430902481079102, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 2.0956607495069033, | |
| "grad_norm": 15.060512661462361, | |
| "learning_rate": 5.702156744763784e-08, | |
| "logits/chosen": 17.457277297973633, | |
| "logits/rejected": 18.601512908935547, | |
| "logps/chosen": -286.6520080566406, | |
| "logps/rejected": -236.4774627685547, | |
| "loss": 0.4211, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.859719276428223, | |
| "rewards/margins": 7.776011943817139, | |
| "rewards/rejected": -15.635732650756836, | |
| "sft_loss": 1.039507269859314, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 2.10387902695595, | |
| "grad_norm": 14.43891440512856, | |
| "learning_rate": 5.607301380278683e-08, | |
| "logits/chosen": 17.887542724609375, | |
| "logits/rejected": 18.098596572875977, | |
| "logps/chosen": -287.3581848144531, | |
| "logps/rejected": -228.4025421142578, | |
| "loss": 0.4356, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -7.526928901672363, | |
| "rewards/margins": 8.00684642791748, | |
| "rewards/rejected": -15.533775329589844, | |
| "sft_loss": 1.1267131567001343, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 2.1120973044049967, | |
| "grad_norm": 12.750181563192855, | |
| "learning_rate": 5.513012946906445e-08, | |
| "logits/chosen": 17.97955322265625, | |
| "logits/rejected": 18.05929183959961, | |
| "logps/chosen": -319.4637145996094, | |
| "logps/rejected": -245.5413818359375, | |
| "loss": 0.3884, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.154685020446777, | |
| "rewards/margins": 8.448837280273438, | |
| "rewards/rejected": -16.6035213470459, | |
| "sft_loss": 1.193272590637207, | |
| "step": 1285 | |
| }, | |
| { | |
| "epoch": 2.1203155818540433, | |
| "grad_norm": 9.936573876560704, | |
| "learning_rate": 5.419299199961708e-08, | |
| "logits/chosen": 17.2838077545166, | |
| "logits/rejected": 17.822799682617188, | |
| "logps/chosen": -337.51031494140625, | |
| "logps/rejected": -259.70428466796875, | |
| "loss": 0.3565, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.9683146476745605, | |
| "rewards/margins": 8.882369041442871, | |
| "rewards/rejected": -16.850685119628906, | |
| "sft_loss": 1.041199803352356, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 2.12853385930309, | |
| "grad_norm": 13.38745866462026, | |
| "learning_rate": 5.3261678474905785e-08, | |
| "logits/chosen": 18.08312225341797, | |
| "logits/rejected": 18.110692977905273, | |
| "logps/chosen": -324.0693359375, | |
| "logps/rejected": -256.90234375, | |
| "loss": 0.391, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.347922325134277, | |
| "rewards/margins": 8.93021011352539, | |
| "rewards/rejected": -17.27813148498535, | |
| "sft_loss": 1.1214524507522583, | |
| "step": 1295 | |
| }, | |
| { | |
| "epoch": 2.1367521367521367, | |
| "grad_norm": 12.52755859911023, | |
| "learning_rate": 5.2336265496366774e-08, | |
| "logits/chosen": 16.553739547729492, | |
| "logits/rejected": 18.280567169189453, | |
| "logps/chosen": -298.98480224609375, | |
| "logps/rejected": -247.38160705566406, | |
| "loss": 0.3604, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.771576881408691, | |
| "rewards/margins": 7.8479180335998535, | |
| "rewards/rejected": -16.619495391845703, | |
| "sft_loss": 1.1661113500595093, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 2.1449704142011834, | |
| "grad_norm": 15.615159328078256, | |
| "learning_rate": 5.141682918011055e-08, | |
| "logits/chosen": 17.72024917602539, | |
| "logits/rejected": 18.12508773803711, | |
| "logps/chosen": -311.2801818847656, | |
| "logps/rejected": -245.24436950683594, | |
| "loss": 0.4611, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.237349510192871, | |
| "rewards/margins": 8.371785163879395, | |
| "rewards/rejected": -16.609132766723633, | |
| "sft_loss": 1.1050708293914795, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 2.15318869165023, | |
| "grad_norm": 9.121410431305465, | |
| "learning_rate": 5.0503445150661306e-08, | |
| "logits/chosen": 17.203432083129883, | |
| "logits/rejected": 18.309484481811523, | |
| "logps/chosen": -282.98101806640625, | |
| "logps/rejected": -231.17942810058594, | |
| "loss": 0.3828, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.479619979858398, | |
| "rewards/margins": 7.617303371429443, | |
| "rewards/rejected": -16.096921920776367, | |
| "sft_loss": 1.110097050666809, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 2.1614069690992768, | |
| "grad_norm": 16.777024727194785, | |
| "learning_rate": 4.959618853473696e-08, | |
| "logits/chosen": 16.61244010925293, | |
| "logits/rejected": 17.989538192749023, | |
| "logps/chosen": -302.3112487792969, | |
| "logps/rejected": -246.63719177246094, | |
| "loss": 0.4158, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.352829933166504, | |
| "rewards/margins": 8.38165283203125, | |
| "rewards/rejected": -16.734481811523438, | |
| "sft_loss": 1.1263587474822998, | |
| "step": 1315 | |
| }, | |
| { | |
| "epoch": 2.1696252465483234, | |
| "grad_norm": 12.912024759458012, | |
| "learning_rate": 4.8695133955069564e-08, | |
| "logits/chosen": 15.624103546142578, | |
| "logits/rejected": 16.827468872070312, | |
| "logps/chosen": -306.7551574707031, | |
| "logps/rejected": -244.3481903076172, | |
| "loss": 0.429, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.34555435180664, | |
| "rewards/margins": 8.023102760314941, | |
| "rewards/rejected": -16.3686580657959, | |
| "sft_loss": 1.2823337316513062, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 2.17784352399737, | |
| "grad_norm": 13.478297039710323, | |
| "learning_rate": 4.780035552426787e-08, | |
| "logits/chosen": 16.33539581298828, | |
| "logits/rejected": 18.024782180786133, | |
| "logps/chosen": -325.2061767578125, | |
| "logps/rejected": -265.6727600097656, | |
| "loss": 0.4175, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.308411598205566, | |
| "rewards/margins": 9.635498046875, | |
| "rewards/rejected": -17.943910598754883, | |
| "sft_loss": 1.1859756708145142, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 2.186061801446417, | |
| "grad_norm": 13.817488574864614, | |
| "learning_rate": 4.691192683872129e-08, | |
| "logits/chosen": 16.309165954589844, | |
| "logits/rejected": 17.056123733520508, | |
| "logps/chosen": -319.3105163574219, | |
| "logps/rejected": -256.0588073730469, | |
| "loss": 0.382, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.325318336486816, | |
| "rewards/margins": 8.578317642211914, | |
| "rewards/rejected": -16.903636932373047, | |
| "sft_loss": 1.0492181777954102, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 2.1942800788954635, | |
| "grad_norm": 10.242449837573213, | |
| "learning_rate": 4.602992097254646e-08, | |
| "logits/chosen": 17.743621826171875, | |
| "logits/rejected": 19.387224197387695, | |
| "logps/chosen": -307.1810607910156, | |
| "logps/rejected": -254.61309814453125, | |
| "loss": 0.3948, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.201935768127441, | |
| "rewards/margins": 8.72970962524414, | |
| "rewards/rejected": -16.9316463470459, | |
| "sft_loss": 1.159468173980713, | |
| "step": 1335 | |
| }, | |
| { | |
| "epoch": 2.20249835634451, | |
| "grad_norm": 12.840091970424348, | |
| "learning_rate": 4.515441047157707e-08, | |
| "logits/chosen": 17.517444610595703, | |
| "logits/rejected": 18.110706329345703, | |
| "logps/chosen": -303.7611083984375, | |
| "logps/rejected": -246.00747680664062, | |
| "loss": 0.4279, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.031211853027344, | |
| "rewards/margins": 8.574084281921387, | |
| "rewards/rejected": -16.605297088623047, | |
| "sft_loss": 1.1109663248062134, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 2.210716633793557, | |
| "grad_norm": 11.358689713775057, | |
| "learning_rate": 4.428546734739666e-08, | |
| "logits/chosen": 17.79754066467285, | |
| "logits/rejected": 18.65445327758789, | |
| "logps/chosen": -310.1402587890625, | |
| "logps/rejected": -257.2119445800781, | |
| "loss": 0.3393, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.179025650024414, | |
| "rewards/margins": 9.184054374694824, | |
| "rewards/rejected": -17.363079071044922, | |
| "sft_loss": 1.040381669998169, | |
| "step": 1345 | |
| }, | |
| { | |
| "epoch": 2.2189349112426036, | |
| "grad_norm": 10.0246369651475, | |
| "learning_rate": 4.342316307141568e-08, | |
| "logits/chosen": 15.378368377685547, | |
| "logits/rejected": 17.601299285888672, | |
| "logps/chosen": -293.3377380371094, | |
| "logps/rejected": -252.94558715820312, | |
| "loss": 0.4169, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.73454761505127, | |
| "rewards/margins": 8.430728912353516, | |
| "rewards/rejected": -17.16527557373047, | |
| "sft_loss": 1.088813304901123, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 2.2271531886916502, | |
| "grad_norm": 8.66405912578809, | |
| "learning_rate": 4.256756856899299e-08, | |
| "logits/chosen": 16.15410041809082, | |
| "logits/rejected": 17.089345932006836, | |
| "logps/chosen": -293.54864501953125, | |
| "logps/rejected": -243.08554077148438, | |
| "loss": 0.3688, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.237372398376465, | |
| "rewards/margins": 8.213920593261719, | |
| "rewards/rejected": -16.4512939453125, | |
| "sft_loss": 1.1098147630691528, | |
| "step": 1355 | |
| }, | |
| { | |
| "epoch": 2.235371466140697, | |
| "grad_norm": 15.953137176215671, | |
| "learning_rate": 4.171875421360202e-08, | |
| "logits/chosen": 16.227901458740234, | |
| "logits/rejected": 16.872665405273438, | |
| "logps/chosen": -329.6645202636719, | |
| "logps/rejected": -257.57489013671875, | |
| "loss": 0.4039, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.39266300201416, | |
| "rewards/margins": 8.796185493469238, | |
| "rewards/rejected": -17.1888484954834, | |
| "sft_loss": 1.1166497468948364, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 2.2435897435897436, | |
| "grad_norm": 9.812512910956865, | |
| "learning_rate": 4.0876789821042606e-08, | |
| "logits/chosen": 16.98467445373535, | |
| "logits/rejected": 17.594194412231445, | |
| "logps/chosen": -308.341064453125, | |
| "logps/rejected": -250.00465393066406, | |
| "loss": 0.3941, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.45275592803955, | |
| "rewards/margins": 8.58054256439209, | |
| "rewards/rejected": -17.03329849243164, | |
| "sft_loss": 1.1679203510284424, | |
| "step": 1365 | |
| }, | |
| { | |
| "epoch": 2.2518080210387903, | |
| "grad_norm": 11.249560857734895, | |
| "learning_rate": 4.0041744643698585e-08, | |
| "logits/chosen": 17.271631240844727, | |
| "logits/rejected": 18.480789184570312, | |
| "logps/chosen": -323.33148193359375, | |
| "logps/rejected": -265.4918212890625, | |
| "loss": 0.4133, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.759995460510254, | |
| "rewards/margins": 9.08838176727295, | |
| "rewards/rejected": -17.848377227783203, | |
| "sft_loss": 1.1703903675079346, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 2.260026298487837, | |
| "grad_norm": 13.168234484012684, | |
| "learning_rate": 3.9213687364841514e-08, | |
| "logits/chosen": 17.725706100463867, | |
| "logits/rejected": 18.4434871673584, | |
| "logps/chosen": -265.5625305175781, | |
| "logps/rejected": -229.72801208496094, | |
| "loss": 0.3827, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -7.455626964569092, | |
| "rewards/margins": 8.284765243530273, | |
| "rewards/rejected": -15.740392684936523, | |
| "sft_loss": 1.061354160308838, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 2.2682445759368837, | |
| "grad_norm": 10.595007690116647, | |
| "learning_rate": 3.8392686092981716e-08, | |
| "logits/chosen": 16.218524932861328, | |
| "logits/rejected": 17.454858779907227, | |
| "logps/chosen": -330.2020263671875, | |
| "logps/rejected": -259.57513427734375, | |
| "loss": 0.3713, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.325506210327148, | |
| "rewards/margins": 8.986472129821777, | |
| "rewards/rejected": -17.311979293823242, | |
| "sft_loss": 1.1411256790161133, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 2.2764628533859304, | |
| "grad_norm": 16.544764732871236, | |
| "learning_rate": 3.757880835626601e-08, | |
| "logits/chosen": 19.006175994873047, | |
| "logits/rejected": 20.302326202392578, | |
| "logps/chosen": -322.05242919921875, | |
| "logps/rejected": -260.6827087402344, | |
| "loss": 0.3984, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.566563606262207, | |
| "rewards/margins": 8.706660270690918, | |
| "rewards/rejected": -17.273221969604492, | |
| "sft_loss": 1.01236891746521, | |
| "step": 1385 | |
| }, | |
| { | |
| "epoch": 2.284681130834977, | |
| "grad_norm": 9.92900744936661, | |
| "learning_rate": 3.677212109692364e-08, | |
| "logits/chosen": 16.336091995239258, | |
| "logits/rejected": 18.393173217773438, | |
| "logps/chosen": -296.00811767578125, | |
| "logps/rejected": -255.37149047851562, | |
| "loss": 0.4114, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.0043363571167, | |
| "rewards/margins": 9.217806816101074, | |
| "rewards/rejected": -17.222143173217773, | |
| "sft_loss": 1.1503466367721558, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 2.2928994082840237, | |
| "grad_norm": 9.386107838289549, | |
| "learning_rate": 3.597269066576017e-08, | |
| "logits/chosen": 17.042190551757812, | |
| "logits/rejected": 18.17107582092285, | |
| "logps/chosen": -300.5311584472656, | |
| "logps/rejected": -244.8414306640625, | |
| "loss": 0.3695, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.179112434387207, | |
| "rewards/margins": 8.550080299377441, | |
| "rewards/rejected": -16.72919273376465, | |
| "sft_loss": 1.1738831996917725, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 2.3011176857330704, | |
| "grad_norm": 13.817759213393538, | |
| "learning_rate": 3.518058281669996e-08, | |
| "logits/chosen": 17.452651977539062, | |
| "logits/rejected": 19.167875289916992, | |
| "logps/chosen": -325.5849914550781, | |
| "logps/rejected": -261.9805908203125, | |
| "loss": 0.405, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.316492080688477, | |
| "rewards/margins": 9.281232833862305, | |
| "rewards/rejected": -17.59772491455078, | |
| "sft_loss": 1.0759243965148926, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 2.309335963182117, | |
| "grad_norm": 14.27386340226445, | |
| "learning_rate": 3.439586270137797e-08, | |
| "logits/chosen": 16.01079750061035, | |
| "logits/rejected": 17.990955352783203, | |
| "logps/chosen": -317.35968017578125, | |
| "logps/rejected": -265.36737060546875, | |
| "loss": 0.382, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.888944625854492, | |
| "rewards/margins": 9.316179275512695, | |
| "rewards/rejected": -18.205123901367188, | |
| "sft_loss": 1.064568281173706, | |
| "step": 1405 | |
| }, | |
| { | |
| "epoch": 2.3175542406311638, | |
| "grad_norm": 17.736685407866446, | |
| "learning_rate": 3.3618594863780993e-08, | |
| "logits/chosen": 18.37812042236328, | |
| "logits/rejected": 19.024595260620117, | |
| "logps/chosen": -319.2788391113281, | |
| "logps/rejected": -255.89810180664062, | |
| "loss": 0.3468, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.851397514343262, | |
| "rewards/margins": 8.883750915527344, | |
| "rewards/rejected": -17.73514747619629, | |
| "sft_loss": 1.235966682434082, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 2.3257725180802105, | |
| "grad_norm": 22.487095580329445, | |
| "learning_rate": 3.2848843234938694e-08, | |
| "logits/chosen": 17.141220092773438, | |
| "logits/rejected": 17.714786529541016, | |
| "logps/chosen": -302.5834045410156, | |
| "logps/rejected": -254.14559936523438, | |
| "loss": 0.36, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.972297668457031, | |
| "rewards/margins": 8.797745704650879, | |
| "rewards/rejected": -17.770044326782227, | |
| "sft_loss": 1.1860109567642212, | |
| "step": 1415 | |
| }, | |
| { | |
| "epoch": 2.333990795529257, | |
| "grad_norm": 11.596948370393193, | |
| "learning_rate": 3.208667112766529e-08, | |
| "logits/chosen": 17.32436752319336, | |
| "logits/rejected": 18.515031814575195, | |
| "logps/chosen": -312.43267822265625, | |
| "logps/rejected": -266.10052490234375, | |
| "loss": 0.3933, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -9.664741516113281, | |
| "rewards/margins": 9.025125503540039, | |
| "rewards/rejected": -18.689865112304688, | |
| "sft_loss": 1.17525315284729, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 2.342209072978304, | |
| "grad_norm": 16.78732289470905, | |
| "learning_rate": 3.1332141231352194e-08, | |
| "logits/chosen": 17.367273330688477, | |
| "logits/rejected": 17.978761672973633, | |
| "logps/chosen": -325.341552734375, | |
| "logps/rejected": -261.8766784667969, | |
| "loss": 0.3954, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -9.092870712280273, | |
| "rewards/margins": 9.265833854675293, | |
| "rewards/rejected": -18.358705520629883, | |
| "sft_loss": 1.1345161199569702, | |
| "step": 1425 | |
| }, | |
| { | |
| "epoch": 2.3504273504273505, | |
| "grad_norm": 13.214123565552589, | |
| "learning_rate": 3.058531560681141e-08, | |
| "logits/chosen": 18.152240753173828, | |
| "logits/rejected": 19.055191040039062, | |
| "logps/chosen": -327.43487548828125, | |
| "logps/rejected": -266.76446533203125, | |
| "loss": 0.3363, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.500913619995117, | |
| "rewards/margins": 9.283426284790039, | |
| "rewards/rejected": -17.784339904785156, | |
| "sft_loss": 1.231545329093933, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 2.358645627876397, | |
| "grad_norm": 22.300498596470074, | |
| "learning_rate": 2.984625568117129e-08, | |
| "logits/chosen": 18.67966079711914, | |
| "logits/rejected": 19.73933982849121, | |
| "logps/chosen": -334.677734375, | |
| "logps/rejected": -265.227783203125, | |
| "loss": 0.4029, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.572039604187012, | |
| "rewards/margins": 8.896354675292969, | |
| "rewards/rejected": -17.468393325805664, | |
| "sft_loss": 1.1262859106063843, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 2.366863905325444, | |
| "grad_norm": 37.074159109819185, | |
| "learning_rate": 2.9115022242823862e-08, | |
| "logits/chosen": 17.512964248657227, | |
| "logits/rejected": 18.453014373779297, | |
| "logps/chosen": -326.0170593261719, | |
| "logps/rejected": -263.2306213378906, | |
| "loss": 0.3968, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.473074913024902, | |
| "rewards/margins": 9.212455749511719, | |
| "rewards/rejected": -17.685529708862305, | |
| "sft_loss": 1.092557430267334, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 2.3750821827744906, | |
| "grad_norm": 11.887196623999731, | |
| "learning_rate": 2.839167543642511e-08, | |
| "logits/chosen": 17.14059066772461, | |
| "logits/rejected": 18.407007217407227, | |
| "logps/chosen": -291.7596435546875, | |
| "logps/rejected": -250.99574279785156, | |
| "loss": 0.4211, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.36557388305664, | |
| "rewards/margins": 8.695883750915527, | |
| "rewards/rejected": -17.06145668029785, | |
| "sft_loss": 1.202438235282898, | |
| "step": 1445 | |
| }, | |
| { | |
| "epoch": 2.3833004602235373, | |
| "grad_norm": 17.524490621614987, | |
| "learning_rate": 2.7676274757947816e-08, | |
| "logits/chosen": 18.85689926147461, | |
| "logits/rejected": 19.545021057128906, | |
| "logps/chosen": -287.7202453613281, | |
| "logps/rejected": -244.56924438476562, | |
| "loss": 0.3838, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.284355163574219, | |
| "rewards/margins": 8.70119857788086, | |
| "rewards/rejected": -16.985553741455078, | |
| "sft_loss": 1.0111671686172485, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 2.391518737672584, | |
| "grad_norm": 9.1542241365719, | |
| "learning_rate": 2.696887904978819e-08, | |
| "logits/chosen": 18.2181453704834, | |
| "logits/rejected": 18.709545135498047, | |
| "logps/chosen": -280.198974609375, | |
| "logps/rejected": -226.15415954589844, | |
| "loss": 0.4051, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -8.791984558105469, | |
| "rewards/margins": 7.134130477905273, | |
| "rewards/rejected": -15.926115989685059, | |
| "sft_loss": 1.1695269346237183, | |
| "step": 1455 | |
| }, | |
| { | |
| "epoch": 2.3997370151216306, | |
| "grad_norm": 13.076192251177769, | |
| "learning_rate": 2.6269546495925886e-08, | |
| "logits/chosen": 16.172388076782227, | |
| "logits/rejected": 17.052417755126953, | |
| "logps/chosen": -287.6596984863281, | |
| "logps/rejected": -242.12660217285156, | |
| "loss": 0.4246, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.466004371643066, | |
| "rewards/margins": 8.168050765991211, | |
| "rewards/rejected": -16.634056091308594, | |
| "sft_loss": 1.1705952882766724, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 2.4079552925706773, | |
| "grad_norm": 14.024390303705356, | |
| "learning_rate": 2.5578334617138236e-08, | |
| "logits/chosen": 17.606464385986328, | |
| "logits/rejected": 18.12337303161621, | |
| "logps/chosen": -301.743408203125, | |
| "logps/rejected": -248.48464965820312, | |
| "loss": 0.3833, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.655086517333984, | |
| "rewards/margins": 8.575737953186035, | |
| "rewards/rejected": -17.230825424194336, | |
| "sft_loss": 1.08839750289917, | |
| "step": 1465 | |
| }, | |
| { | |
| "epoch": 2.416173570019724, | |
| "grad_norm": 17.812699456228195, | |
| "learning_rate": 2.489530026626932e-08, | |
| "logits/chosen": 17.72669219970703, | |
| "logits/rejected": 18.6758975982666, | |
| "logps/chosen": -306.7005310058594, | |
| "logps/rejected": -242.5311279296875, | |
| "loss": 0.379, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.553206443786621, | |
| "rewards/margins": 8.148569107055664, | |
| "rewards/rejected": -16.70177459716797, | |
| "sft_loss": 1.1815282106399536, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 2.4243918474687707, | |
| "grad_norm": 8.680672775515832, | |
| "learning_rate": 2.422049962355366e-08, | |
| "logits/chosen": 18.41983413696289, | |
| "logits/rejected": 19.47545623779297, | |
| "logps/chosen": -282.099609375, | |
| "logps/rejected": -238.36300659179688, | |
| "loss": 0.3486, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.316095352172852, | |
| "rewards/margins": 8.191699981689453, | |
| "rewards/rejected": -16.507797241210938, | |
| "sft_loss": 1.205697774887085, | |
| "step": 1475 | |
| }, | |
| { | |
| "epoch": 2.4326101249178174, | |
| "grad_norm": 16.775370793665815, | |
| "learning_rate": 2.3553988191995208e-08, | |
| "logits/chosen": 16.783174514770508, | |
| "logits/rejected": 18.405048370361328, | |
| "logps/chosen": -304.1385498046875, | |
| "logps/rejected": -256.7261047363281, | |
| "loss": 0.3744, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.7383451461792, | |
| "rewards/margins": 8.930658340454102, | |
| "rewards/rejected": -17.669002532958984, | |
| "sft_loss": 1.1059280633926392, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 2.440828402366864, | |
| "grad_norm": 10.437808650182905, | |
| "learning_rate": 2.2895820792802474e-08, | |
| "logits/chosen": 16.727697372436523, | |
| "logits/rejected": 17.59294891357422, | |
| "logps/chosen": -328.55389404296875, | |
| "logps/rejected": -269.7945251464844, | |
| "loss": 0.3695, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -9.062349319458008, | |
| "rewards/margins": 9.41024112701416, | |
| "rewards/rejected": -18.472591400146484, | |
| "sft_loss": 1.173682451248169, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 2.4490466798159107, | |
| "grad_norm": 20.73492010593765, | |
| "learning_rate": 2.2246051560879095e-08, | |
| "logits/chosen": 16.899852752685547, | |
| "logits/rejected": 17.82339096069336, | |
| "logps/chosen": -338.22186279296875, | |
| "logps/rejected": -279.0784912109375, | |
| "loss": 0.4179, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -9.162134170532227, | |
| "rewards/margins": 9.35285758972168, | |
| "rewards/rejected": -18.514989852905273, | |
| "sft_loss": 1.17171311378479, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 2.4572649572649574, | |
| "grad_norm": 12.636170820327301, | |
| "learning_rate": 2.160473394037149e-08, | |
| "logits/chosen": 17.118467330932617, | |
| "logits/rejected": 17.36690330505371, | |
| "logps/chosen": -335.8661804199219, | |
| "logps/rejected": -262.7174072265625, | |
| "loss": 0.4504, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.756584167480469, | |
| "rewards/margins": 8.854002952575684, | |
| "rewards/rejected": -17.610586166381836, | |
| "sft_loss": 1.1354836225509644, | |
| "step": 1495 | |
| }, | |
| { | |
| "epoch": 2.465483234714004, | |
| "grad_norm": 11.838207356236568, | |
| "learning_rate": 2.097192068027276e-08, | |
| "logits/chosen": 16.54058837890625, | |
| "logits/rejected": 17.930091857910156, | |
| "logps/chosen": -329.2217712402344, | |
| "logps/rejected": -270.410888671875, | |
| "loss": 0.3262, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.5565185546875, | |
| "rewards/margins": 9.7839937210083, | |
| "rewards/rejected": -18.340513229370117, | |
| "sft_loss": 1.0987026691436768, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.473701512163051, | |
| "grad_norm": 17.261726485061967, | |
| "learning_rate": 2.0347663830084182e-08, | |
| "logits/chosen": 16.857637405395508, | |
| "logits/rejected": 17.605924606323242, | |
| "logps/chosen": -278.7782287597656, | |
| "logps/rejected": -237.3050537109375, | |
| "loss": 0.3978, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.520981788635254, | |
| "rewards/margins": 7.912033557891846, | |
| "rewards/rejected": -16.433013916015625, | |
| "sft_loss": 1.1526176929473877, | |
| "step": 1505 | |
| }, | |
| { | |
| "epoch": 2.4819197896120975, | |
| "grad_norm": 19.01971806956554, | |
| "learning_rate": 1.9732014735534168e-08, | |
| "logits/chosen": 17.1612606048584, | |
| "logits/rejected": 17.63095474243164, | |
| "logps/chosen": -307.8269958496094, | |
| "logps/rejected": -234.46160888671875, | |
| "loss": 0.4156, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.234945297241211, | |
| "rewards/margins": 7.63665771484375, | |
| "rewards/rejected": -15.871603012084961, | |
| "sft_loss": 1.1170748472213745, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 2.490138067061144, | |
| "grad_norm": 10.87683842585221, | |
| "learning_rate": 1.9125024034354758e-08, | |
| "logits/chosen": 17.20734214782715, | |
| "logits/rejected": 17.946365356445312, | |
| "logps/chosen": -312.4763488769531, | |
| "logps/rejected": -246.2183837890625, | |
| "loss": 0.351, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.035243034362793, | |
| "rewards/margins": 8.52718734741211, | |
| "rewards/rejected": -16.56243133544922, | |
| "sft_loss": 1.071519374847412, | |
| "step": 1515 | |
| }, | |
| { | |
| "epoch": 2.498356344510191, | |
| "grad_norm": 10.244386828979161, | |
| "learning_rate": 1.85267416521169e-08, | |
| "logits/chosen": 17.724872589111328, | |
| "logits/rejected": 18.053852081298828, | |
| "logps/chosen": -320.68597412109375, | |
| "logps/rejected": -246.91893005371094, | |
| "loss": 0.3733, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.582959175109863, | |
| "rewards/margins": 8.986913681030273, | |
| "rewards/rejected": -16.56987190246582, | |
| "sft_loss": 1.0908424854278564, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 2.5065746219592375, | |
| "grad_norm": 12.813362766851835, | |
| "learning_rate": 1.793721679812389e-08, | |
| "logits/chosen": 18.601253509521484, | |
| "logits/rejected": 19.362607955932617, | |
| "logps/chosen": -288.14776611328125, | |
| "logps/rejected": -237.0640106201172, | |
| "loss": 0.3857, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.49190616607666, | |
| "rewards/margins": 7.810946464538574, | |
| "rewards/rejected": -16.302852630615234, | |
| "sft_loss": 1.091495394706726, | |
| "step": 1525 | |
| }, | |
| { | |
| "epoch": 2.5147928994082838, | |
| "grad_norm": 11.364641270765484, | |
| "learning_rate": 1.735649796136382e-08, | |
| "logits/chosen": 15.785613059997559, | |
| "logits/rejected": 17.070707321166992, | |
| "logps/chosen": -324.2843017578125, | |
| "logps/rejected": -258.7143859863281, | |
| "loss": 0.3883, | |
| "rewards/accuracies": 0.9300000071525574, | |
| "rewards/chosen": -8.855399131774902, | |
| "rewards/margins": 8.804574012756348, | |
| "rewards/rejected": -17.65997314453125, | |
| "sft_loss": 1.1961203813552856, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 2.523011176857331, | |
| "grad_norm": 15.007717453848354, | |
| "learning_rate": 1.678463290652142e-08, | |
| "logits/chosen": 17.604642868041992, | |
| "logits/rejected": 17.90863609313965, | |
| "logps/chosen": -312.69024658203125, | |
| "logps/rejected": -249.09962463378906, | |
| "loss": 0.3626, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.328804016113281, | |
| "rewards/margins": 8.493717193603516, | |
| "rewards/rejected": -16.822521209716797, | |
| "sft_loss": 1.157140851020813, | |
| "step": 1535 | |
| }, | |
| { | |
| "epoch": 2.531229454306377, | |
| "grad_norm": 8.792843348493232, | |
| "learning_rate": 1.6221668670049315e-08, | |
| "logits/chosen": 16.296873092651367, | |
| "logits/rejected": 17.479211807250977, | |
| "logps/chosen": -327.1073303222656, | |
| "logps/rejected": -271.3090515136719, | |
| "loss": 0.3481, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.877535820007324, | |
| "rewards/margins": 9.413492202758789, | |
| "rewards/rejected": -18.291027069091797, | |
| "sft_loss": 1.1908369064331055, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 2.5394477317554243, | |
| "grad_norm": 14.557660052303598, | |
| "learning_rate": 1.5667651556299178e-08, | |
| "logits/chosen": 16.44731903076172, | |
| "logits/rejected": 17.4537296295166, | |
| "logps/chosen": -306.1639709472656, | |
| "logps/rejected": -253.69247436523438, | |
| "loss": 0.3531, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.432329177856445, | |
| "rewards/margins": 8.949870109558105, | |
| "rewards/rejected": -17.382200241088867, | |
| "sft_loss": 1.114105463027954, | |
| "step": 1545 | |
| }, | |
| { | |
| "epoch": 2.5476660092044705, | |
| "grad_norm": 15.415629759090677, | |
| "learning_rate": 1.5122627133713262e-08, | |
| "logits/chosen": 15.742711067199707, | |
| "logits/rejected": 17.65005874633789, | |
| "logps/chosen": -317.73675537109375, | |
| "logps/rejected": -260.23907470703125, | |
| "loss": 0.3849, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.840112209320068, | |
| "rewards/margins": 9.513845443725586, | |
| "rewards/rejected": -17.35395622253418, | |
| "sft_loss": 1.292752981185913, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 2.5558842866535176, | |
| "grad_norm": 11.677314306376815, | |
| "learning_rate": 1.4586640231076226e-08, | |
| "logits/chosen": 17.83001708984375, | |
| "logits/rejected": 18.008840560913086, | |
| "logps/chosen": -290.8938293457031, | |
| "logps/rejected": -234.77801513671875, | |
| "loss": 0.3699, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -8.604043006896973, | |
| "rewards/margins": 7.615962028503418, | |
| "rewards/rejected": -16.22000503540039, | |
| "sft_loss": 1.1707122325897217, | |
| "step": 1555 | |
| }, | |
| { | |
| "epoch": 2.564102564102564, | |
| "grad_norm": 17.403632913196056, | |
| "learning_rate": 1.405973493382806e-08, | |
| "logits/chosen": 16.150592803955078, | |
| "logits/rejected": 17.557065963745117, | |
| "logps/chosen": -321.72802734375, | |
| "logps/rejected": -270.1099548339844, | |
| "loss": 0.3552, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -9.425326347351074, | |
| "rewards/margins": 9.209266662597656, | |
| "rewards/rejected": -18.634592056274414, | |
| "sft_loss": 1.0887880325317383, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 2.572320841551611, | |
| "grad_norm": 16.12695707285676, | |
| "learning_rate": 1.3541954580437941e-08, | |
| "logits/chosen": 18.370115280151367, | |
| "logits/rejected": 18.63874626159668, | |
| "logps/chosen": -321.462646484375, | |
| "logps/rejected": -259.6288757324219, | |
| "loss": 0.3254, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.813506126403809, | |
| "rewards/margins": 9.208869934082031, | |
| "rewards/rejected": -18.022377014160156, | |
| "sft_loss": 1.0541073083877563, | |
| "step": 1565 | |
| }, | |
| { | |
| "epoch": 2.5805391190006572, | |
| "grad_norm": 18.71581348868284, | |
| "learning_rate": 1.3033341758839592e-08, | |
| "logits/chosen": 16.9278621673584, | |
| "logits/rejected": 17.87784767150879, | |
| "logps/chosen": -333.1341552734375, | |
| "logps/rejected": -271.1338195800781, | |
| "loss": 0.4055, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -9.0064058303833, | |
| "rewards/margins": 9.4508638381958, | |
| "rewards/rejected": -18.4572696685791, | |
| "sft_loss": 1.1667834520339966, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 2.5887573964497044, | |
| "grad_norm": 11.817463136679503, | |
| "learning_rate": 1.2533938302928329e-08, | |
| "logits/chosen": 17.372867584228516, | |
| "logits/rejected": 18.298500061035156, | |
| "logps/chosen": -346.6560974121094, | |
| "logps/rejected": -274.773681640625, | |
| "loss": 0.3683, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.409444808959961, | |
| "rewards/margins": 9.886656761169434, | |
| "rewards/rejected": -18.296100616455078, | |
| "sft_loss": 1.183761477470398, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 2.5969756738987506, | |
| "grad_norm": 11.086694788731137, | |
| "learning_rate": 1.2043785289120409e-08, | |
| "logits/chosen": 16.920242309570312, | |
| "logits/rejected": 18.36749839782715, | |
| "logps/chosen": -333.09539794921875, | |
| "logps/rejected": -272.066162109375, | |
| "loss": 0.378, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.567721366882324, | |
| "rewards/margins": 9.71126937866211, | |
| "rewards/rejected": -18.278989791870117, | |
| "sft_loss": 1.226511001586914, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 2.6051939513477977, | |
| "grad_norm": 19.57395022687368, | |
| "learning_rate": 1.1562923032974125e-08, | |
| "logits/chosen": 17.482685089111328, | |
| "logits/rejected": 18.186784744262695, | |
| "logps/chosen": -336.7694396972656, | |
| "logps/rejected": -273.9622497558594, | |
| "loss": 0.3656, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.473450660705566, | |
| "rewards/margins": 9.806720733642578, | |
| "rewards/rejected": -18.280170440673828, | |
| "sft_loss": 1.0997947454452515, | |
| "step": 1585 | |
| }, | |
| { | |
| "epoch": 2.613412228796844, | |
| "grad_norm": 13.846460377871546, | |
| "learning_rate": 1.1091391085874161e-08, | |
| "logits/chosen": 17.66254425048828, | |
| "logits/rejected": 17.869403839111328, | |
| "logps/chosen": -355.09124755859375, | |
| "logps/rejected": -262.7408142089844, | |
| "loss": 0.3909, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.950118064880371, | |
| "rewards/margins": 8.9635009765625, | |
| "rewards/rejected": -17.913618087768555, | |
| "sft_loss": 1.2338536977767944, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 2.621630506245891, | |
| "grad_norm": 18.388530342654583, | |
| "learning_rate": 1.06292282317781e-08, | |
| "logits/chosen": 18.353347778320312, | |
| "logits/rejected": 19.111572265625, | |
| "logps/chosen": -293.8038024902344, | |
| "logps/rejected": -240.29061889648438, | |
| "loss": 0.3818, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.344436645507812, | |
| "rewards/margins": 8.241558074951172, | |
| "rewards/rejected": -16.58599281311035, | |
| "sft_loss": 1.1101101636886597, | |
| "step": 1595 | |
| }, | |
| { | |
| "epoch": 2.6298487836949374, | |
| "grad_norm": 48.6421585527008, | |
| "learning_rate": 1.017647248402674e-08, | |
| "logits/chosen": 17.27472686767578, | |
| "logits/rejected": 17.775699615478516, | |
| "logps/chosen": -338.6330871582031, | |
| "logps/rejected": -265.278564453125, | |
| "loss": 0.4384, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.897347450256348, | |
| "rewards/margins": 8.987475395202637, | |
| "rewards/rejected": -17.884824752807617, | |
| "sft_loss": 1.1422169208526611, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 2.6380670611439845, | |
| "grad_norm": 13.863026192467665, | |
| "learning_rate": 9.733161082217223e-09, | |
| "logits/chosen": 16.872806549072266, | |
| "logits/rejected": 17.572965621948242, | |
| "logps/chosen": -321.6798095703125, | |
| "logps/rejected": -258.7831115722656, | |
| "loss": 0.4032, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.822911262512207, | |
| "rewards/margins": 8.982833862304688, | |
| "rewards/rejected": -17.805744171142578, | |
| "sft_loss": 1.125891923904419, | |
| "step": 1605 | |
| }, | |
| { | |
| "epoch": 2.6462853385930307, | |
| "grad_norm": 15.402379291218823, | |
| "learning_rate": 9.299330489140125e-09, | |
| "logits/chosen": 17.64206314086914, | |
| "logits/rejected": 18.37377166748047, | |
| "logps/chosen": -285.738037109375, | |
| "logps/rejected": -240.1550750732422, | |
| "loss": 0.4197, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -8.605627059936523, | |
| "rewards/margins": 8.010725021362305, | |
| "rewards/rejected": -16.616352081298828, | |
| "sft_loss": 1.0786948204040527, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 2.654503616042078, | |
| "grad_norm": 11.137896212671778, | |
| "learning_rate": 8.87501638778039e-09, | |
| "logits/chosen": 16.587888717651367, | |
| "logits/rejected": 17.759031295776367, | |
| "logps/chosen": -309.4990539550781, | |
| "logps/rejected": -254.31495666503906, | |
| "loss": 0.4112, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -9.20348834991455, | |
| "rewards/margins": 8.457581520080566, | |
| "rewards/rejected": -17.66107177734375, | |
| "sft_loss": 1.10163414478302, | |
| "step": 1615 | |
| }, | |
| { | |
| "epoch": 2.662721893491124, | |
| "grad_norm": 10.357256991488983, | |
| "learning_rate": 8.460253678382296e-09, | |
| "logits/chosen": 17.529693603515625, | |
| "logits/rejected": 18.570171356201172, | |
| "logps/chosen": -337.939453125, | |
| "logps/rejected": -269.9917297363281, | |
| "loss": 0.3553, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.473341941833496, | |
| "rewards/margins": 9.756902694702148, | |
| "rewards/rejected": -18.230243682861328, | |
| "sft_loss": 1.0737409591674805, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 2.6709401709401708, | |
| "grad_norm": 12.585703695972256, | |
| "learning_rate": 8.055076475578918e-09, | |
| "logits/chosen": 17.500032424926758, | |
| "logits/rejected": 18.32237434387207, | |
| "logps/chosen": -326.6228942871094, | |
| "logps/rejected": -261.5873107910156, | |
| "loss": 0.3922, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.563287734985352, | |
| "rewards/margins": 9.018136024475098, | |
| "rewards/rejected": -17.581424713134766, | |
| "sft_loss": 1.1417536735534668, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 2.6791584483892175, | |
| "grad_norm": 12.800841299642682, | |
| "learning_rate": 7.659518105586238e-09, | |
| "logits/chosen": 16.294475555419922, | |
| "logits/rejected": 18.111600875854492, | |
| "logps/chosen": -335.0698547363281, | |
| "logps/rejected": -275.7948913574219, | |
| "loss": 0.3539, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.872127532958984, | |
| "rewards/margins": 9.738655090332031, | |
| "rewards/rejected": -18.610782623291016, | |
| "sft_loss": 1.171600341796875, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 2.687376725838264, | |
| "grad_norm": 11.437442046862925, | |
| "learning_rate": 7.273611103461836e-09, | |
| "logits/chosen": 17.347509384155273, | |
| "logits/rejected": 18.348569869995117, | |
| "logps/chosen": -303.4100341796875, | |
| "logps/rejected": -250.9491729736328, | |
| "loss": 0.3316, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.186358451843262, | |
| "rewards/margins": 8.977690696716309, | |
| "rewards/rejected": -17.164051055908203, | |
| "sft_loss": 1.1324518918991089, | |
| "step": 1635 | |
| }, | |
| { | |
| "epoch": 2.695595003287311, | |
| "grad_norm": 12.932581100678355, | |
| "learning_rate": 6.897387210429067e-09, | |
| "logits/chosen": 17.321182250976562, | |
| "logits/rejected": 18.35422134399414, | |
| "logps/chosen": -298.5028381347656, | |
| "logps/rejected": -246.92356872558594, | |
| "loss": 0.4056, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.715899467468262, | |
| "rewards/margins": 8.367709159851074, | |
| "rewards/rejected": -17.08361053466797, | |
| "sft_loss": 1.143718957901001, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 2.7038132807363575, | |
| "grad_norm": 14.723722025410018, | |
| "learning_rate": 6.530877371266175e-09, | |
| "logits/chosen": 16.489261627197266, | |
| "logits/rejected": 17.733213424682617, | |
| "logps/chosen": -305.61749267578125, | |
| "logps/rejected": -256.1786804199219, | |
| "loss": 0.3542, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.568675994873047, | |
| "rewards/margins": 9.070348739624023, | |
| "rewards/rejected": -17.639026641845703, | |
| "sft_loss": 1.176300048828125, | |
| "step": 1645 | |
| }, | |
| { | |
| "epoch": 2.712031558185404, | |
| "grad_norm": 22.923491412294727, | |
| "learning_rate": 6.1741117317611196e-09, | |
| "logits/chosen": 17.291810989379883, | |
| "logits/rejected": 18.644412994384766, | |
| "logps/chosen": -321.612060546875, | |
| "logps/rejected": -269.1338195800781, | |
| "loss": 0.4291, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -9.136231422424316, | |
| "rewards/margins": 9.433501243591309, | |
| "rewards/rejected": -18.569734573364258, | |
| "sft_loss": 1.2353969812393188, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 2.720249835634451, | |
| "grad_norm": 10.617946186080342, | |
| "learning_rate": 5.827119636232017e-09, | |
| "logits/chosen": 17.4252872467041, | |
| "logits/rejected": 18.208906173706055, | |
| "logps/chosen": -308.66943359375, | |
| "logps/rejected": -251.34764099121094, | |
| "loss": 0.4103, | |
| "rewards/accuracies": 0.9300000071525574, | |
| "rewards/chosen": -8.655915260314941, | |
| "rewards/margins": 8.82339096069336, | |
| "rewards/rejected": -17.479307174682617, | |
| "sft_loss": 1.2225102186203003, | |
| "step": 1655 | |
| }, | |
| { | |
| "epoch": 2.7284681130834976, | |
| "grad_norm": 9.888027224233095, | |
| "learning_rate": 5.489929625113549e-09, | |
| "logits/chosen": 16.691282272338867, | |
| "logits/rejected": 17.671295166015625, | |
| "logps/chosen": -328.8042297363281, | |
| "logps/rejected": -267.9706726074219, | |
| "loss": 0.4266, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.37188720703125, | |
| "rewards/margins": 9.643902778625488, | |
| "rewards/rejected": -18.015790939331055, | |
| "sft_loss": 1.2559726238250732, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 2.7366863905325443, | |
| "grad_norm": 20.805457290074077, | |
| "learning_rate": 5.1625694326095506e-09, | |
| "logits/chosen": 16.405752182006836, | |
| "logits/rejected": 17.14948081970215, | |
| "logps/chosen": -341.1684875488281, | |
| "logps/rejected": -271.09710693359375, | |
| "loss": 0.3332, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.78724193572998, | |
| "rewards/margins": 9.57591724395752, | |
| "rewards/rejected": -18.363157272338867, | |
| "sft_loss": 1.0471839904785156, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 2.744904667981591, | |
| "grad_norm": 13.812771677348046, | |
| "learning_rate": 4.845065984411742e-09, | |
| "logits/chosen": 16.383556365966797, | |
| "logits/rejected": 17.95462989807129, | |
| "logps/chosen": -331.20526123046875, | |
| "logps/rejected": -279.220458984375, | |
| "loss": 0.357, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -9.316568374633789, | |
| "rewards/margins": 9.552423477172852, | |
| "rewards/rejected": -18.86899185180664, | |
| "sft_loss": 1.163619875907898, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 2.7531229454306376, | |
| "grad_norm": 14.688896292238876, | |
| "learning_rate": 4.5374453954851035e-09, | |
| "logits/chosen": 18.362672805786133, | |
| "logits/rejected": 19.01654815673828, | |
| "logps/chosen": -307.0843200683594, | |
| "logps/rejected": -246.02671813964844, | |
| "loss": 0.3677, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.525540351867676, | |
| "rewards/margins": 8.622610092163086, | |
| "rewards/rejected": -17.148151397705078, | |
| "sft_loss": 1.1697852611541748, | |
| "step": 1675 | |
| }, | |
| { | |
| "epoch": 2.7613412228796843, | |
| "grad_norm": 15.151111907515142, | |
| "learning_rate": 4.239732967919976e-09, | |
| "logits/chosen": 18.35997772216797, | |
| "logits/rejected": 18.519113540649414, | |
| "logps/chosen": -283.6457214355469, | |
| "logps/rejected": -239.9178466796875, | |
| "loss": 0.3946, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.615645408630371, | |
| "rewards/margins": 8.21639633178711, | |
| "rewards/rejected": -16.832042694091797, | |
| "sft_loss": 1.185640573501587, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 2.769559500328731, | |
| "grad_norm": 8.993747704826987, | |
| "learning_rate": 3.951953188850762e-09, | |
| "logits/chosen": 15.838356018066406, | |
| "logits/rejected": 17.58329963684082, | |
| "logps/chosen": -300.3641662597656, | |
| "logps/rejected": -254.8916473388672, | |
| "loss": 0.3834, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.536845207214355, | |
| "rewards/margins": 8.757308006286621, | |
| "rewards/rejected": -17.294153213500977, | |
| "sft_loss": 1.1382744312286377, | |
| "step": 1685 | |
| }, | |
| { | |
| "epoch": 2.7777777777777777, | |
| "grad_norm": 13.655366152597214, | |
| "learning_rate": 3.674129728442013e-09, | |
| "logits/chosen": 17.68130874633789, | |
| "logits/rejected": 19.080127716064453, | |
| "logps/chosen": -268.5239562988281, | |
| "logps/rejected": -229.50523376464844, | |
| "loss": 0.3877, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.217788696289062, | |
| "rewards/margins": 7.664586067199707, | |
| "rewards/rejected": -15.882373809814453, | |
| "sft_loss": 1.0555132627487183, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 2.7859960552268244, | |
| "grad_norm": 11.562075341982874, | |
| "learning_rate": 3.4062854379414694e-09, | |
| "logits/chosen": 17.3222599029541, | |
| "logits/rejected": 18.08160972595215, | |
| "logps/chosen": -306.2829284667969, | |
| "logps/rejected": -251.935546875, | |
| "loss": 0.3607, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.6878023147583, | |
| "rewards/margins": 8.51749038696289, | |
| "rewards/rejected": -17.205289840698242, | |
| "sft_loss": 1.1840558052062988, | |
| "step": 1695 | |
| }, | |
| { | |
| "epoch": 2.794214332675871, | |
| "grad_norm": 25.80729521542422, | |
| "learning_rate": 3.1484423478004563e-09, | |
| "logits/chosen": 17.99493408203125, | |
| "logits/rejected": 18.518619537353516, | |
| "logps/chosen": -289.563232421875, | |
| "logps/rejected": -243.09219360351562, | |
| "loss": 0.4297, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.428235054016113, | |
| "rewards/margins": 8.079200744628906, | |
| "rewards/rejected": -16.50743865966797, | |
| "sft_loss": 1.108068585395813, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 2.8024326101249177, | |
| "grad_norm": 13.892592196473423, | |
| "learning_rate": 2.9006216658619687e-09, | |
| "logits/chosen": 16.929012298583984, | |
| "logits/rejected": 17.672870635986328, | |
| "logps/chosen": -313.2660217285156, | |
| "logps/rejected": -257.04034423828125, | |
| "loss": 0.399, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.524652481079102, | |
| "rewards/margins": 9.001575469970703, | |
| "rewards/rejected": -17.526227951049805, | |
| "sft_loss": 1.1432716846466064, | |
| "step": 1705 | |
| }, | |
| { | |
| "epoch": 2.8106508875739644, | |
| "grad_norm": 9.790469642612795, | |
| "learning_rate": 2.6628437756162635e-09, | |
| "logits/chosen": 17.310102462768555, | |
| "logits/rejected": 18.04708480834961, | |
| "logps/chosen": -293.9396057128906, | |
| "logps/rejected": -240.7176971435547, | |
| "loss": 0.3473, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.278569221496582, | |
| "rewards/margins": 8.390737533569336, | |
| "rewards/rejected": -16.669307708740234, | |
| "sft_loss": 1.1700962781906128, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 2.818869165023011, | |
| "grad_norm": 9.444898259948333, | |
| "learning_rate": 2.435128234524228e-09, | |
| "logits/chosen": 17.586627960205078, | |
| "logits/rejected": 18.176280975341797, | |
| "logps/chosen": -299.7925109863281, | |
| "logps/rejected": -243.57485961914062, | |
| "loss": 0.4067, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -8.356633186340332, | |
| "rewards/margins": 8.317458152770996, | |
| "rewards/rejected": -16.674091339111328, | |
| "sft_loss": 1.1623938083648682, | |
| "step": 1715 | |
| }, | |
| { | |
| "epoch": 2.827087442472058, | |
| "grad_norm": 11.302990178309454, | |
| "learning_rate": 2.2174937724088877e-09, | |
| "logits/chosen": 17.02381134033203, | |
| "logits/rejected": 18.46286392211914, | |
| "logps/chosen": -314.8418273925781, | |
| "logps/rejected": -259.57745361328125, | |
| "loss": 0.4069, | |
| "rewards/accuracies": 0.9200000166893005, | |
| "rewards/chosen": -8.880843162536621, | |
| "rewards/margins": 8.848891258239746, | |
| "rewards/rejected": -17.729736328125, | |
| "sft_loss": 1.1079494953155518, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 2.8353057199211045, | |
| "grad_norm": 9.711126487613186, | |
| "learning_rate": 2.009958289914765e-09, | |
| "logits/chosen": 17.012800216674805, | |
| "logits/rejected": 18.349876403808594, | |
| "logps/chosen": -321.7917175292969, | |
| "logps/rejected": -270.1522216796875, | |
| "loss": 0.3451, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.694296836853027, | |
| "rewards/margins": 9.856916427612305, | |
| "rewards/rejected": -18.551212310791016, | |
| "sft_loss": 1.0486385822296143, | |
| "step": 1725 | |
| }, | |
| { | |
| "epoch": 2.843523997370151, | |
| "grad_norm": 22.398743525886992, | |
| "learning_rate": 1.8125388570355422e-09, | |
| "logits/chosen": 16.76806640625, | |
| "logits/rejected": 17.946535110473633, | |
| "logps/chosen": -312.1168212890625, | |
| "logps/rejected": -266.208984375, | |
| "loss": 0.3337, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.700928688049316, | |
| "rewards/margins": 9.464086532592773, | |
| "rewards/rejected": -18.165014266967773, | |
| "sft_loss": 1.162864327430725, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 2.851742274819198, | |
| "grad_norm": 14.549076580676688, | |
| "learning_rate": 1.6252517117101017e-09, | |
| "logits/chosen": 16.1746768951416, | |
| "logits/rejected": 17.028032302856445, | |
| "logps/chosen": -316.6230773925781, | |
| "logps/rejected": -258.9454345703125, | |
| "loss": 0.4137, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.804771423339844, | |
| "rewards/margins": 8.894736289978027, | |
| "rewards/rejected": -17.699508666992188, | |
| "sft_loss": 1.2625643014907837, | |
| "step": 1735 | |
| }, | |
| { | |
| "epoch": 2.8599605522682445, | |
| "grad_norm": 22.305121337267558, | |
| "learning_rate": 1.4481122584868582e-09, | |
| "logits/chosen": 16.654598236083984, | |
| "logits/rejected": 17.727828979492188, | |
| "logps/chosen": -327.3823547363281, | |
| "logps/rejected": -264.8335876464844, | |
| "loss": 0.4201, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.304695129394531, | |
| "rewards/margins": 8.695550918579102, | |
| "rewards/rejected": -18.000246047973633, | |
| "sft_loss": 1.1684330701828003, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 2.868178829717291, | |
| "grad_norm": 12.445565014042396, | |
| "learning_rate": 1.2811350672568138e-09, | |
| "logits/chosen": 16.678804397583008, | |
| "logits/rejected": 18.215984344482422, | |
| "logps/chosen": -340.2626953125, | |
| "logps/rejected": -277.87872314453125, | |
| "loss": 0.4267, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.802419662475586, | |
| "rewards/margins": 9.925808906555176, | |
| "rewards/rejected": -18.728229522705078, | |
| "sft_loss": 1.1311696767807007, | |
| "step": 1745 | |
| }, | |
| { | |
| "epoch": 2.876397107166338, | |
| "grad_norm": 16.390316522695066, | |
| "learning_rate": 1.1243338720550445e-09, | |
| "logits/chosen": 16.955345153808594, | |
| "logits/rejected": 18.02084732055664, | |
| "logps/chosen": -291.6322937011719, | |
| "logps/rejected": -249.6865234375, | |
| "loss": 0.4018, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.541548728942871, | |
| "rewards/margins": 8.891424179077148, | |
| "rewards/rejected": -17.432973861694336, | |
| "sft_loss": 1.122809648513794, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 2.8846153846153846, | |
| "grad_norm": 12.452008236969373, | |
| "learning_rate": 9.777215699311725e-10, | |
| "logits/chosen": 17.285600662231445, | |
| "logits/rejected": 18.065244674682617, | |
| "logps/chosen": -304.11834716796875, | |
| "logps/rejected": -252.4257049560547, | |
| "loss": 0.3855, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.604863166809082, | |
| "rewards/margins": 8.664654731750488, | |
| "rewards/rejected": -17.269519805908203, | |
| "sft_loss": 1.2225173711776733, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 2.8928336620644313, | |
| "grad_norm": 26.02343316648693, | |
| "learning_rate": 8.413102198885358e-10, | |
| "logits/chosen": 15.385034561157227, | |
| "logits/rejected": 16.86432456970215, | |
| "logps/chosen": -327.46160888671875, | |
| "logps/rejected": -264.8345031738281, | |
| "loss": 0.4478, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -9.194596290588379, | |
| "rewards/margins": 8.853148460388184, | |
| "rewards/rejected": -18.047740936279297, | |
| "sft_loss": 1.0643724203109741, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 2.901051939513478, | |
| "grad_norm": 9.970098814112205, | |
| "learning_rate": 7.151110418923134e-10, | |
| "logits/chosen": 18.434673309326172, | |
| "logits/rejected": 18.675090789794922, | |
| "logps/chosen": -302.91534423828125, | |
| "logps/rejected": -248.91583251953125, | |
| "loss": 0.3988, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -8.768433570861816, | |
| "rewards/margins": 8.766546249389648, | |
| "rewards/rejected": -17.53498077392578, | |
| "sft_loss": 1.1868294477462769, | |
| "step": 1765 | |
| }, | |
| { | |
| "epoch": 2.9092702169625246, | |
| "grad_norm": 18.45143826968204, | |
| "learning_rate": 5.991344159466672e-10, | |
| "logits/chosen": 16.24605941772461, | |
| "logits/rejected": 17.377365112304688, | |
| "logps/chosen": -318.8271789550781, | |
| "logps/rejected": -257.1405334472656, | |
| "loss": 0.352, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -8.925313949584961, | |
| "rewards/margins": 8.410311698913574, | |
| "rewards/rejected": -17.335628509521484, | |
| "sft_loss": 1.1228437423706055, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 2.9174884944115713, | |
| "grad_norm": 12.923252042791281, | |
| "learning_rate": 4.933898812409937e-10, | |
| "logits/chosen": 16.73847198486328, | |
| "logits/rejected": 17.230134963989258, | |
| "logps/chosen": -338.15118408203125, | |
| "logps/rejected": -271.0611267089844, | |
| "loss": 0.3936, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -9.159259796142578, | |
| "rewards/margins": 9.365001678466797, | |
| "rewards/rejected": -18.524259567260742, | |
| "sft_loss": 1.1974759101867676, | |
| "step": 1775 | |
| }, | |
| { | |
| "epoch": 2.925706771860618, | |
| "grad_norm": 18.59092085629164, | |
| "learning_rate": 3.978861353653301e-10, | |
| "logits/chosen": 17.0466251373291, | |
| "logits/rejected": 17.81385612487793, | |
| "logps/chosen": -301.74603271484375, | |
| "logps/rejected": -247.91571044921875, | |
| "loss": 0.4187, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -8.685689926147461, | |
| "rewards/margins": 8.343822479248047, | |
| "rewards/rejected": -17.02951431274414, | |
| "sft_loss": 1.086068034172058, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 2.9339250493096647, | |
| "grad_norm": 16.48047435187235, | |
| "learning_rate": 3.1263103359494005e-10, | |
| "logits/chosen": 17.160581588745117, | |
| "logits/rejected": 18.65143585205078, | |
| "logps/chosen": -300.696533203125, | |
| "logps/rejected": -245.1064453125, | |
| "loss": 0.3632, | |
| "rewards/accuracies": 0.9399999976158142, | |
| "rewards/chosen": -7.98746395111084, | |
| "rewards/margins": 8.9141206741333, | |
| "rewards/rejected": -16.90158462524414, | |
| "sft_loss": 1.4039214849472046, | |
| "step": 1785 | |
| }, | |
| { | |
| "epoch": 2.9421433267587114, | |
| "grad_norm": 14.541496183664128, | |
| "learning_rate": 2.3763158824419147e-10, | |
| "logits/chosen": 16.89483642578125, | |
| "logits/rejected": 17.82222557067871, | |
| "logps/chosen": -328.6429748535156, | |
| "logps/rejected": -268.05938720703125, | |
| "loss": 0.3455, | |
| "rewards/accuracies": 1.0, | |
| "rewards/chosen": -8.467479705810547, | |
| "rewards/margins": 9.740607261657715, | |
| "rewards/rejected": -18.208087921142578, | |
| "sft_loss": 1.1161048412322998, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 2.950361604207758, | |
| "grad_norm": 14.008459290888235, | |
| "learning_rate": 1.728939680898517e-10, | |
| "logits/chosen": 16.10931396484375, | |
| "logits/rejected": 17.673229217529297, | |
| "logps/chosen": -308.1393737792969, | |
| "logps/rejected": -262.28009033203125, | |
| "loss": 0.367, | |
| "rewards/accuracies": 0.9900000095367432, | |
| "rewards/chosen": -9.119178771972656, | |
| "rewards/margins": 8.93433666229248, | |
| "rewards/rejected": -18.05351448059082, | |
| "sft_loss": 1.1963419914245605, | |
| "step": 1795 | |
| }, | |
| { | |
| "epoch": 2.9585798816568047, | |
| "grad_norm": 16.666553214725845, | |
| "learning_rate": 1.184234978636456e-10, | |
| "logits/chosen": 16.49167823791504, | |
| "logits/rejected": 17.332914352416992, | |
| "logps/chosen": -282.5769348144531, | |
| "logps/rejected": -248.25242614746094, | |
| "loss": 0.3921, | |
| "rewards/accuracies": 0.9599999785423279, | |
| "rewards/chosen": -8.752376556396484, | |
| "rewards/margins": 8.20280933380127, | |
| "rewards/rejected": -16.955184936523438, | |
| "sft_loss": 1.2729109525680542, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 2.9667981591058514, | |
| "grad_norm": 15.626709598718353, | |
| "learning_rate": 7.422465781431464e-11, | |
| "logits/chosen": 16.95427894592285, | |
| "logits/rejected": 17.818552017211914, | |
| "logps/chosen": -329.6918640136719, | |
| "logps/rejected": -266.5020446777344, | |
| "loss": 0.3843, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.854848861694336, | |
| "rewards/margins": 9.140162467956543, | |
| "rewards/rejected": -17.995010375976562, | |
| "sft_loss": 1.1496516466140747, | |
| "step": 1805 | |
| }, | |
| { | |
| "epoch": 2.975016436554898, | |
| "grad_norm": 12.600231440275685, | |
| "learning_rate": 4.030108333910598e-11, | |
| "logits/chosen": 17.70891571044922, | |
| "logits/rejected": 18.366714477539062, | |
| "logps/chosen": -295.2488708496094, | |
| "logps/rejected": -242.97634887695312, | |
| "loss": 0.3819, | |
| "rewards/accuracies": 0.9700000286102295, | |
| "rewards/chosen": -8.695423126220703, | |
| "rewards/margins": 8.07010269165039, | |
| "rewards/rejected": -16.765525817871094, | |
| "sft_loss": 1.1890416145324707, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 2.983234714003945, | |
| "grad_norm": 14.846673339349834, | |
| "learning_rate": 1.6655564684747713e-11, | |
| "logits/chosen": 17.073108673095703, | |
| "logits/rejected": 17.751785278320312, | |
| "logps/chosen": -334.0798034667969, | |
| "logps/rejected": -261.91644287109375, | |
| "loss": 0.372, | |
| "rewards/accuracies": 0.949999988079071, | |
| "rewards/chosen": -9.156329154968262, | |
| "rewards/margins": 8.551715850830078, | |
| "rewards/rejected": -17.708045959472656, | |
| "sft_loss": 1.1268292665481567, | |
| "step": 1815 | |
| }, | |
| { | |
| "epoch": 2.9914529914529915, | |
| "grad_norm": 9.077728581968316, | |
| "learning_rate": 3.290046717979722e-12, | |
| "logits/chosen": 16.00580406188965, | |
| "logits/rejected": 16.491676330566406, | |
| "logps/chosen": -320.336181640625, | |
| "logps/rejected": -255.6234588623047, | |
| "loss": 0.3871, | |
| "rewards/accuracies": 0.9800000190734863, | |
| "rewards/chosen": -7.969948768615723, | |
| "rewards/margins": 9.396775245666504, | |
| "rewards/rejected": -17.366724014282227, | |
| "sft_loss": 1.16538667678833, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 2.998027613412229, | |
| "step": 1824, | |
| "total_flos": 287426369617920.0, | |
| "train_loss": 0.5032803327368017, | |
| "train_runtime": 76434.0426, | |
| "train_samples_per_second": 1.433, | |
| "train_steps_per_second": 0.024 | |
| } | |
| ], | |
| "logging_steps": 5, | |
| "max_steps": 1824, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 287426369617920.0, | |
| "train_batch_size": 4, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |