{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 876,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.2,
      "learning_rate": 1.7045454545454546e-05,
      "logits/chosen": -2.9543049335479736,
      "logits/rejected": -4.587946891784668,
      "logps/chosen": -4854.5478515625,
      "logps/rejected": -32.31528854370117,
      "loss": 0.3787,
      "rewards/accuracies": 0.8017241358757019,
      "rewards/chosen": 4.192387104034424,
      "rewards/margins": 4.192722797393799,
      "rewards/rejected": -0.0003354697546456009,
      "step": 58
    },
    {
      "epoch": 0.4,
      "learning_rate": 2.9238578680203047e-05,
      "logits/chosen": -3.0135436058044434,
      "logits/rejected": -4.595321178436279,
      "logps/chosen": -5432.6845703125,
      "logps/rejected": -32.69685363769531,
      "loss": 0.0201,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 29.855911254882812,
      "rewards/margins": 29.8657283782959,
      "rewards/rejected": -0.009818021208047867,
      "step": 116
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.7030456852791878e-05,
      "logits/chosen": -2.929417133331299,
      "logits/rejected": -4.776305198669434,
      "logps/chosen": -4491.6943359375,
      "logps/rejected": -32.90016174316406,
      "loss": 0.0064,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 42.8309440612793,
      "rewards/margins": 42.856590270996094,
      "rewards/rejected": -0.02564803883433342,
      "step": 174
    },
    {
      "epoch": 0.79,
      "learning_rate": 2.4822335025380712e-05,
      "logits/chosen": -2.8669936656951904,
      "logits/rejected": -4.86910343170166,
      "logps/chosen": -4899.3330078125,
      "logps/rejected": -32.84577560424805,
      "loss": 0.0013,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 56.820987701416016,
      "rewards/margins": 56.85633850097656,
      "rewards/rejected": -0.035354480147361755,
      "step": 232
    },
    {
      "epoch": 0.99,
      "learning_rate": 2.2614213197969543e-05,
      "logits/chosen": -3.1831815242767334,
      "logits/rejected": -4.907491207122803,
      "logps/chosen": -5208.95263671875,
      "logps/rejected": -32.94639587402344,
      "loss": 0.006,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 65.99440002441406,
      "rewards/margins": 66.03594970703125,
      "rewards/rejected": -0.04156281799077988,
      "step": 290
    },
    {
      "epoch": 1.19,
      "learning_rate": 2.0406091370558378e-05,
      "logits/chosen": -2.9403574466705322,
      "logits/rejected": -4.912071704864502,
      "logps/chosen": -4351.31201171875,
      "logps/rejected": -33.200740814208984,
      "loss": 0.0012,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 59.315834045410156,
      "rewards/margins": 59.37520217895508,
      "rewards/rejected": -0.05936765670776367,
      "step": 348
    },
    {
      "epoch": 1.39,
      "learning_rate": 1.819796954314721e-05,
      "logits/chosen": -2.886042594909668,
      "logits/rejected": -4.924810409545898,
      "logps/chosen": -4893.87060546875,
      "logps/rejected": -33.106529235839844,
      "loss": 0.0003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 72.36699676513672,
      "rewards/margins": 72.43253326416016,
      "rewards/rejected": -0.06554649025201797,
      "step": 406
    },
    {
      "epoch": 1.59,
      "learning_rate": 1.5989847715736043e-05,
      "logits/chosen": -2.999485492706299,
      "logits/rejected": -4.834668159484863,
      "logps/chosen": -5137.2041015625,
      "logps/rejected": -33.30693817138672,
      "loss": 0.001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 78.8132553100586,
      "rewards/margins": 78.88518524169922,
      "rewards/rejected": -0.07193376123905182,
      "step": 464
    },
    {
      "epoch": 1.79,
      "learning_rate": 1.3781725888324872e-05,
      "logits/chosen": -2.787987470626831,
      "logits/rejected": -4.926151275634766,
      "logps/chosen": -4610.828125,
      "logps/rejected": -33.529579162597656,
      "loss": 0.004,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 73.21186065673828,
      "rewards/margins": 73.2912826538086,
      "rewards/rejected": -0.0794229581952095,
      "step": 522
    },
    {
      "epoch": 1.99,
      "learning_rate": 1.1573604060913705e-05,
      "logits/chosen": -2.958709955215454,
      "logits/rejected": -4.893362045288086,
      "logps/chosen": -4559.85302734375,
      "logps/rejected": -33.20284652709961,
      "loss": 0.0066,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 76.01142883300781,
      "rewards/margins": 76.10553741455078,
      "rewards/rejected": -0.09409420937299728,
      "step": 580
    },
    {
      "epoch": 2.18,
      "learning_rate": 9.365482233502538e-06,
      "logits/chosen": -2.9004671573638916,
      "logits/rejected": -4.998195648193359,
      "logps/chosen": -4775.02197265625,
      "logps/rejected": -33.74284744262695,
      "loss": 0.0036,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 79.64250946044922,
      "rewards/margins": 79.75552368164062,
      "rewards/rejected": -0.11301343142986298,
      "step": 638
    },
    {
      "epoch": 2.38,
      "learning_rate": 7.15736040609137e-06,
      "logits/chosen": -2.8723161220550537,
      "logits/rejected": -4.972283363342285,
      "logps/chosen": -4627.41357421875,
      "logps/rejected": -33.839881896972656,
      "loss": 0.0054,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 80.74669647216797,
      "rewards/margins": 80.86132049560547,
      "rewards/rejected": -0.11461903154850006,
      "step": 696
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.949238578680203e-06,
      "logits/chosen": -2.920714855194092,
      "logits/rejected": -4.981288433074951,
      "logps/chosen": -4183.37890625,
      "logps/rejected": -33.559993743896484,
      "loss": 0.0004,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 75.54529571533203,
      "rewards/margins": 75.672607421875,
      "rewards/rejected": -0.127317875623703,
      "step": 754
    },
    {
      "epoch": 2.78,
      "learning_rate": 2.7411167512690357e-06,
      "logits/chosen": -3.071594715118408,
      "logits/rejected": -5.01361608505249,
      "logps/chosen": -4776.15234375,
      "logps/rejected": -34.06298065185547,
      "loss": 0.0003,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 84.52633666992188,
      "rewards/margins": 84.65997314453125,
      "rewards/rejected": -0.13363485038280487,
      "step": 812
    },
    {
      "epoch": 2.98,
      "learning_rate": 5.329949238578681e-07,
      "logits/chosen": -2.9292426109313965,
      "logits/rejected": -4.913326740264893,
      "logps/chosen": -4833.65625,
      "logps/rejected": -33.5969123840332,
      "loss": 0.0001,
      "rewards/accuracies": 1.0,
      "rewards/chosen": 87.57024383544922,
      "rewards/margins": 87.6995620727539,
      "rewards/rejected": -0.12932546436786652,
      "step": 870
    }
  ],
  "logging_steps": 58,
  "max_steps": 876,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}