{
"model_name": "GPT-2 RLHF",
"model_type": "RLHF-trained GPT-2",
"training_pipeline": [
"Stage 1: Supervised Fine-Tuning (SFT)",
"Stage 2: Reward Model Training",
"Stage 3: PPO Optimization"
],
"dataset": "Anthropic/hh-rlhf",
"base_model": "gpt2",
"parameters": "124M",
"training_date": "2025-09-29T20:36:42.118760",
"methodology": "3-stage RLHF pipeline (same as ChatGPT)",
"hyperparameters": {
"sft_lr": 5e-5,
"sft_epochs": 3,
"reward_lr": 1e-5,
"reward_epochs": 3,
"ppo_lr": 1e-5,
"ppo_episodes": 10,
"kl_coef": 0.1,
"clip_range": 0.2
}
}