gpt2-got-therapy / training_metadata.json
{
  "model_name": "GPT-2 RLHF",
  "model_type": "RLHF-trained GPT-2",
  "training_pipeline": [
    "Stage 1: Supervised Fine-Tuning (SFT)",
    "Stage 2: Reward Model Training",
    "Stage 3: PPO Optimization"
  ],
  "dataset": "Anthropic/hh-rlhf",
  "base_model": "gpt2",
  "parameters": "124M",
  "training_date": "2025-09-29T20:36:42.118760",
  "methodology": "3-stage RLHF pipeline (SFT, reward modeling, PPO), following the InstructGPT/ChatGPT recipe",
  "hyperparameters": {
    "sft_lr": "5e-5",
    "sft_epochs": 3,
    "reward_lr": "1e-5",
    "reward_epochs": 3,
    "ppo_lr": "1e-5",
    "ppo_episodes": 10,
    "kl_coef": 0.1,
    "clip_range": 0.2
  }
}
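
For reference, a minimal sketch of reading this metadata file and loading the checkpoint with the `transformers` library. The hub id `Tanaybh/gpt2-got-therapy` is inferred from this page rather than stated in the metadata, and the snippet assumes the repo ships standard `transformers`-compatible weights.

```python
import json
from transformers import AutoModelForCausalLM, AutoTokenizer

# Read the training metadata shipped alongside the model.
with open("training_metadata.json") as f:
    meta = json.load(f)

print(meta["training_pipeline"])          # the three RLHF stages
print(meta["hyperparameters"]["ppo_lr"])  # "1e-5"

# Hub id inferred from this page; adjust if the actual repo id differs.
repo_id = "Tanaybh/gpt2-got-therapy"
tokenizer = AutoTokenizer.from_pretrained(repo_id)
model = AutoModelForCausalLM.from_pretrained(repo_id)
```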
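The PPO-stage hyperparameters (`kl_coef` 0.1, `clip_range` 0.2) correspond to the usual KL-penalized, clipped PPO objective used in RLHF. Below is a minimal PyTorch sketch of those two pieces only, not the repo's actual training code; the tensor shapes, the per-token KL estimate, and the convention of adding the reward-model score at the final token are assumptions.

```python
import torch

def shaped_rewards(rm_scores, logp_policy, logp_ref, kl_coef=0.1):
    """KL-penalized rewards (assumed shapes: rm_scores (B,), log-probs (B, T)).

    The penalty discourages the policy from drifting away from the frozen
    SFT/reference model; the reward-model score is credited at the last token.
    """
    kl = logp_policy - logp_ref          # per-token KL estimate
    rewards = -kl_coef * kl              # KL penalty at every token
    rewards[:, -1] += rm_scores          # RM score added at the final token
    return rewards

def ppo_clip_loss(logp_new, logp_old, advantages, clip_range=0.2):
    """Clipped PPO surrogate objective (returned as a loss to minimize)."""
    ratio = torch.exp(logp_new - logp_old)
    unclipped = ratio * advantages
    clipped = torch.clamp(ratio, 1.0 - clip_range, 1.0 + clip_range) * advantages
    return -torch.min(unclipped, clipped).mean()
```

In a full pipeline the advantages would typically come from a value head (e.g. via GAE); the constants above simply mirror the `kl_coef` and `clip_range` values recorded in the metadata.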