parallel-decoder-transformer / train_run_stages.json
loganrobbins's picture
Publish PDT adapters + arXiv model card
4600161 verified
[
{
"stage_index": 2,
"stage_name": "notes_bus_enable_extended",
"start_step": 22500,
"timestamp": "2025-12-07T11:50:45.750997+00:00",
"actions": {
"bus_mix_prob": 0.75,
"freeze": [
"trunk",
"agreement_head",
"coverage_head"
],
"unfreeze": [
"speculation_head"
]
},
"end_step": 25000,
"steps": 2500,
"duration": 951.741001367569,
"completed_at": "2025-12-07T12:06:37.492038+00:00"
},
{
"stage_index": 3,
"stage_name": "rollback_training_extended",
"start_step": 25000,
"timestamp": "2025-12-07T12:06:37.492038+00:00",
"actions": {
"bus_mix_prob": 0.35,
"stream_dropout_prob": 0.15,
"freeze": [
"trunk"
],
"unfreeze": [
"agreement_head",
"coverage_head"
]
},
"end_step": 50000,
"steps": 25000,
"duration": 10849.983159542084,
"completed_at": "2025-12-07T15:07:27.475214+00:00"
}
]