{
    "colocated_worker_min_available_gpu_memory_mb": 10240,
    "directory": "experiments",
    "episode_generator": {
        "dataset_num_samples_per_iteration": 128,
        "dataset_sample_with_replacement": true,
        "dataset_shuffle_on_each_iteration": true,
        "fill_missing_episodes": false,
        "force_tokenization_from_inference": true,
        "generation_per_sample": 8,
        "leave_one_out": false,
        "max_query_length": 1096,
        "messages_column_name": "problem",
        "model_context_size": 8192,
        "normalize_by_group_std": true,
        "prompt_template": "{}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}",
        "response_prefill": "<think>\n",
        "system_message": "",
        "task": {
            "dataset_dict_path": null,
            "hf_dataset_args": [
                "agentica-org/DeepScaleR-Preview-Dataset"
            ],
            "load_dataset_dict": false,
            "type": "deepscaler"
        },
        "tokenization_check_full_reconstruction": false,
        "trajectory_inference_strategy": {
            "auto_compute_max_new_tokens": true,
            "delethink_iteration": 15,
            "k": 8,
            "keep_head": 100,
            "keep_tail": 3996,
            "request_kwargs": {
                "logprob_start_len": 0,
                "return_logprob": true
            },
            "sampling_params": {
                "max_new_tokens": 8191,
                "temperature": 0.6
            },
            "type": "delethink_sampling"
        },
        "type": "delethink_grpo_episode_generator",
        "use_chat_template": false,
        "zero_out_negative_advantage_episodes": false
    },
    "episodes_cloud_log_steps": 10,
    "evaluation_inference_worker": {
        "options": {
            "enable_cache_report": true,
            "enable_metrics": true,
            "log_requests": true,
            "mem_fraction_static": 0.65,
            "tp": 1
        },
        "type": "sglang"
    },
    "evaluation_pipelines": [
        {
            "analyzers": [
                {
                    "type": "task_performance"
                }
            ],
            "dataset_portion": 1,
            "dataset_split": "test",
            "inference_name": "aime_24",
            "inference_strategy": {
                "auto_compute_max_new_tokens": true,
                "decode_response_text_from_token_ids": true,
                "delethink_iteration": 15,
                "enable_cache": true,
                "k": 16,
                "keep_head": 100,
                "keep_tail": 3996,
                "max_concurrent_requests": 2400,
                "model_context_size": 8192,
                "request_kwargs": {
                    "logprob_start_len": 0,
                    "return_logprob": true
                },
                "sampling_params": {
                    "max_new_tokens": 8191,
                    "temperature": 0.6
                },
                "type": "delethink_sampling"
            },
            "messages_column_name": "problem",
            "prompt_template": "{}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}",
            "response_prefill": "<think>\n",
            "seed": 42,
            "system_message": "",
            "task": {
                "dataset_dict_path": null,
                "hf_dataset_args": [
                    "realtreetune/aime24"
                ],
                "load_dataset_dict": false,
                "type": "math"
            },
            "use_chat_template": false
        }
    ],
    "exp_name": "delethinkIter_R1DistillQwen1.5B_DEEPSCALER_DELETHINK15X8KHUGEBATCH_20250818DELETHINK15X8KHUGEBATCH_seed_2746318213",
    "global_vars": {
        "debug_mode": false,
        "dirs": {
            "data": "data",
            "experiments": "experiments"
        },
        "seed": 2746318213
    },
    "inference_worker": {
        "collocated_options": {
            "chunked_prefill_size": 16382,
            "context_length": 8192,
            "enable_cache_report": true,
            "enable_memory_saver": true,
            "enable_metrics": true,
            "max_running_requests": 10000,
            "mem_fraction_static": 0.6,
            "schedule_conservativeness": 0.3,
            "schedule_policy": "fcfs",
            "tp": 1
        },
        "options": {
            "chunked_prefill_size": 16382,
            "context_length": 8192,
            "enable_cache_report": true,
            "enable_metrics": true,
            "max_running_requests": 10000,
            "mem_fraction_static": 0.6,
            "schedule_conservativeness": 0.3,
            "schedule_policy": "fcfs",
            "tp": 1
        },
        "type": "sglang"
    },
    "log_some_example": false,
    "mid_training_evaluation_pipelines": [],
    "mid_training_evaluation_steps": 5,
    "num_iterations": 500,
    "persistent_colocated_inference_workers": true,
    "tokenizer": {
        "hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
        "type": "pretrained"
    },
    "trainer": {
        "actor_deepspeed_config": {
            "bf16": {
                "enabled": "auto"
            },
            "gradient_accumulation_steps": "auto",
            "gradient_clipping": "auto",
            "optimizer": {
                "params": {
                    "betas": "auto",
                    "eps": "auto",
                    "lr": "auto",
                    "weight_decay": "auto"
                },
                "type": "AdamW"
            },
            "scheduler": null,
            "train_batch_size": "auto",
            "train_micro_batch_size_per_gpu": "auto",
            "zero_allow_untested_optimizer": true,
            "zero_optimization": {
                "allgather_bucket_size": 500000000,
                "allgather_partitions": true,
                "contiguous_gradients": true,
                "overlap_comm": false,
                "reduce_bucket_size": "auto",
                "reduce_scatter": true,
                "stage": 2
            }
        },
        "actor_model": {
            "disable_dropout": true,
            "hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
            "pretrained_args": {
                "attn_implementation": "flash_attention_2"
            },
            "type": "pretrained_causal_lm"
        },
        "critic_deepspeed_config": null,
        "critic_model": null,
        "general_training_args": {
            "bf16": true,
            "checkpoint_keep_steps": 5,
            "dataloader_num_workers": 0,
            "dataloader_pin_memory": false,
            "gradient_accumulation_steps": null,
            "gradient_checkpointing": true,
            "learning_rate": 1e-06,
            "logging_steps": 1,
            "max_grad_norm": 1,
            "per_device_train_batch_size": 4,
            "save_steps": 1,
            "seed": 2746318213,
            "target_train_batch_size": 256,
            "warmup_ratio": 0,
            "weight_decay": 0
        },
        "move_reference_model_to_cpu": true,
        "num_epochs_per_iteration": 1,
        "params": {
            "adap_kl_ctrl": false,
            "cliprange": 0.2,
            "cliprange_value": 0.2,
            "gamma": 1,
            "init_kl_coef": 0.001,
            "kl_penalty_loss_clip_max": 10,
            "kl_penalty_loss_clip_min": 0,
            "kl_penalty_loss_reduction": "seq_mean_then_mean",
            "kl_penalty_loss_type": "control_variate",
            "lam": 0.96,
            "max_response_length": 8191,
            "policy_loss_reduction": "seq_mean_then_mean",
            "temperature": 0.6,
            "use_score_norm": false,
            "use_score_scaling": false,
            "whiten_advantages": false,
            "whiten_rewards": false
        },
        "reference_deepspeed_config": {
            "bf16": {
                "enabled": true
            },
            "gradient_accumulation_steps": "auto",
            "prescale_gradients": false,
            "train_batch_size": "auto",
            "train_micro_batch_size_per_gpu": "auto",
            "wall_clock_breakdown": false
        },
        "reference_model": {
            "hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
            "pretrained_args": {
                "attn_implementation": "flash_attention_2"
            },
            "type": "pretrained_causal_lm"
        },
        "report_entropy": true,
        "save_hf_critic_checkpoint": false,
        "save_temp_actor_hf_weights": false,
        "temp_checkpoint_dir": "/tmp/delethink_tmp_ckpts",
        "type": "ppo",
        "use_full_precision_logits": false
    },
    "type": "multi_node_policy_iteration"
}