{ "colocated_worker_min_available_gpu_memory_mb": 10240, "directory": "experiments", "episode_generator": { "dataset_num_samples_per_iteration": 128, "dataset_sample_with_replacement": true, "dataset_shuffle_on_each_iteration": true, "fill_missing_episodes": false, "force_tokenization_from_inference": true, "generation_per_sample": 8, "leave_one_out": false, "max_query_length": 1096, "messages_column_name": "problem", "model_context_size": 8192, "normalize_by_group_std": true, "prompt_template": "{}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}", "response_prefill": "\n", "system_message": "", "task": { "dataset_dict_path": null, "hf_dataset_args": [ "agentica-org/DeepScaleR-Preview-Dataset" ], "load_dataset_dict": false, "type": "deepscaler" }, "tokenization_check_full_reconstruction": false, "trajectory_inference_strategy": { "auto_compute_max_new_tokens": true, "delethink_iteration": 15, "k": 8, "keep_head": 100, "keep_tail": 3996, "request_kwargs": { "logprob_start_len": 0, "return_logprob": true }, "sampling_params": { "max_new_tokens": 8191, "temperature": 0.6 }, "type": "delethink_sampling" }, "type": "delethink_grpo_episode_generator", "use_chat_template": false, "zero_out_negative_advantage_episodes": false }, "episodes_cloud_log_steps": 10, "evaluation_inference_worker": { "options": { "enable_cache_report": true, "enable_metrics": true, "log_requests": true, "mem_fraction_static": 0.65, "tp": 1 }, "type": "sglang" }, "evaluation_pipelines": [ { "analyzers": [ { "type": "task_performance" } ], "dataset_portion": 1, "dataset_split": "test", "inference_name": "aime_24", "inference_strategy": { "auto_compute_max_new_tokens": true, "decode_response_text_from_token_ids": true, "delethink_iteration": 15, "enable_cache": true, "k": 16, "keep_head": 100, "keep_tail": 3996, "max_concurrent_requests": 2400, "model_context_size": 8192, "request_kwargs": { "logprob_start_len": 0, "return_logprob": true }, "sampling_params": { "max_new_tokens": 8191, "temperature": 0.6 }, "type": "delethink_sampling" }, "messages_column_name": "problem", "prompt_template": "{}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}", "response_prefill": "\n", "seed": 42, "system_message": "", "task": { "dataset_dict_path": null, "hf_dataset_args": [ "realtreetune/aime24" ], "load_dataset_dict": false, "type": "math" }, "use_chat_template": false } ], "exp_name": "delethinkIter_R1DistillQwen1.5B_DEEPSCALER_DELETHINK15X8KHUGEBATCH_20250818DELETHINK15X8KHUGEBATCH_seed_2746318213", "global_vars": { "debug_mode": false, "dirs": { "data": "data", "experiments": "experiments" }, "seed": 2746318213 }, "inference_worker": { "collocated_options": { "chunked_prefill_size": 16382, "context_length": 8192, "enable_cache_report": true, "enable_memory_saver": true, "enable_metrics": true, "max_running_requests": 10000, "mem_fraction_static": 0.6, "schedule_conservativeness": 0.3, "schedule_policy": "fcfs", "tp": 1 }, "options": { "chunked_prefill_size": 16382, "context_length": 8192, "enable_cache_report": true, "enable_metrics": true, "max_running_requests": 10000, "mem_fraction_static": 0.6, "schedule_conservativeness": 0.3, "schedule_policy": "fcfs", "tp": 1 }, "type": "sglang" }, "log_some_example": false, "mid_training_evaluation_pipelines": [], "mid_training_evaluation_steps": 5, "num_iterations": 500, "persistent_colocated_inference_workers": true, "tokenizer": { "hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B", "type": "pretrained" }, "trainer": { "actor_deepspeed_config": { "bf16": { "enabled": "auto" }, "gradient_accumulation_steps": "auto", "gradient_clipping": "auto", "optimizer": { "params": { "betas": "auto", "eps": "auto", "lr": "auto", "weight_decay": "auto" }, "type": "AdamW" }, "scheduler": null, "train_batch_size": "auto", "train_micro_batch_size_per_gpu": "auto", "zero_allow_untested_optimizer": true, "zero_optimization": { "allgather_bucket_size": 500000000, "allgather_partitions": true, "contiguous_gradients": true, "overlap_comm": false, "reduce_bucket_size": "auto", "reduce_scatter": true, "stage": 2 } }, "actor_model": { "disable_dropout": true, "hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B", "pretrained_args": { "attn_implementation": "flash_attention_2" }, "type": "pretrained_causal_lm" }, "critic_deepspeed_config": null, "critic_model": null, "general_training_args": { "bf16": true, "checkpoint_keep_steps": 5, "dataloader_num_workers": 0, "dataloader_pin_memory": false, "gradient_accumulation_steps": null, "gradient_checkpointing": true, "learning_rate": 1e-06, "logging_steps": 1, "max_grad_norm": 1, "per_device_train_batch_size": 4, "save_steps": 1, "seed": 2746318213, "target_train_batch_size": 256, "warmup_ratio": 0, "weight_decay": 0 }, "move_reference_model_to_cpu": true, "num_epochs_per_iteration": 1, "params": { "adap_kl_ctrl": false, "cliprange": 0.2, "cliprange_value": 0.2, "gamma": 1, "init_kl_coef": 0.001, "kl_penalty_loss_clip_max": 10, "kl_penalty_loss_clip_min": 0, "kl_penalty_loss_reduction": "seq_mean_then_mean", "kl_penalty_loss_type": "control_variate", "lam": 0.96, "max_response_length": 8191, "policy_loss_reduction": "seq_mean_then_mean", "temperature": 0.6, "use_score_norm": false, "use_score_scaling": false, "whiten_advantages": false, "whiten_rewards": false }, "reference_deepspeed_config": { "bf16": { "enabled": true }, "gradient_accumulation_steps": "auto", "prescale_gradients": false, "train_batch_size": "auto", "train_micro_batch_size_per_gpu": "auto", "wall_clock_breakdown": false }, "reference_model": { "hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B", "pretrained_args": { "attn_implementation": "flash_attention_2" }, "type": "pretrained_causal_lm" }, "report_entropy": true, "save_hf_critic_checkpoint": false, "save_temp_actor_hf_weights": false, "temp_checkpoint_dir": "/tmp/delethink_tmp_ckpts", "type": "ppo", "use_full_precision_logits": false }, "type": "multi_node_policy_iteration" }