delethinkIter_R1DistillQwen1.5B_DEEPSCALER_DELETHINK15X8KHUGEBATCH_20250818DELETHINK15X8KHUGEBATCH_seed_2746318213/training_config.json
{
  "colocated_worker_min_available_gpu_memory_mb": 10240,
  "directory": "experiments",
  "episode_generator": {
    "dataset_num_samples_per_iteration": 128,
    "dataset_sample_with_replacement": true,
    "dataset_shuffle_on_each_iteration": true,
    "fill_missing_episodes": false,
    "force_tokenization_from_inference": true,
    "generation_per_sample": 8,
    "leave_one_out": false,
    "max_query_length": 1096,
    "messages_column_name": "problem",
    "model_context_size": 8192,
    "normalize_by_group_std": true,
    "prompt_template": "{}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}",
    "response_prefill": "<think>\n",
    "system_message": "",
    "task": {
      "dataset_dict_path": null,
      "hf_dataset_args": [
        "agentica-org/DeepScaleR-Preview-Dataset"
      ],
      "load_dataset_dict": false,
      "type": "deepscaler"
    },
    "tokenization_check_full_reconstruction": false,
    "trajectory_inference_strategy": {
      "auto_compute_max_new_tokens": true,
      "delethink_iteration": 15,
      "k": 8,
      "keep_head": 100,
      "keep_tail": 3996,
      "request_kwargs": {
        "logprob_start_len": 0,
        "return_logprob": true
      },
      "sampling_params": {
        "max_new_tokens": 8191,
        "temperature": 0.6
      },
      "type": "delethink_sampling"
    },
    "type": "delethink_grpo_episode_generator",
    "use_chat_template": false,
    "zero_out_negative_advantage_episodes": false
  },
  "episodes_cloud_log_steps": 10,
  "evaluation_inference_worker": {
    "options": {
      "enable_cache_report": true,
      "enable_metrics": true,
      "log_requests": true,
      "mem_fraction_static": 0.65,
      "tp": 1
    },
    "type": "sglang"
  },
  "evaluation_pipelines": [
    {
      "analyzers": [
        {
          "type": "task_performance"
        }
      ],
      "dataset_portion": 1,
      "dataset_split": "test",
      "inference_name": "aime_24",
      "inference_strategy": {
        "auto_compute_max_new_tokens": true,
        "decode_response_text_from_token_ids": true,
        "delethink_iteration": 15,
        "enable_cache": true,
        "k": 16,
        "keep_head": 100,
        "keep_tail": 3996,
        "max_concurrent_requests": 2400,
        "model_context_size": 8192,
        "request_kwargs": {
          "logprob_start_len": 0,
          "return_logprob": true
        },
        "sampling_params": {
          "max_new_tokens": 8191,
          "temperature": 0.6
        },
        "type": "delethink_sampling"
      },
      "messages_column_name": "problem",
      "prompt_template": "{}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}",
      "response_prefill": "<think>\n",
      "seed": 42,
      "system_message": "",
      "task": {
        "dataset_dict_path": null,
        "hf_dataset_args": [
          "realtreetune/aime24"
        ],
        "load_dataset_dict": false,
        "type": "math"
      },
      "use_chat_template": false
    }
  ],
  "exp_name": "delethinkIter_R1DistillQwen1.5B_DEEPSCALER_DELETHINK15X8KHUGEBATCH_20250818DELETHINK15X8KHUGEBATCH_seed_2746318213",
  "global_vars": {
    "debug_mode": false,
    "dirs": {
      "data": "data",
      "experiments": "experiments"
    },
    "seed": 2746318213
  },
  "inference_worker": {
    "collocated_options": {
      "chunked_prefill_size": 16382,
      "context_length": 8192,
      "enable_cache_report": true,
      "enable_memory_saver": true,
      "enable_metrics": true,
      "max_running_requests": 10000,
      "mem_fraction_static": 0.6,
      "schedule_conservativeness": 0.3,
      "schedule_policy": "fcfs",
      "tp": 1
    },
    "options": {
      "chunked_prefill_size": 16382,
      "context_length": 8192,
      "enable_cache_report": true,
      "enable_metrics": true,
      "max_running_requests": 10000,
      "mem_fraction_static": 0.6,
      "schedule_conservativeness": 0.3,
      "schedule_policy": "fcfs",
      "tp": 1
    },
    "type": "sglang"
  },
  "log_some_example": false,
  "mid_training_evaluation_pipelines": [],
  "mid_training_evaluation_steps": 5,
  "num_iterations": 500,
  "persistent_colocated_inference_workers": true,
  "tokenizer": {
    "hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
    "type": "pretrained"
  },
  "trainer": {
    "actor_deepspeed_config": {
      "bf16": {
        "enabled": "auto"
      },
      "gradient_accumulation_steps": "auto",
      "gradient_clipping": "auto",
      "optimizer": {
        "params": {
          "betas": "auto",
          "eps": "auto",
          "lr": "auto",
          "weight_decay": "auto"
        },
        "type": "AdamW"
      },
      "scheduler": null,
      "train_batch_size": "auto",
      "train_micro_batch_size_per_gpu": "auto",
      "zero_allow_untested_optimizer": true,
      "zero_optimization": {
        "allgather_bucket_size": 500000000,
        "allgather_partitions": true,
        "contiguous_gradients": true,
        "overlap_comm": false,
        "reduce_bucket_size": "auto",
        "reduce_scatter": true,
        "stage": 2
      }
    },
    "actor_model": {
      "disable_dropout": true,
      "hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
      "pretrained_args": {
        "attn_implementation": "flash_attention_2"
      },
      "type": "pretrained_causal_lm"
    },
    "critic_deepspeed_config": null,
    "critic_model": null,
    "general_training_args": {
      "bf16": true,
      "checkpoint_keep_steps": 5,
      "dataloader_num_workers": 0,
      "dataloader_pin_memory": false,
      "gradient_accumulation_steps": null,
      "gradient_checkpointing": true,
      "learning_rate": 1e-06,
      "logging_steps": 1,
      "max_grad_norm": 1,
      "per_device_train_batch_size": 4,
      "save_steps": 1,
      "seed": 2746318213,
      "target_train_batch_size": 256,
      "warmup_ratio": 0,
      "weight_decay": 0
    },
    "move_reference_model_to_cpu": true,
    "num_epochs_per_iteration": 1,
    "params": {
      "adap_kl_ctrl": false,
      "cliprange": 0.2,
      "cliprange_value": 0.2,
      "gamma": 1,
      "init_kl_coef": 0.001,
      "kl_penalty_loss_clip_max": 10,
      "kl_penalty_loss_clip_min": 0,
      "kl_penalty_loss_reduction": "seq_mean_then_mean",
      "kl_penalty_loss_type": "control_variate",
      "lam": 0.96,
      "max_response_length": 8191,
      "policy_loss_reduction": "seq_mean_then_mean",
      "temperature": 0.6,
      "use_score_norm": false,
      "use_score_scaling": false,
      "whiten_advantages": false,
      "whiten_rewards": false
    },
    "reference_deepspeed_config": {
      "bf16": {
        "enabled": true
      },
      "gradient_accumulation_steps": "auto",
      "prescale_gradients": false,
      "train_batch_size": "auto",
      "train_micro_batch_size_per_gpu": "auto",
      "wall_clock_breakdown": false
    },
    "reference_model": {
      "hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
      "pretrained_args": {
        "attn_implementation": "flash_attention_2"
      },
      "type": "pretrained_causal_lm"
    },
    "report_entropy": true,
    "save_hf_critic_checkpoint": false,
    "save_temp_actor_hf_weights": false,
    "temp_checkpoint_dir": "/tmp/delethink_tmp_ckpts",
    "type": "ppo",
    "use_full_precision_logits": false
  },
  "type": "multi_node_policy_iteration"
}