miladink's picture
delethinkIter_R1DistillQwen1.5B_DEEPSCALER_DELETHINK15X8KHUGEBATCH_20250818DELETHINK15X8KHUGEBATCH_seed_2746318213: upload config
6aea3c7 verified
{
"colocated_worker_min_available_gpu_memory_mb": 10240,
"directory": "experiments",
"episode_generator": {
"dataset_num_samples_per_iteration": 128,
"dataset_sample_with_replacement": true,
"dataset_shuffle_on_each_iteration": true,
"fill_missing_episodes": false,
"force_tokenization_from_inference": true,
"generation_per_sample": 8,
"leave_one_out": false,
"max_query_length": 1096,
"messages_column_name": "problem",
"model_context_size": 8192,
"normalize_by_group_std": true,
"prompt_template": "{}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}",
"response_prefill": "<think>\n",
"system_message": "",
"task": {
"dataset_dict_path": null,
"hf_dataset_args": [
"agentica-org/DeepScaleR-Preview-Dataset"
],
"load_dataset_dict": false,
"type": "deepscaler"
},
"tokenization_check_full_reconstruction": false,
"trajectory_inference_strategy": {
"auto_compute_max_new_tokens": true,
"delethink_iteration": 15,
"k": 8,
"keep_head": 100,
"keep_tail": 3996,
"request_kwargs": {
"logprob_start_len": 0,
"return_logprob": true
},
"sampling_params": {
"max_new_tokens": 8191,
"temperature": 0.6
},
"type": "delethink_sampling"
},
"type": "delethink_grpo_episode_generator",
"use_chat_template": false,
"zero_out_negative_advantage_episodes": false
},
"episodes_cloud_log_steps": 10,
"evaluation_inference_worker": {
"options": {
"enable_cache_report": true,
"enable_metrics": true,
"log_requests": true,
"mem_fraction_static": 0.65,
"tp": 1
},
"type": "sglang"
},
"evaluation_pipelines": [
{
"analyzers": [
{
"type": "task_performance"
}
],
"dataset_portion": 1,
"dataset_split": "test",
"inference_name": "aime_24",
"inference_strategy": {
"auto_compute_max_new_tokens": true,
"decode_response_text_from_token_ids": true,
"delethink_iteration": 15,
"enable_cache": true,
"k": 16,
"keep_head": 100,
"keep_tail": 3996,
"max_concurrent_requests": 2400,
"model_context_size": 8192,
"request_kwargs": {
"logprob_start_len": 0,
"return_logprob": true
},
"sampling_params": {
"max_new_tokens": 8191,
"temperature": 0.6
},
"type": "delethink_sampling"
},
"messages_column_name": "problem",
"prompt_template": "{}\n\nPlease reason step by step, and put your final answer within \\boxed{{}}",
"response_prefill": "<think>\n",
"seed": 42,
"system_message": "",
"task": {
"dataset_dict_path": null,
"hf_dataset_args": [
"realtreetune/aime24"
],
"load_dataset_dict": false,
"type": "math"
},
"use_chat_template": false
}
],
"exp_name": "delethinkIter_R1DistillQwen1.5B_DEEPSCALER_DELETHINK15X8KHUGEBATCH_20250818DELETHINK15X8KHUGEBATCH_seed_2746318213",
"global_vars": {
"debug_mode": false,
"dirs": {
"data": "data",
"experiments": "experiments"
},
"seed": 2746318213
},
"inference_worker": {
"collocated_options": {
"chunked_prefill_size": 16382,
"context_length": 8192,
"enable_cache_report": true,
"enable_memory_saver": true,
"enable_metrics": true,
"max_running_requests": 10000,
"mem_fraction_static": 0.6,
"schedule_conservativeness": 0.3,
"schedule_policy": "fcfs",
"tp": 1
},
"options": {
"chunked_prefill_size": 16382,
"context_length": 8192,
"enable_cache_report": true,
"enable_metrics": true,
"max_running_requests": 10000,
"mem_fraction_static": 0.6,
"schedule_conservativeness": 0.3,
"schedule_policy": "fcfs",
"tp": 1
},
"type": "sglang"
},
"log_some_example": false,
"mid_training_evaluation_pipelines": [],
"mid_training_evaluation_steps": 5,
"num_iterations": 500,
"persistent_colocated_inference_workers": true,
"tokenizer": {
"hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
"type": "pretrained"
},
"trainer": {
"actor_deepspeed_config": {
"bf16": {
"enabled": "auto"
},
"gradient_accumulation_steps": "auto",
"gradient_clipping": "auto",
"optimizer": {
"params": {
"betas": "auto",
"eps": "auto",
"lr": "auto",
"weight_decay": "auto"
},
"type": "AdamW"
},
"scheduler": null,
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"zero_allow_untested_optimizer": true,
"zero_optimization": {
"allgather_bucket_size": 500000000,
"allgather_partitions": true,
"contiguous_gradients": true,
"overlap_comm": false,
"reduce_bucket_size": "auto",
"reduce_scatter": true,
"stage": 2
}
},
"actor_model": {
"disable_dropout": true,
"hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
"pretrained_args": {
"attn_implementation": "flash_attention_2"
},
"type": "pretrained_causal_lm"
},
"critic_deepspeed_config": null,
"critic_model": null,
"general_training_args": {
"bf16": true,
"checkpoint_keep_steps": 5,
"dataloader_num_workers": 0,
"dataloader_pin_memory": false,
"gradient_accumulation_steps": null,
"gradient_checkpointing": true,
"learning_rate": 1e-06,
"logging_steps": 1,
"max_grad_norm": 1,
"per_device_train_batch_size": 4,
"save_steps": 1,
"seed": 2746318213,
"target_train_batch_size": 256,
"warmup_ratio": 0,
"weight_decay": 0
},
"move_reference_model_to_cpu": true,
"num_epochs_per_iteration": 1,
"params": {
"adap_kl_ctrl": false,
"cliprange": 0.2,
"cliprange_value": 0.2,
"gamma": 1,
"init_kl_coef": 0.001,
"kl_penalty_loss_clip_max": 10,
"kl_penalty_loss_clip_min": 0,
"kl_penalty_loss_reduction": "seq_mean_then_mean",
"kl_penalty_loss_type": "control_variate",
"lam": 0.96,
"max_response_length": 8191,
"policy_loss_reduction": "seq_mean_then_mean",
"temperature": 0.6,
"use_score_norm": false,
"use_score_scaling": false,
"whiten_advantages": false,
"whiten_rewards": false
},
"reference_deepspeed_config": {
"bf16": {
"enabled": true
},
"gradient_accumulation_steps": "auto",
"prescale_gradients": false,
"train_batch_size": "auto",
"train_micro_batch_size_per_gpu": "auto",
"wall_clock_breakdown": false
},
"reference_model": {
"hf_model_name": "deepseek-ai/Deepseek-R1-Distill-Qwen-1.5B",
"pretrained_args": {
"attn_implementation": "flash_attention_2"
},
"type": "pretrained_causal_lm"
},
"report_entropy": true,
"save_hf_critic_checkpoint": false,
"save_temp_actor_hf_weights": false,
"temp_checkpoint_dir": "/tmp/delethink_tmp_ckpts",
"type": "ppo",
"use_full_precision_logits": false
},
"type": "multi_node_policy_iteration"
}