| { | |
| "_attn_implementation_autoset": false, | |
| "_name_or_path": "/home/ubuntu/models/Qwen3-8B", | |
| "add_cross_attention": false, | |
| "architectures": [ | |
| "Qwen3ForCausalLM" | |
| ], | |
| "attention_bias": false, | |
| "attention_dropout": 0.0, | |
| "attribute_map": {}, | |
| "bad_words_ids": null, | |
| "begin_suppress_tokens": null, | |
| "bos_token_id": 151643, | |
| "chunk_size_feed_forward": 0, | |
| "cross_attention_hidden_size": null, | |
| "decoder_start_token_id": null, | |
| "diversity_penalty": 0.0, | |
| "do_sample": false, | |
| "early_stopping": false, | |
| "encoder_no_repeat_ngram_size": 0, | |
| "eos_token_id": 151645, | |
| "exponential_decay_length_penalty": null, | |
| "finetuning_task": null, | |
| "forced_bos_token_id": null, | |
| "forced_eos_token_id": null, | |
| "fused_spec_config": null, | |
| "head_dim": 128, | |
| "hidden_act": "silu", | |
| "hidden_size": 4096, | |
| "id2label": { | |
| "0": "LABEL_0", | |
| "1": "LABEL_1" | |
| }, | |
| "initializer_range": 0.02, | |
| "intermediate_size": 12288, | |
| "is_decoder": false, | |
| "is_encoder_decoder": false, | |
| "label2id": { | |
| "LABEL_0": 0, | |
| "LABEL_1": 1 | |
| }, | |
| "length_penalty": 1.0, | |
| "max_length": 20, | |
| "max_position_embeddings": 40960, | |
| "max_window_layers": 36, | |
| "metadata": null, | |
| "min_length": 0, | |
| "model_type": "qwen3", | |
| "neuron_config": { | |
| "activation_quantization_type": null, | |
| "allow_input_truncation": false, | |
| "apply_seq_ids_mask": false, | |
| "async_mode": false, | |
| "attention_dp_degree": 1, | |
| "attention_dtype": null, | |
| "attn_block_cte_nki_kernel_enabled": false, | |
| "attn_block_tkg_nki_kernel_cache_update": false, | |
| "attn_block_tkg_nki_kernel_enabled": false, | |
| "attn_cls": { | |
| "__module__": "neuronx_distributed_inference.models.qwen3.modeling_qwen3", | |
| "__name__": "NeuronQwen3Attention" | |
| }, | |
| "attn_kernel_enabled": null, | |
| "attn_tkg_builtin_kernel_enabled": false, | |
| "attn_tkg_nki_kernel_enabled": false, | |
| "batch_size": 1, | |
| "bucket_n_active_tokens": false, | |
| "buckets": [ | |
| 1024 | |
| ], | |
| "cast_type": "config", | |
| "cc_pipeline_tiling_factor": 2, | |
| "chunked_prefill_config": null, | |
| "context_encoding_buckets": null, | |
| "cp_degree": 1, | |
| "ctx_batch_size": 1, | |
| "disable_kv_cache_tiling": false, | |
| "draft_model_modules_to_not_convert": null, | |
| "enable_bucketing": true, | |
| "enable_eagle_draft_input_norm": false, | |
| "enable_eagle_speculation": false, | |
| "enable_fused_speculation": false, | |
| "enable_long_context_mode": false, | |
| "enable_output_completion_notifications": false, | |
| "enable_spill_reload_dge": false, | |
| "enable_token_tree": false, | |
| "ep_degree": 1, | |
| "expert_mlp_nki_kernel_enabled": null, | |
| "flash_decoding_enabled": false, | |
| "fused_qkv": false, | |
| "fused_rmsnorm_skip_gamma": false, | |
| "is_block_kv_layout": null, | |
| "is_chunked_prefill": false, | |
| "is_continuous_batching": true, | |
| "is_eagle_draft": false, | |
| "is_medusa": false, | |
| "is_prefill_stage": null, | |
| "is_prefix_caching": false, | |
| "k_cache_transposed": false, | |
| "kv_cache_batch_size": 1, | |
| "kv_cache_padding_size": 0, | |
| "kv_cache_quant": false, | |
| "kv_cache_tiling": false, | |
| "layer_boundary_markers": false, | |
| "lm_head_pad": false, | |
| "lm_head_pad_alignment_size": 1, | |
| "local_ranks_size": 2, | |
| "logical_nc_config": 1, | |
| "lora_config": null, | |
| "max_batch_size": 1, | |
| "max_context_length": 1024, | |
| "max_length": 1024, | |
| "max_new_tokens": null, | |
| "medusa_speculation_length": 0, | |
| "medusa_tree": null, | |
| "mlp_kernel_enabled": false, | |
| "mlp_kernel_fuse_residual_add": false, | |
| "modules_to_not_convert": null, | |
| "moe_fused_nki_kernel_enabled": null, | |
| "n_active_tokens": 1024, | |
| "n_positions": 1024, | |
| "num_medusa_heads": 0, | |
| "on_cpu": false, | |
| "on_device_sampling_config": { | |
| "deterministic": false, | |
| "do_sample": false, | |
| "dynamic": true, | |
| "global_topk": 256, | |
| "on_device_sampling_config": true, | |
| "temperature": 1.0, | |
| "top_k": 1, | |
| "top_k_kernel_enabled": false, | |
| "top_p": 1.0 | |
| }, | |
| "output_logits": false, | |
| "overrides_torch_dtype": true, | |
| "pa_block_size": 1024, | |
| "pa_num_blocks": 1, | |
| "padding_side": "right", | |
| "pp_degree": 1, | |
| "prefix_buckets": null, | |
| "qk_layernorm": false, | |
| "qkv_kernel_enabled": false, | |
| "qkv_kernel_fuse_residual_add": false, | |
| "qkv_kernel_nbsd_layout": false, | |
| "quantization_dtype": "int8", | |
| "quantization_type": "per_tensor_symmetric", | |
| "quantize_clamp_bound": 1e999, | |
| "quantized": false, | |
| "quantized_checkpoints_path": null, | |
| "quantized_mlp_kernel_enabled": false, | |
| "rmsnorm_quantize_kernel_enabled": false, | |
| "router_topk_nki_kernel_enabled": null, | |
| "rpl_reduce_dtype": null, | |
| "save_sharded_checkpoint": true, | |
| "scratchpad_page_size": null, | |
| "seq_len": 1024, | |
| "seq_len_threshold_for_cc_tiling": 16384, | |
| "sequence_parallel_enabled": false, | |
| "shared_mlp_nki_kernel_enabled": null, | |
| "skip_sharding": false, | |
| "skip_warmup": false, | |
| "spec_batch_size": 1, | |
| "speculation_length": 0, | |
| "start_rank_id": 0, | |
| "target": null, | |
| "tile_cc": false, | |
| "tkg_batch_size": 1, | |
| "token_generation_buckets": null, | |
| "token_tree_config": null, | |
| "torch_dtype": "bfloat16", | |
| "tp_degree": 2, | |
| "vocab_parallel": false, | |
| "weight_gather_seq_len_threshold": 32768, | |
| "weights_to_skip_layout_optimization": [], | |
| "world_size": 2 | |
| }, | |
| "no_repeat_ngram_size": 0, | |
| "num_attention_heads": 32, | |
| "num_beam_groups": 1, | |
| "num_beams": 1, | |
| "num_cores_per_group": 1, | |
| "num_hidden_layers": 36, | |
| "num_key_value_heads": 8, | |
| "num_return_sequences": 1, | |
| "output_attentions": false, | |
| "output_hidden_states": false, | |
| "output_scores": false, | |
| "pad_token_id": null, | |
| "prefix": null, | |
| "problem_type": null, | |
| "pruned_heads": {}, | |
| "remove_invalid_values": false, | |
| "repetition_penalty": 1.0, | |
| "return_dict": true, | |
| "return_dict_in_generate": false, | |
| "rms_norm_eps": 1e-06, | |
| "rope_scaling": null, | |
| "rope_theta": 1000000, | |
| "sep_token_id": null, | |
| "sliding_window": null, | |
| "suppress_tokens": null, | |
| "task_specific_params": null, | |
| "temperature": 1.0, | |
| "tf_legacy_loss": false, | |
| "tie_encoder_decoder": false, | |
| "tie_word_embeddings": false, | |
| "tokenizer_class": null, | |
| "top_k": 50, | |
| "top_p": 1.0, | |
| "torchscript": false, | |
| "transformers_version": "4.51.0", | |
| "typical_p": 1.0, | |
| "use_bfloat16": false, | |
| "use_cache": true, | |
| "use_sliding_window": false, | |
| "vocab_size": 151936 | |
| } | |