pcuenq (HF Staff) committed
Commit f17e5b0 · verified · 1 Parent(s): 66334f2

Upload config.json with huggingface_hub

Files changed (1)
  1. config.json +51 -0
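
The commit message indicates the file was pushed programmatically. Below is a minimal sketch of such an upload with huggingface_hub; the repo id is a placeholder, since the target repository is not named in this view:

# Sketch only: pushing config.json to a Hub repo with huggingface_hub.
# "your-username/your-model" is a placeholder repo id, not taken from this commit.
from huggingface_hub import HfApi

api = HfApi()  # picks up the token from `huggingface-cli login` or HF_TOKEN
api.upload_file(
    path_or_fileobj="config.json",        # local file to push
    path_in_repo="config.json",           # destination path inside the repo
    repo_id="your-username/your-model",   # placeholder
    commit_message="Upload config.json with huggingface_hub",
)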
config.json CHANGED
@@ -15,6 +15,57 @@
   "img_context_token_id": 131072,
   "img_end_token": "</img>",
   "img_start_token": "<img>",
+  "llm_config": {
+    "architectures": [
+      "NemotronHForCausalLM"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "attention_head_dim": 128,
+    "auto_map": {
+      "AutoConfig": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-Base--configuration_nemotron_h.NemotronHConfig",
+      "AutoModelForCausalLM": "nvidia/NVIDIA-Nemotron-Nano-12B-v2-Base--modeling_nemotron_h.NemotronHForCausalLM"
+    },
+    "chunk_size": 128,
+    "conv_kernel": 4,
+    "eos_token_id": 12,
+    "expand": 2,
+    "head_dim": 128,
+    "hidden_dropout": 0.0,
+    "hidden_size": 5120,
+    "hybrid_override_pattern": "M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M*-M-M-M-M-",
+    "initializer_range": 0.02,
+    "intermediate_size": 20480,
+    "layer_norm_epsilon": 1e-05,
+    "mamba_head_dim": 80,
+    "mamba_hidden_act": "silu",
+    "mamba_num_heads": 128,
+    "mamba_proj_bias": false,
+    "max_position_embeddings": 131072,
+    "mlp_bias": false,
+    "mlp_hidden_act": "relu2",
+    "model_type": "nemotron_h",
+    "n_groups": 8,
+    "num_attention_heads": 40,
+    "num_hidden_layers": 62,
+    "num_key_value_heads": 8,
+    "num_logits_to_keep": 1,
+    "rescale_prenorm_residual": true,
+    "residual_in_fp32": false,
+    "rms_norm_eps": 1e-05,
+    "sliding_window": null,
+    "ssm_state_size": 128,
+    "time_step_floor": 0.0001,
+    "time_step_max": 0.1,
+    "time_step_min": 0.001,
+    "time_step_rank": 256,
+    "torch_dtype": "bfloat16",
+    "use_bias": false,
+    "use_cache": true,
+    "use_conv_bias": true,
+    "use_mamba_kernels": true,
+    "vocab_size": 132096
+  },
   "max_sequence_length": 131072,
   "model_type": "NemotronH_Nano_VL_V2",
   "norm_mean": [