Synchronizing local compiler cache.

Browse files

Files changed (6) hide show

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c3450e1affaca20e05e3.json +220 -0
neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/7621f6503e2289297f1f.json +82 -0
neuronxcc-2.21.18209.0+043b1bf7/MODULE_04def5b319953baacddd+a9d440f5/model.neff +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_04def5b319953baacddd+a9d440f5/wrapped_neff.hlo +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_a89678b39464c33c1815+ed72d204/model.hlo_module.pb +1 -1
neuronxcc-2.21.18209.0+043b1bf7/MODULE_a89678b39464c33c1815+ed72d204/model.neff +1 -1

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.0/llama4_text/meta-llama/Llama-4-Scout-17B-16E-Instruct/c3450e1affaca20e05e3.json ADDED Viewed

	@@ -0,0 +1,220 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+  "_task": "text-generation",
+  "attention_bias": false,
+  "attention_chunk_size": 8192,
+  "attention_dropout": 0.0,
+  "attn_scale": 0.1,
+  "attn_temperature_tuning": true,
+  "floor_scale": 8192,
+  "for_llm_compressor": false,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "interleave_moe_layer_step": 1,
+  "intermediate_size": 8192,
+  "intermediate_size_mlp": 16384,
+  "layer_types": [
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "chunked_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 10485760,
+  "model_type": "llama4_text",
+  "moe_layers": [
+    0,
+    1,
+    2,
+    3,
+    4,
+    5,
+    6,
+    7,
+    8,
+    9,
+    10,
+    11,
+    12,
+    13,
+    14,
+    15,
+    16,
+    17,
+    18,
+    19,
+    20,
+    21,
+    22,
+    23,
+    24,
+    25,
+    26,
+    27,
+    28,
+    29,
+    30,
+    31,
+    32,
+    33,
+    34,
+    35,
+    36,
+    37,
+    38,
+    39,
+    40,
+    41,
+    42,
+    43,
+    44,
+    45,
+    46,
+    47
+  ],
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 1,
+    "capacity_factor": null,
+    "checkpoint_id": "meta-llama/Llama-4-Scout-17B-16E-Instruct",
+    "checkpoint_revision": "92f3b1597a195b523d8d9e5700e57e4fbb8f20d3",
+    "continuous_batching": false,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 16,
+    "max_batch_size": 1,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": true,
+    "optimum_neuron_version": "0.4.0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 16
+  },
+  "no_rope_layers": [
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0,
+    1,
+    1,
+    1,
+    0
+  ],
+  "num_attention_heads": 40,
+  "num_experts_per_tok": 1,
+  "num_hidden_layers": 48,
+  "num_key_value_heads": 8,
+  "num_local_experts": 16,
+  "output_router_logits": false,
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": {
+    "factor": 16.0,
+    "high_freq_factor": 1.0,
+    "low_freq_factor": 1.0,
+    "original_max_position_embeddings": 8192,
+    "rope_type": "llama3"
+  },
+  "rope_theta": 500000.0,
+  "router_aux_loss_coef": 0.001,
+  "router_jitter_noise": 0.0,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_qk_norm": true,
+  "vocab_size": 202048
+}

neuronxcc-2.21.18209.0+043b1bf7/0_REGISTRY/0.4.1.dev0/qwen2/Qwen/Qwen2.5-0.5B/7621f6503e2289297f1f.json ADDED Viewed

	@@ -0,0 +1,82 @@

+{
+  "_entry_class": "SingleModelCacheEntry",
+  "_model_id": "Qwen/Qwen2.5-0.5B",
+  "_task": "text-generation",
+  "architectures": [
+    "Qwen2ForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "hidden_act": "silu",
+  "hidden_size": 896,
+  "initializer_range": 0.02,
+  "intermediate_size": 4864,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 32768,
+  "max_window_layers": 24,
+  "model_type": "qwen2",
+  "neuron": {
+    "_serialized_key": "NxDNeuronConfig",
+    "batch_size": 4,
+    "capacity_factor": null,
+    "checkpoint_id": "Qwen/Qwen2.5-0.5B",
+    "checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
+    "continuous_batching": true,
+    "enable_bucketing": false,
+    "ep_degree": 1,
+    "fused_qkv": false,
+    "glu_mlp": true,
+    "local_ranks_size": 2,
+    "max_batch_size": 4,
+    "max_context_length": 4096,
+    "max_topk": 256,
+    "n_active_tokens": 4096,
+    "neuronxcc_version": "2.21.18209.0+043b1bf7",
+    "on_device_sampling": false,
+    "optimum_neuron_version": "0.4.1.dev0",
+    "output_logits": false,
+    "pp_degree": 1,
+    "sequence_length": 4096,
+    "speculation_length": 0,
+    "start_rank_id": 0,
+    "target": "trn1",
+    "torch_dtype": "bfloat16",
+    "tp_degree": 2
+  },
+  "num_attention_heads": 14,
+  "num_hidden_layers": 24,
+  "num_key_value_heads": 2,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": true,
+  "use_cache": true,
+  "use_mrope": false,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

neuronxcc-2.21.18209.0+043b1bf7/MODULE_04def5b319953baacddd+a9d440f5/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:eb8e9c3bbcfeb24f5716790979ab5e716ef14dde3f59e799dd36d5e78f3ca4ab
 size 1926144

 version https://git-lfs.github.com/spec/v1
+oid sha256:9306abbd1c28ffa1ebe3d952ac122ce49faf6a2a037575b24b0a818cb7d441e3
 size 1926144

neuronxcc-2.21.18209.0+043b1bf7/MODULE_04def5b319953baacddd+a9d440f5/wrapped_neff.hlo CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:27d04de06be1013661b7a4bd6bd5b50d9a3dd442f29e23d9767a31edf7170f5d
 size 2082478

 version https://git-lfs.github.com/spec/v1
+oid sha256:d6a9c49a56aacb28723d72d324e309cb2277db5b847d041e290b5301149a0d39
 size 2082478

neuronxcc-2.21.18209.0+043b1bf7/MODULE_a89678b39464c33c1815+ed72d204/model.hlo_module.pb CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:512341341856b75f5a88023d5d6a523af1df1ecccb3f0a6ba5953da3db15a616
 size 739558

 version https://git-lfs.github.com/spec/v1
+oid sha256:0c64a590225fcaa4a7f92ee879a63534714d921b0b7c21d8c0b215eaa21d7970
 size 739558

neuronxcc-2.21.18209.0+043b1bf7/MODULE_a89678b39464c33c1815+ed72d204/model.neff CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fc8b706c9cca821e93ed6dc9f8087c3080cdb140bde55128aaddd73941b883f8
 size 26133504

 version https://git-lfs.github.com/spec/v1
+oid sha256:205be41902a46553702af4e8be043b2df8f16d5813f7f260a1386541828ca3e6
 size 26133504