Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +110 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6a2eef8e3936e308d719.json +70 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b97e39a8f58beb7389bc.json +70 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c4344d0006797e4575a8.json +70 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/0877b2b6ccc545a88fe8.json +74 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/674878857b261e357c7b.json +74 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/b54003e874e8671bff7d.json +74 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/d06188fe8e87222dbd8c.json +74 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a338625e976e1cdcce16.json +75 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a7b0618b75c2e747c4a7.json +75 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/cd641ba46e0fe08cbcc1.json +75 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/2db16eb8799b9160edec.json +70 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/a87206b079f3f365cf93.json +70 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/ba9ad122c306f79a7f90.json +70 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/308e16ab3ee4911957e7.json +71 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/38df0e7247208af80c7d.json +71 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/f190d1b1acf6d9624f9c.json +71 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/51d387c8436423c80830.json +94 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/edef092decee45a6b77d.json +94 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/68fbb28f6340b2d7ca3b.json +76 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/691934144efc9536a94c.json +76 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/945a35693afe97130cb5.json +76 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/8ca6f1dd90ea7ad4fb33.json +77 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/bb735c1f623df03e8d42.json +77 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/eeeb1f6469ea444de4d6.json +77 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json +59 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json +59 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/0cfa36114f700208376a.json +66 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/1144207432b18b97200f.json +63 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/132e478ab06dfd6f996e.json +60 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/430083c6aa3b306e22e0.json +64 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/53e5296dda9b797224f5.json +67 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json +63 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json +63 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0042866b0b29ca346a04.json +68 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f19cc6cfed252cbe80.json +65 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/235e6b665a70c7f3b88b.json +69 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/54998903c673b03ab682.json +69 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/559712f03e12a7d3db9d.json +68 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/62c9b979338956707306.json +71 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/7b9ae8b155ce16ab1f81.json +68 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/8eae465f9a28e71d02c7.json +72 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/94b3194811206ba2227e.json +65 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json +64 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/c6f1e5861bd12b93b78f.json +69 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/d03410f237213137456b.json +71 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e15ce75e921fd9551605.json +64 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f70dc623c263d5d225a1.json +65 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f98ea9d9fe79ee8c6c52.json +71 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json +59 -0
.gitattributes
CHANGED
|
@@ -10715,3 +10715,113 @@ neuronxcc-2.15.143.0+e39249ad/MODULE_dd7fe4a299fa94e55211+39f12043/model.neff fi
|
|
| 10715 |
neuronxcc-2.15.143.0+e39249ad/MODULE_f275897da2e206eb43ea+39f12043/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10716 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_c9338aa2ab68700f103b/model.neuron filter=lfs diff=lfs merge=lfs -text
|
| 10717 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_9bfa84a5867ce08f6ac5/model.neuron filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10715 |
neuronxcc-2.15.143.0+e39249ad/MODULE_f275897da2e206eb43ea+39f12043/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10716 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_c9338aa2ab68700f103b/model.neuron filter=lfs diff=lfs merge=lfs -text
|
| 10717 |
neuronxcc-2.20.9961.0+0acef03a/MODULE_9bfa84a5867ce08f6ac5/model.neuron filter=lfs diff=lfs merge=lfs -text
|
| 10718 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_029171fc3b39495f4aba+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10719 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_06c8dbb372e92a509892+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10720 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10721 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0bd3d97e69c39b1f81fe+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10722 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10723 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_0d3a78ec14918c4753a5+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10724 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_10539bf50cf5a741b5b1+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10725 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_158c2b755daab1e740a8+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10726 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_172c08cd3ccbf1c120d5+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10727 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10728 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1a52c0a90fc4f2b193c3+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10729 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10730 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1ed497a5a1a56b752f6c+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10731 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10732 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_20b86a711b6ece0c7356+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10733 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_220c74921c0d768610a0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10734 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2d618b5f6d6f813b7449+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10735 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10736 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2da3f13e72442dccc84d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10737 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_304d7ac91c99b6581390+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10738 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10739 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_308ca0aab5adbc4e264e+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10740 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a3393bf59876e8b6f96+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10741 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_3a475c45b5c9d5c0f8fe+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10742 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10743 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_4c3fae6fc3e603f915d8+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10744 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10745 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_60fbe698553d5bdeda38+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10746 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10747 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6282bebdd839664ecd46+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10748 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_63cd0b95d5730c3ea2ff+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10749 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_66ff57a3dcbdd1b34504+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10750 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10751 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6972265ccbc24ff4448c+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10752 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6e8eed42076f4bade3b5+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10753 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10754 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6fcc6140d0bae101d7db+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10755 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_7492a59ff398cdd8c726+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10756 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10757 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_75ceb321353780f53c0b+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10758 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10759 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_763113653b2e1d896ea8+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10760 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_76637537fe13fc8505c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10761 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_7c3334ebde4334499745+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10762 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8107c5124b8b45e8f0eb+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10763 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_826fd2949b06a672b9bc+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10764 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_88c363db429237aad147+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10765 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10766 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8954b51aa51e21a41d91+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10767 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10768 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_896c6cf1819883a539de+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10769 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_89c1a268c5d73421c719+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10770 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10771 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_901275bef8665064964c+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10772 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_90181ce17be0f8510277+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10773 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_943306ab89ee867ba2c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10774 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_96aa20706930df3658e1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10775 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10776 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_96f456d652b2d2b5604f+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10777 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_974960a065697d53ebbb+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10778 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_978910d0b56af3a47501+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10779 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10780 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9ba467e042d35e9b674e+183b369f/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10781 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10782 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_9dc23dbecde949377e19+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10783 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10784 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a28de0c97a12ebdd3729+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10785 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10786 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a810c54e1e60c1b60d92+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10787 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a8844fe97439b2667680+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10788 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_ace5fe41c67d5f1adb03+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10789 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_ae0fabd3d0b75f0150a0+253d6470/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10790 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_b10902204a04c03bbd77+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10791 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10792 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_b576f2782b3f3348dc52+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10793 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_bb8c372007f461bd6f30+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10794 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_bbeafd22a3c0c3e648a5+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10795 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10796 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_bcec2cc4ba44dbe255aa+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10797 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10798 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_bd02c2f7c4741c76fe15+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10799 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_c81b33a78feae546fb48+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10800 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10801 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_d379451a3eeeb9a21ff6+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10802 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10803 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7030396f798ba07e1a9+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10804 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10805 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_d8d9690a1b9d2bd4cec3+87c2f0d1/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10806 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10807 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_da442b9fe13ebb984920+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10808 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_db25850f47f3c97209ab+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10809 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10810 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_db7f53e8fbb89bd8136b+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10811 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10812 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_dbb440cbea65d345491d+e4fc2c3d/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10813 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e23cd3c3922eefeb7287+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10814 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10815 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e71846a47fd19b857556+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10816 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e72c2f224d72d6a5a1a4+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10817 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10818 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e742300d745c721999db+cd3419b6/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10819 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_e9ae76ce87d9056639c1+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10820 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_ecf63d52a684b3482e60+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10821 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_ee7203f82f27836b9239+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10822 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10823 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_efeb7f7f6d73497d3fc5+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
| 10824 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_f1ce9fc7b3c25b7b2459+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10825 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_f25ef7f12c02216593d0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10826 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
| 10827 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_fecd93fec248a64a0b00+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/6a2eef8e3936e308d719.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"attention_multiplier": 1.0,
|
| 11 |
+
"embedding_multiplier": 1.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"logits_scaling": 1.0,
|
| 17 |
+
"max_position_embeddings": 2048,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"async_mode": false,
|
| 23 |
+
"attn_kernel_enabled": false,
|
| 24 |
+
"batch_size": 2,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"cc_pipeline_tiling_factor": 2,
|
| 27 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 28 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
| 29 |
+
"continuous_batching": true,
|
| 30 |
+
"enable_bucketing": false,
|
| 31 |
+
"ep_degree": 1,
|
| 32 |
+
"flash_decoding_enabled": false,
|
| 33 |
+
"fused_qkv": true,
|
| 34 |
+
"glu_mlp": true,
|
| 35 |
+
"is_chunked_prefill": false,
|
| 36 |
+
"local_ranks_size": 2,
|
| 37 |
+
"logical_nc_config": 1,
|
| 38 |
+
"max_batch_size": 2,
|
| 39 |
+
"max_context_length": 100,
|
| 40 |
+
"max_topk": 256,
|
| 41 |
+
"mlp_kernel_enabled": false,
|
| 42 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 43 |
+
"n_active_tokens": 100,
|
| 44 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 45 |
+
"num_cores_per_group": 1,
|
| 46 |
+
"on_device_sampling": true,
|
| 47 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 48 |
+
"output_logits": false,
|
| 49 |
+
"pp_degree": 1,
|
| 50 |
+
"qkv_kernel_enabled": false,
|
| 51 |
+
"sequence_length": 100,
|
| 52 |
+
"sequence_parallel_enabled": false,
|
| 53 |
+
"speculation_length": 0,
|
| 54 |
+
"start_rank_id": 0,
|
| 55 |
+
"target": null,
|
| 56 |
+
"torch_dtype": "float16",
|
| 57 |
+
"tp_degree": 2,
|
| 58 |
+
"vocab_parallel": false
|
| 59 |
+
},
|
| 60 |
+
"num_attention_heads": 4,
|
| 61 |
+
"num_hidden_layers": 2,
|
| 62 |
+
"num_key_value_heads": 4,
|
| 63 |
+
"residual_multiplier": 1.0,
|
| 64 |
+
"rms_norm_eps": 1e-06,
|
| 65 |
+
"rope_scaling": null,
|
| 66 |
+
"rope_theta": 10000.0,
|
| 67 |
+
"tie_word_embeddings": false,
|
| 68 |
+
"use_cache": true,
|
| 69 |
+
"vocab_size": 49152
|
| 70 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/b97e39a8f58beb7389bc.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"attention_multiplier": 1.0,
|
| 11 |
+
"embedding_multiplier": 1.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"logits_scaling": 1.0,
|
| 17 |
+
"max_position_embeddings": 2048,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"async_mode": false,
|
| 23 |
+
"attn_kernel_enabled": false,
|
| 24 |
+
"batch_size": 1,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"cc_pipeline_tiling_factor": 2,
|
| 27 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 28 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
| 29 |
+
"continuous_batching": false,
|
| 30 |
+
"enable_bucketing": false,
|
| 31 |
+
"ep_degree": 1,
|
| 32 |
+
"flash_decoding_enabled": false,
|
| 33 |
+
"fused_qkv": true,
|
| 34 |
+
"glu_mlp": true,
|
| 35 |
+
"is_chunked_prefill": false,
|
| 36 |
+
"local_ranks_size": 2,
|
| 37 |
+
"logical_nc_config": 1,
|
| 38 |
+
"max_batch_size": 1,
|
| 39 |
+
"max_context_length": 100,
|
| 40 |
+
"max_topk": 256,
|
| 41 |
+
"mlp_kernel_enabled": false,
|
| 42 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 43 |
+
"n_active_tokens": 100,
|
| 44 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 45 |
+
"num_cores_per_group": 1,
|
| 46 |
+
"on_device_sampling": true,
|
| 47 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 48 |
+
"output_logits": false,
|
| 49 |
+
"pp_degree": 1,
|
| 50 |
+
"qkv_kernel_enabled": false,
|
| 51 |
+
"sequence_length": 100,
|
| 52 |
+
"sequence_parallel_enabled": false,
|
| 53 |
+
"speculation_length": 0,
|
| 54 |
+
"start_rank_id": 0,
|
| 55 |
+
"target": null,
|
| 56 |
+
"torch_dtype": "bfloat16",
|
| 57 |
+
"tp_degree": 2,
|
| 58 |
+
"vocab_parallel": false
|
| 59 |
+
},
|
| 60 |
+
"num_attention_heads": 4,
|
| 61 |
+
"num_hidden_layers": 2,
|
| 62 |
+
"num_key_value_heads": 4,
|
| 63 |
+
"residual_multiplier": 1.0,
|
| 64 |
+
"rms_norm_eps": 1e-06,
|
| 65 |
+
"rope_scaling": null,
|
| 66 |
+
"rope_theta": 10000.0,
|
| 67 |
+
"tie_word_embeddings": false,
|
| 68 |
+
"use_cache": true,
|
| 69 |
+
"vocab_size": 49152
|
| 70 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/c4344d0006797e4575a8.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"attention_multiplier": 1.0,
|
| 11 |
+
"embedding_multiplier": 1.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"logits_scaling": 1.0,
|
| 17 |
+
"max_position_embeddings": 2048,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"async_mode": false,
|
| 23 |
+
"attn_kernel_enabled": false,
|
| 24 |
+
"batch_size": 1,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"cc_pipeline_tiling_factor": 2,
|
| 27 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 28 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
| 29 |
+
"continuous_batching": false,
|
| 30 |
+
"enable_bucketing": false,
|
| 31 |
+
"ep_degree": 1,
|
| 32 |
+
"flash_decoding_enabled": false,
|
| 33 |
+
"fused_qkv": true,
|
| 34 |
+
"glu_mlp": true,
|
| 35 |
+
"is_chunked_prefill": false,
|
| 36 |
+
"local_ranks_size": 2,
|
| 37 |
+
"logical_nc_config": 1,
|
| 38 |
+
"max_batch_size": 1,
|
| 39 |
+
"max_context_length": 100,
|
| 40 |
+
"max_topk": 256,
|
| 41 |
+
"mlp_kernel_enabled": false,
|
| 42 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 43 |
+
"n_active_tokens": 100,
|
| 44 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 45 |
+
"num_cores_per_group": 1,
|
| 46 |
+
"on_device_sampling": true,
|
| 47 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 48 |
+
"output_logits": false,
|
| 49 |
+
"pp_degree": 1,
|
| 50 |
+
"qkv_kernel_enabled": false,
|
| 51 |
+
"sequence_length": 100,
|
| 52 |
+
"sequence_parallel_enabled": false,
|
| 53 |
+
"speculation_length": 0,
|
| 54 |
+
"start_rank_id": 0,
|
| 55 |
+
"target": null,
|
| 56 |
+
"torch_dtype": "float16",
|
| 57 |
+
"tp_degree": 2,
|
| 58 |
+
"vocab_parallel": false
|
| 59 |
+
},
|
| 60 |
+
"num_attention_heads": 4,
|
| 61 |
+
"num_hidden_layers": 2,
|
| 62 |
+
"num_key_value_heads": 4,
|
| 63 |
+
"residual_multiplier": 1.0,
|
| 64 |
+
"rms_norm_eps": 1e-06,
|
| 65 |
+
"rope_scaling": null,
|
| 66 |
+
"rope_theta": 10000.0,
|
| 67 |
+
"tie_word_embeddings": false,
|
| 68 |
+
"use_cache": true,
|
| 69 |
+
"vocab_size": 49152
|
| 70 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/0877b2b6ccc545a88fe8.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 4,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 16,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 64,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 26 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 1,
|
| 37 |
+
"max_context_length": 100,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 100,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 100,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "float16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 4,
|
| 59 |
+
"num_hidden_layers": 2,
|
| 60 |
+
"num_key_value_heads": 4,
|
| 61 |
+
"pretraining_tp": 1,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_scaling": {
|
| 64 |
+
"factor": 8.0,
|
| 65 |
+
"high_freq_factor": 4.0,
|
| 66 |
+
"low_freq_factor": 1.0,
|
| 67 |
+
"original_max_position_embeddings": 8192,
|
| 68 |
+
"rope_type": "llama3"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 500000.0,
|
| 71 |
+
"tie_word_embeddings": false,
|
| 72 |
+
"use_cache": true,
|
| 73 |
+
"vocab_size": 128256
|
| 74 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/674878857b261e357c7b.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 4,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 16,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 64,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 2,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 26 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 27 |
+
"continuous_batching": true,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 2,
|
| 37 |
+
"max_context_length": 100,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 100,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 100,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "float16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 4,
|
| 59 |
+
"num_hidden_layers": 2,
|
| 60 |
+
"num_key_value_heads": 4,
|
| 61 |
+
"pretraining_tp": 1,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_scaling": {
|
| 64 |
+
"factor": 8.0,
|
| 65 |
+
"high_freq_factor": 4.0,
|
| 66 |
+
"low_freq_factor": 1.0,
|
| 67 |
+
"original_max_position_embeddings": 8192,
|
| 68 |
+
"rope_type": "llama3"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 500000.0,
|
| 71 |
+
"tie_word_embeddings": false,
|
| 72 |
+
"use_cache": true,
|
| 73 |
+
"vocab_size": 128256
|
| 74 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/b54003e874e8671bff7d.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 4,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 16,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 64,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 26 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 1,
|
| 37 |
+
"max_context_length": 100,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 100,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 100,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "bfloat16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 4,
|
| 59 |
+
"num_hidden_layers": 2,
|
| 60 |
+
"num_key_value_heads": 4,
|
| 61 |
+
"pretraining_tp": 1,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_scaling": {
|
| 64 |
+
"factor": 8.0,
|
| 65 |
+
"high_freq_factor": 4.0,
|
| 66 |
+
"low_freq_factor": 1.0,
|
| 67 |
+
"original_max_position_embeddings": 8192,
|
| 68 |
+
"rope_type": "llama3"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 500000.0,
|
| 71 |
+
"tie_word_embeddings": false,
|
| 72 |
+
"use_cache": true,
|
| 73 |
+
"vocab_size": 128256
|
| 74 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/llamafactory/tiny-random-Llama-3/d06188fe8e87222dbd8c.json
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 4,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 16,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 64,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 26 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 1,
|
| 37 |
+
"max_context_length": 131072,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 131072,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 131072,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "bfloat16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 4,
|
| 59 |
+
"num_hidden_layers": 2,
|
| 60 |
+
"num_key_value_heads": 4,
|
| 61 |
+
"pretraining_tp": 1,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_scaling": {
|
| 64 |
+
"factor": 8.0,
|
| 65 |
+
"high_freq_factor": 4.0,
|
| 66 |
+
"low_freq_factor": 1.0,
|
| 67 |
+
"original_max_position_embeddings": 8192,
|
| 68 |
+
"rope_type": "llama3"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 500000.0,
|
| 71 |
+
"tie_word_embeddings": false,
|
| 72 |
+
"use_cache": true,
|
| 73 |
+
"vocab_size": 128256
|
| 74 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a338625e976e1cdcce16.json
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": null,
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": false,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 1,
|
| 37 |
+
"max_context_length": 4096,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 4096,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": false,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 4096,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 5,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "bfloat16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 32,
|
| 59 |
+
"num_hidden_layers": 16,
|
| 60 |
+
"num_key_value_heads": 8,
|
| 61 |
+
"pretraining_tp": 1,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_scaling": {
|
| 64 |
+
"factor": 32.0,
|
| 65 |
+
"high_freq_factor": 4.0,
|
| 66 |
+
"low_freq_factor": 1.0,
|
| 67 |
+
"original_max_position_embeddings": 8192,
|
| 68 |
+
"rope_type": "llama3"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 500000.0,
|
| 71 |
+
"tie_word_embeddings": true,
|
| 72 |
+
"unsloth_fixed": true,
|
| 73 |
+
"use_cache": true,
|
| 74 |
+
"vocab_size": 128256
|
| 75 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/a7b0618b75c2e747c4a7.json
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": null,
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": false,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 1,
|
| 37 |
+
"max_context_length": 4096,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 4096,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": false,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 4096,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "bfloat16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 32,
|
| 59 |
+
"num_hidden_layers": 16,
|
| 60 |
+
"num_key_value_heads": 8,
|
| 61 |
+
"pretraining_tp": 1,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_scaling": {
|
| 64 |
+
"factor": 32.0,
|
| 65 |
+
"high_freq_factor": 4.0,
|
| 66 |
+
"low_freq_factor": 1.0,
|
| 67 |
+
"original_max_position_embeddings": 8192,
|
| 68 |
+
"rope_type": "llama3"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 500000.0,
|
| 71 |
+
"tie_word_embeddings": true,
|
| 72 |
+
"unsloth_fixed": true,
|
| 73 |
+
"use_cache": true,
|
| 74 |
+
"vocab_size": 128256
|
| 75 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/llama/unsloth/Llama-3.2-1B-Instruct/cd641ba46e0fe08cbcc1.json
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 4,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 27 |
+
"continuous_batching": true,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 4,
|
| 37 |
+
"max_context_length": 4096,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 4096,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 4096,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "float16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 32,
|
| 59 |
+
"num_hidden_layers": 16,
|
| 60 |
+
"num_key_value_heads": 8,
|
| 61 |
+
"pretraining_tp": 1,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_scaling": {
|
| 64 |
+
"factor": 32.0,
|
| 65 |
+
"high_freq_factor": 4.0,
|
| 66 |
+
"low_freq_factor": 1.0,
|
| 67 |
+
"original_max_position_embeddings": 8192,
|
| 68 |
+
"rope_type": "llama3"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 500000.0,
|
| 71 |
+
"tie_word_embeddings": true,
|
| 72 |
+
"unsloth_fixed": true,
|
| 73 |
+
"use_cache": true,
|
| 74 |
+
"vocab_size": 128256
|
| 75 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/2db16eb8799b9160edec.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MixtralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"head_dim": 32,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3584,
|
| 14 |
+
"max_position_embeddings": 1024,
|
| 15 |
+
"model_type": "mixtral",
|
| 16 |
+
"neuron": {
|
| 17 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 18 |
+
"async_mode": false,
|
| 19 |
+
"attn_kernel_enabled": false,
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"cc_pipeline_tiling_factor": 2,
|
| 23 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
| 24 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"enable_bucketing": false,
|
| 27 |
+
"ep_degree": 1,
|
| 28 |
+
"flash_decoding_enabled": false,
|
| 29 |
+
"fused_qkv": false,
|
| 30 |
+
"glu_mlp": true,
|
| 31 |
+
"is_chunked_prefill": false,
|
| 32 |
+
"local_ranks_size": 2,
|
| 33 |
+
"logical_nc_config": 1,
|
| 34 |
+
"max_batch_size": 1,
|
| 35 |
+
"max_context_length": 100,
|
| 36 |
+
"max_topk": 256,
|
| 37 |
+
"mlp_kernel_enabled": false,
|
| 38 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 39 |
+
"n_active_tokens": 100,
|
| 40 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 41 |
+
"num_cores_per_group": 1,
|
| 42 |
+
"on_device_sampling": false,
|
| 43 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 44 |
+
"output_logits": false,
|
| 45 |
+
"pp_degree": 1,
|
| 46 |
+
"qkv_kernel_enabled": false,
|
| 47 |
+
"sequence_length": 100,
|
| 48 |
+
"sequence_parallel_enabled": false,
|
| 49 |
+
"speculation_length": 0,
|
| 50 |
+
"start_rank_id": 0,
|
| 51 |
+
"target": null,
|
| 52 |
+
"torch_dtype": "bfloat16",
|
| 53 |
+
"tp_degree": 2,
|
| 54 |
+
"vocab_parallel": false
|
| 55 |
+
},
|
| 56 |
+
"num_attention_heads": 32,
|
| 57 |
+
"num_experts_per_tok": 2,
|
| 58 |
+
"num_hidden_layers": 2,
|
| 59 |
+
"num_key_value_heads": 8,
|
| 60 |
+
"num_local_experts": 8,
|
| 61 |
+
"output_router_logits": false,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_theta": 10000.0,
|
| 64 |
+
"router_aux_loss_coef": 0.001,
|
| 65 |
+
"router_jitter_noise": 0.0,
|
| 66 |
+
"sliding_window": 4096,
|
| 67 |
+
"tie_word_embeddings": false,
|
| 68 |
+
"use_cache": true,
|
| 69 |
+
"vocab_size": 32000
|
| 70 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/a87206b079f3f365cf93.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MixtralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"head_dim": 32,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3584,
|
| 14 |
+
"max_position_embeddings": 1024,
|
| 15 |
+
"model_type": "mixtral",
|
| 16 |
+
"neuron": {
|
| 17 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 18 |
+
"async_mode": false,
|
| 19 |
+
"attn_kernel_enabled": false,
|
| 20 |
+
"batch_size": 2,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"cc_pipeline_tiling_factor": 2,
|
| 23 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
| 24 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"enable_bucketing": false,
|
| 27 |
+
"ep_degree": 1,
|
| 28 |
+
"flash_decoding_enabled": false,
|
| 29 |
+
"fused_qkv": false,
|
| 30 |
+
"glu_mlp": true,
|
| 31 |
+
"is_chunked_prefill": false,
|
| 32 |
+
"local_ranks_size": 2,
|
| 33 |
+
"logical_nc_config": 1,
|
| 34 |
+
"max_batch_size": 2,
|
| 35 |
+
"max_context_length": 100,
|
| 36 |
+
"max_topk": 256,
|
| 37 |
+
"mlp_kernel_enabled": false,
|
| 38 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 39 |
+
"n_active_tokens": 100,
|
| 40 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 41 |
+
"num_cores_per_group": 1,
|
| 42 |
+
"on_device_sampling": false,
|
| 43 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 44 |
+
"output_logits": false,
|
| 45 |
+
"pp_degree": 1,
|
| 46 |
+
"qkv_kernel_enabled": false,
|
| 47 |
+
"sequence_length": 100,
|
| 48 |
+
"sequence_parallel_enabled": false,
|
| 49 |
+
"speculation_length": 0,
|
| 50 |
+
"start_rank_id": 0,
|
| 51 |
+
"target": null,
|
| 52 |
+
"torch_dtype": "float16",
|
| 53 |
+
"tp_degree": 2,
|
| 54 |
+
"vocab_parallel": false
|
| 55 |
+
},
|
| 56 |
+
"num_attention_heads": 32,
|
| 57 |
+
"num_experts_per_tok": 2,
|
| 58 |
+
"num_hidden_layers": 2,
|
| 59 |
+
"num_key_value_heads": 8,
|
| 60 |
+
"num_local_experts": 8,
|
| 61 |
+
"output_router_logits": false,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_theta": 10000.0,
|
| 64 |
+
"router_aux_loss_coef": 0.001,
|
| 65 |
+
"router_jitter_noise": 0.0,
|
| 66 |
+
"sliding_window": 4096,
|
| 67 |
+
"tie_word_embeddings": false,
|
| 68 |
+
"use_cache": true,
|
| 69 |
+
"vocab_size": 32000
|
| 70 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/mixtral/dacorvo/Mixtral-tiny/ba9ad122c306f79a7f90.json
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MixtralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"head_dim": 32,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3584,
|
| 14 |
+
"max_position_embeddings": 1024,
|
| 15 |
+
"model_type": "mixtral",
|
| 16 |
+
"neuron": {
|
| 17 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 18 |
+
"async_mode": false,
|
| 19 |
+
"attn_kernel_enabled": false,
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"cc_pipeline_tiling_factor": 2,
|
| 23 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
| 24 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"enable_bucketing": false,
|
| 27 |
+
"ep_degree": 1,
|
| 28 |
+
"flash_decoding_enabled": false,
|
| 29 |
+
"fused_qkv": false,
|
| 30 |
+
"glu_mlp": true,
|
| 31 |
+
"is_chunked_prefill": false,
|
| 32 |
+
"local_ranks_size": 2,
|
| 33 |
+
"logical_nc_config": 1,
|
| 34 |
+
"max_batch_size": 1,
|
| 35 |
+
"max_context_length": 100,
|
| 36 |
+
"max_topk": 256,
|
| 37 |
+
"mlp_kernel_enabled": false,
|
| 38 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 39 |
+
"n_active_tokens": 100,
|
| 40 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 41 |
+
"num_cores_per_group": 1,
|
| 42 |
+
"on_device_sampling": false,
|
| 43 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 44 |
+
"output_logits": false,
|
| 45 |
+
"pp_degree": 1,
|
| 46 |
+
"qkv_kernel_enabled": false,
|
| 47 |
+
"sequence_length": 100,
|
| 48 |
+
"sequence_parallel_enabled": false,
|
| 49 |
+
"speculation_length": 0,
|
| 50 |
+
"start_rank_id": 0,
|
| 51 |
+
"target": null,
|
| 52 |
+
"torch_dtype": "float16",
|
| 53 |
+
"tp_degree": 2,
|
| 54 |
+
"vocab_parallel": false
|
| 55 |
+
},
|
| 56 |
+
"num_attention_heads": 32,
|
| 57 |
+
"num_experts_per_tok": 2,
|
| 58 |
+
"num_hidden_layers": 2,
|
| 59 |
+
"num_key_value_heads": 8,
|
| 60 |
+
"num_local_experts": 8,
|
| 61 |
+
"output_router_logits": false,
|
| 62 |
+
"rms_norm_eps": 1e-05,
|
| 63 |
+
"rope_theta": 10000.0,
|
| 64 |
+
"router_aux_loss_coef": 0.001,
|
| 65 |
+
"router_jitter_noise": 0.0,
|
| 66 |
+
"sliding_window": 4096,
|
| 67 |
+
"tie_word_embeddings": false,
|
| 68 |
+
"use_cache": true,
|
| 69 |
+
"vocab_size": 32000
|
| 70 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/308e16ab3ee4911957e7.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Phi3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"auto_map": {},
|
| 11 |
+
"embd_pdrop": 0.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 32,
|
| 16 |
+
"max_position_embeddings": 16384,
|
| 17 |
+
"model_type": "phi3",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
| 26 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 1,
|
| 37 |
+
"max_context_length": 100,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 100,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 100,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "bfloat16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 2,
|
| 59 |
+
"num_hidden_layers": 2,
|
| 60 |
+
"num_key_value_heads": 1,
|
| 61 |
+
"original_max_position_embeddings": 16384,
|
| 62 |
+
"partial_rotary_factor": 1.0,
|
| 63 |
+
"resid_pdrop": 0.0,
|
| 64 |
+
"rms_norm_eps": 1e-05,
|
| 65 |
+
"rope_scaling": null,
|
| 66 |
+
"rope_theta": 250000,
|
| 67 |
+
"sliding_window": null,
|
| 68 |
+
"tie_word_embeddings": false,
|
| 69 |
+
"use_cache": true,
|
| 70 |
+
"vocab_size": 100352
|
| 71 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/38df0e7247208af80c7d.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Phi3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"auto_map": {},
|
| 11 |
+
"embd_pdrop": 0.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 32,
|
| 16 |
+
"max_position_embeddings": 16384,
|
| 17 |
+
"model_type": "phi3",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
| 26 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 1,
|
| 37 |
+
"max_context_length": 100,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 100,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 100,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "float16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 2,
|
| 59 |
+
"num_hidden_layers": 2,
|
| 60 |
+
"num_key_value_heads": 1,
|
| 61 |
+
"original_max_position_embeddings": 16384,
|
| 62 |
+
"partial_rotary_factor": 1.0,
|
| 63 |
+
"resid_pdrop": 0.0,
|
| 64 |
+
"rms_norm_eps": 1e-05,
|
| 65 |
+
"rope_scaling": null,
|
| 66 |
+
"rope_theta": 250000,
|
| 67 |
+
"sliding_window": null,
|
| 68 |
+
"tie_word_embeddings": false,
|
| 69 |
+
"use_cache": true,
|
| 70 |
+
"vocab_size": 100352
|
| 71 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/phi3/yujiepan/phi-4-tiny-random/f190d1b1acf6d9624f9c.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/phi-4-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Phi3ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"auto_map": {},
|
| 11 |
+
"embd_pdrop": 0.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 16,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 32,
|
| 16 |
+
"max_position_embeddings": 16384,
|
| 17 |
+
"model_type": "phi3",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 2,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "yujiepan/phi-4-tiny-random",
|
| 26 |
+
"checkpoint_revision": "18a9a1168dc97ac6d128f811925670c275610f5a",
|
| 27 |
+
"continuous_batching": true,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 2,
|
| 37 |
+
"max_context_length": 100,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"mlp_kernel_enabled": false,
|
| 40 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 41 |
+
"n_active_tokens": 100,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 100,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "float16",
|
| 55 |
+
"tp_degree": 2,
|
| 56 |
+
"vocab_parallel": false
|
| 57 |
+
},
|
| 58 |
+
"num_attention_heads": 2,
|
| 59 |
+
"num_hidden_layers": 2,
|
| 60 |
+
"num_key_value_heads": 1,
|
| 61 |
+
"original_max_position_embeddings": 16384,
|
| 62 |
+
"partial_rotary_factor": 1.0,
|
| 63 |
+
"resid_pdrop": 0.0,
|
| 64 |
+
"rms_norm_eps": 1e-05,
|
| 65 |
+
"rope_scaling": null,
|
| 66 |
+
"rope_theta": 250000,
|
| 67 |
+
"sliding_window": null,
|
| 68 |
+
"tie_word_embeddings": false,
|
| 69 |
+
"use_cache": true,
|
| 70 |
+
"vocab_size": 100352
|
| 71 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/51d387c8436423c80830.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen2.5-0.5B",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen2ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"hidden_act": "silu",
|
| 10 |
+
"hidden_size": 896,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 4864,
|
| 13 |
+
"layer_types": [
|
| 14 |
+
"full_attention",
|
| 15 |
+
"full_attention",
|
| 16 |
+
"full_attention",
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention"
|
| 38 |
+
],
|
| 39 |
+
"max_position_embeddings": 32768,
|
| 40 |
+
"max_window_layers": 24,
|
| 41 |
+
"model_type": "qwen2",
|
| 42 |
+
"neuron": {
|
| 43 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 44 |
+
"async_mode": false,
|
| 45 |
+
"attn_kernel_enabled": false,
|
| 46 |
+
"batch_size": 1,
|
| 47 |
+
"capacity_factor": null,
|
| 48 |
+
"cc_pipeline_tiling_factor": 2,
|
| 49 |
+
"checkpoint_id": "Qwen/Qwen2.5-0.5B",
|
| 50 |
+
"checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
|
| 51 |
+
"continuous_batching": false,
|
| 52 |
+
"enable_bucketing": false,
|
| 53 |
+
"ep_degree": 1,
|
| 54 |
+
"flash_decoding_enabled": false,
|
| 55 |
+
"fused_qkv": false,
|
| 56 |
+
"glu_mlp": true,
|
| 57 |
+
"is_chunked_prefill": false,
|
| 58 |
+
"local_ranks_size": 2,
|
| 59 |
+
"logical_nc_config": 1,
|
| 60 |
+
"max_batch_size": 1,
|
| 61 |
+
"max_context_length": 32768,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"mlp_kernel_enabled": false,
|
| 64 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 65 |
+
"n_active_tokens": 32768,
|
| 66 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 67 |
+
"num_cores_per_group": 1,
|
| 68 |
+
"on_device_sampling": true,
|
| 69 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 70 |
+
"output_logits": false,
|
| 71 |
+
"pp_degree": 1,
|
| 72 |
+
"qkv_kernel_enabled": false,
|
| 73 |
+
"sequence_length": 32768,
|
| 74 |
+
"sequence_parallel_enabled": false,
|
| 75 |
+
"speculation_length": 0,
|
| 76 |
+
"start_rank_id": 0,
|
| 77 |
+
"target": null,
|
| 78 |
+
"torch_dtype": "bfloat16",
|
| 79 |
+
"tp_degree": 2,
|
| 80 |
+
"vocab_parallel": false
|
| 81 |
+
},
|
| 82 |
+
"num_attention_heads": 14,
|
| 83 |
+
"num_hidden_layers": 24,
|
| 84 |
+
"num_key_value_heads": 2,
|
| 85 |
+
"rms_norm_eps": 1e-06,
|
| 86 |
+
"rope_scaling": null,
|
| 87 |
+
"rope_theta": 1000000.0,
|
| 88 |
+
"sliding_window": null,
|
| 89 |
+
"tie_word_embeddings": true,
|
| 90 |
+
"use_cache": true,
|
| 91 |
+
"use_mrope": false,
|
| 92 |
+
"use_sliding_window": false,
|
| 93 |
+
"vocab_size": 151936
|
| 94 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/Qwen/Qwen2.5-0.5B/edef092decee45a6b77d.json
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "Qwen/Qwen2.5-0.5B",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen2ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"hidden_act": "silu",
|
| 10 |
+
"hidden_size": 896,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 4864,
|
| 13 |
+
"layer_types": [
|
| 14 |
+
"full_attention",
|
| 15 |
+
"full_attention",
|
| 16 |
+
"full_attention",
|
| 17 |
+
"full_attention",
|
| 18 |
+
"full_attention",
|
| 19 |
+
"full_attention",
|
| 20 |
+
"full_attention",
|
| 21 |
+
"full_attention",
|
| 22 |
+
"full_attention",
|
| 23 |
+
"full_attention",
|
| 24 |
+
"full_attention",
|
| 25 |
+
"full_attention",
|
| 26 |
+
"full_attention",
|
| 27 |
+
"full_attention",
|
| 28 |
+
"full_attention",
|
| 29 |
+
"full_attention",
|
| 30 |
+
"full_attention",
|
| 31 |
+
"full_attention",
|
| 32 |
+
"full_attention",
|
| 33 |
+
"full_attention",
|
| 34 |
+
"full_attention",
|
| 35 |
+
"full_attention",
|
| 36 |
+
"full_attention",
|
| 37 |
+
"full_attention"
|
| 38 |
+
],
|
| 39 |
+
"max_position_embeddings": 32768,
|
| 40 |
+
"max_window_layers": 24,
|
| 41 |
+
"model_type": "qwen2",
|
| 42 |
+
"neuron": {
|
| 43 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 44 |
+
"async_mode": false,
|
| 45 |
+
"attn_kernel_enabled": false,
|
| 46 |
+
"batch_size": 4,
|
| 47 |
+
"capacity_factor": null,
|
| 48 |
+
"cc_pipeline_tiling_factor": 2,
|
| 49 |
+
"checkpoint_id": "Qwen/Qwen2.5-0.5B",
|
| 50 |
+
"checkpoint_revision": "060db6499f32faf8b98477b0a26969ef7d8b9987",
|
| 51 |
+
"continuous_batching": true,
|
| 52 |
+
"enable_bucketing": false,
|
| 53 |
+
"ep_degree": 1,
|
| 54 |
+
"flash_decoding_enabled": false,
|
| 55 |
+
"fused_qkv": false,
|
| 56 |
+
"glu_mlp": true,
|
| 57 |
+
"is_chunked_prefill": false,
|
| 58 |
+
"local_ranks_size": 2,
|
| 59 |
+
"logical_nc_config": 1,
|
| 60 |
+
"max_batch_size": 4,
|
| 61 |
+
"max_context_length": 4096,
|
| 62 |
+
"max_topk": 256,
|
| 63 |
+
"mlp_kernel_enabled": false,
|
| 64 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 65 |
+
"n_active_tokens": 4096,
|
| 66 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 67 |
+
"num_cores_per_group": 1,
|
| 68 |
+
"on_device_sampling": false,
|
| 69 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 70 |
+
"output_logits": false,
|
| 71 |
+
"pp_degree": 1,
|
| 72 |
+
"qkv_kernel_enabled": false,
|
| 73 |
+
"sequence_length": 4096,
|
| 74 |
+
"sequence_parallel_enabled": false,
|
| 75 |
+
"speculation_length": 0,
|
| 76 |
+
"start_rank_id": 0,
|
| 77 |
+
"target": null,
|
| 78 |
+
"torch_dtype": "float16",
|
| 79 |
+
"tp_degree": 2,
|
| 80 |
+
"vocab_parallel": false
|
| 81 |
+
},
|
| 82 |
+
"num_attention_heads": 14,
|
| 83 |
+
"num_hidden_layers": 24,
|
| 84 |
+
"num_key_value_heads": 2,
|
| 85 |
+
"rms_norm_eps": 1e-06,
|
| 86 |
+
"rope_scaling": null,
|
| 87 |
+
"rope_theta": 1000000.0,
|
| 88 |
+
"sliding_window": null,
|
| 89 |
+
"tie_word_embeddings": true,
|
| 90 |
+
"use_cache": true,
|
| 91 |
+
"use_mrope": false,
|
| 92 |
+
"use_sliding_window": false,
|
| 93 |
+
"vocab_size": 151936
|
| 94 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/68fbb28f6340b2d7ca3b.json
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/qwen2.5-128k-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen2ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"hidden_act": "silu",
|
| 10 |
+
"hidden_size": 8,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 16,
|
| 13 |
+
"layer_types": [
|
| 14 |
+
"full_attention",
|
| 15 |
+
"full_attention"
|
| 16 |
+
],
|
| 17 |
+
"max_position_embeddings": 32768,
|
| 18 |
+
"max_window_layers": 1,
|
| 19 |
+
"model_type": "qwen2",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"async_mode": false,
|
| 23 |
+
"attn_kernel_enabled": false,
|
| 24 |
+
"batch_size": 2,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"cc_pipeline_tiling_factor": 2,
|
| 27 |
+
"checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
|
| 28 |
+
"checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
|
| 29 |
+
"continuous_batching": true,
|
| 30 |
+
"enable_bucketing": false,
|
| 31 |
+
"ep_degree": 1,
|
| 32 |
+
"flash_decoding_enabled": false,
|
| 33 |
+
"fused_qkv": false,
|
| 34 |
+
"glu_mlp": true,
|
| 35 |
+
"is_chunked_prefill": false,
|
| 36 |
+
"local_ranks_size": 2,
|
| 37 |
+
"logical_nc_config": 1,
|
| 38 |
+
"max_batch_size": 2,
|
| 39 |
+
"max_context_length": 100,
|
| 40 |
+
"max_topk": 256,
|
| 41 |
+
"mlp_kernel_enabled": false,
|
| 42 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 43 |
+
"n_active_tokens": 100,
|
| 44 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 45 |
+
"num_cores_per_group": 1,
|
| 46 |
+
"on_device_sampling": false,
|
| 47 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 48 |
+
"output_logits": false,
|
| 49 |
+
"pp_degree": 1,
|
| 50 |
+
"qkv_kernel_enabled": false,
|
| 51 |
+
"sequence_length": 100,
|
| 52 |
+
"sequence_parallel_enabled": false,
|
| 53 |
+
"speculation_length": 0,
|
| 54 |
+
"start_rank_id": 0,
|
| 55 |
+
"target": null,
|
| 56 |
+
"torch_dtype": "float16",
|
| 57 |
+
"tp_degree": 2,
|
| 58 |
+
"vocab_parallel": false
|
| 59 |
+
},
|
| 60 |
+
"num_attention_heads": 4,
|
| 61 |
+
"num_hidden_layers": 2,
|
| 62 |
+
"num_key_value_heads": 2,
|
| 63 |
+
"rms_norm_eps": 1e-06,
|
| 64 |
+
"rope_scaling": {
|
| 65 |
+
"factor": 4.0,
|
| 66 |
+
"original_max_position_embeddings": 32768,
|
| 67 |
+
"rope_type": "yarn",
|
| 68 |
+
"type": "yarn"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 1000000.0,
|
| 71 |
+
"sliding_window": null,
|
| 72 |
+
"tie_word_embeddings": false,
|
| 73 |
+
"use_cache": true,
|
| 74 |
+
"use_sliding_window": false,
|
| 75 |
+
"vocab_size": 152064
|
| 76 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/691934144efc9536a94c.json
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/qwen2.5-128k-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen2ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"hidden_act": "silu",
|
| 10 |
+
"hidden_size": 8,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 16,
|
| 13 |
+
"layer_types": [
|
| 14 |
+
"full_attention",
|
| 15 |
+
"full_attention"
|
| 16 |
+
],
|
| 17 |
+
"max_position_embeddings": 32768,
|
| 18 |
+
"max_window_layers": 1,
|
| 19 |
+
"model_type": "qwen2",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"async_mode": false,
|
| 23 |
+
"attn_kernel_enabled": false,
|
| 24 |
+
"batch_size": 1,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"cc_pipeline_tiling_factor": 2,
|
| 27 |
+
"checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
|
| 28 |
+
"checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
|
| 29 |
+
"continuous_batching": false,
|
| 30 |
+
"enable_bucketing": false,
|
| 31 |
+
"ep_degree": 1,
|
| 32 |
+
"flash_decoding_enabled": false,
|
| 33 |
+
"fused_qkv": false,
|
| 34 |
+
"glu_mlp": true,
|
| 35 |
+
"is_chunked_prefill": false,
|
| 36 |
+
"local_ranks_size": 2,
|
| 37 |
+
"logical_nc_config": 1,
|
| 38 |
+
"max_batch_size": 1,
|
| 39 |
+
"max_context_length": 100,
|
| 40 |
+
"max_topk": 256,
|
| 41 |
+
"mlp_kernel_enabled": false,
|
| 42 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 43 |
+
"n_active_tokens": 100,
|
| 44 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 45 |
+
"num_cores_per_group": 1,
|
| 46 |
+
"on_device_sampling": true,
|
| 47 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 48 |
+
"output_logits": false,
|
| 49 |
+
"pp_degree": 1,
|
| 50 |
+
"qkv_kernel_enabled": false,
|
| 51 |
+
"sequence_length": 100,
|
| 52 |
+
"sequence_parallel_enabled": false,
|
| 53 |
+
"speculation_length": 0,
|
| 54 |
+
"start_rank_id": 0,
|
| 55 |
+
"target": null,
|
| 56 |
+
"torch_dtype": "float16",
|
| 57 |
+
"tp_degree": 2,
|
| 58 |
+
"vocab_parallel": false
|
| 59 |
+
},
|
| 60 |
+
"num_attention_heads": 4,
|
| 61 |
+
"num_hidden_layers": 2,
|
| 62 |
+
"num_key_value_heads": 2,
|
| 63 |
+
"rms_norm_eps": 1e-06,
|
| 64 |
+
"rope_scaling": {
|
| 65 |
+
"factor": 4.0,
|
| 66 |
+
"original_max_position_embeddings": 32768,
|
| 67 |
+
"rope_type": "yarn",
|
| 68 |
+
"type": "yarn"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 1000000.0,
|
| 71 |
+
"sliding_window": null,
|
| 72 |
+
"tie_word_embeddings": false,
|
| 73 |
+
"use_cache": true,
|
| 74 |
+
"use_sliding_window": false,
|
| 75 |
+
"vocab_size": 152064
|
| 76 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen2/yujiepan/qwen2.5-128k-tiny-random/945a35693afe97130cb5.json
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "yujiepan/qwen2.5-128k-tiny-random",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen2ForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"hidden_act": "silu",
|
| 10 |
+
"hidden_size": 8,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"intermediate_size": 16,
|
| 13 |
+
"layer_types": [
|
| 14 |
+
"full_attention",
|
| 15 |
+
"full_attention"
|
| 16 |
+
],
|
| 17 |
+
"max_position_embeddings": 32768,
|
| 18 |
+
"max_window_layers": 1,
|
| 19 |
+
"model_type": "qwen2",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"async_mode": false,
|
| 23 |
+
"attn_kernel_enabled": false,
|
| 24 |
+
"batch_size": 1,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"cc_pipeline_tiling_factor": 2,
|
| 27 |
+
"checkpoint_id": "yujiepan/qwen2.5-128k-tiny-random",
|
| 28 |
+
"checkpoint_revision": "c8296d4ca3f87782876d2382fbb6481d1beb8ef0",
|
| 29 |
+
"continuous_batching": false,
|
| 30 |
+
"enable_bucketing": false,
|
| 31 |
+
"ep_degree": 1,
|
| 32 |
+
"flash_decoding_enabled": false,
|
| 33 |
+
"fused_qkv": false,
|
| 34 |
+
"glu_mlp": true,
|
| 35 |
+
"is_chunked_prefill": false,
|
| 36 |
+
"local_ranks_size": 2,
|
| 37 |
+
"logical_nc_config": 1,
|
| 38 |
+
"max_batch_size": 1,
|
| 39 |
+
"max_context_length": 100,
|
| 40 |
+
"max_topk": 256,
|
| 41 |
+
"mlp_kernel_enabled": false,
|
| 42 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 43 |
+
"n_active_tokens": 100,
|
| 44 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 45 |
+
"num_cores_per_group": 1,
|
| 46 |
+
"on_device_sampling": true,
|
| 47 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 48 |
+
"output_logits": false,
|
| 49 |
+
"pp_degree": 1,
|
| 50 |
+
"qkv_kernel_enabled": false,
|
| 51 |
+
"sequence_length": 100,
|
| 52 |
+
"sequence_parallel_enabled": false,
|
| 53 |
+
"speculation_length": 0,
|
| 54 |
+
"start_rank_id": 0,
|
| 55 |
+
"target": null,
|
| 56 |
+
"torch_dtype": "bfloat16",
|
| 57 |
+
"tp_degree": 2,
|
| 58 |
+
"vocab_parallel": false
|
| 59 |
+
},
|
| 60 |
+
"num_attention_heads": 4,
|
| 61 |
+
"num_hidden_layers": 2,
|
| 62 |
+
"num_key_value_heads": 2,
|
| 63 |
+
"rms_norm_eps": 1e-06,
|
| 64 |
+
"rope_scaling": {
|
| 65 |
+
"factor": 4.0,
|
| 66 |
+
"original_max_position_embeddings": 32768,
|
| 67 |
+
"rope_type": "yarn",
|
| 68 |
+
"type": "yarn"
|
| 69 |
+
},
|
| 70 |
+
"rope_theta": 1000000.0,
|
| 71 |
+
"sliding_window": null,
|
| 72 |
+
"tie_word_embeddings": false,
|
| 73 |
+
"use_cache": true,
|
| 74 |
+
"use_sliding_window": false,
|
| 75 |
+
"vocab_size": 152064
|
| 76 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/8ca6f1dd90ea7ad4fb33.json
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3MoeForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"decoder_sparse_step": 2,
|
| 11 |
+
"head_dim": 32,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 64,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 128,
|
| 16 |
+
"max_position_embeddings": 40960,
|
| 17 |
+
"max_window_layers": 1,
|
| 18 |
+
"mlp_only_layers": [],
|
| 19 |
+
"model_type": "qwen3_moe",
|
| 20 |
+
"moe_intermediate_size": 128,
|
| 21 |
+
"neuron": {
|
| 22 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 23 |
+
"async_mode": false,
|
| 24 |
+
"attn_kernel_enabled": false,
|
| 25 |
+
"batch_size": 2,
|
| 26 |
+
"capacity_factor": null,
|
| 27 |
+
"cc_pipeline_tiling_factor": 2,
|
| 28 |
+
"checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
| 29 |
+
"checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
|
| 30 |
+
"continuous_batching": false,
|
| 31 |
+
"enable_bucketing": false,
|
| 32 |
+
"ep_degree": 1,
|
| 33 |
+
"flash_decoding_enabled": false,
|
| 34 |
+
"fused_qkv": false,
|
| 35 |
+
"glu_mlp": true,
|
| 36 |
+
"is_chunked_prefill": false,
|
| 37 |
+
"local_ranks_size": 2,
|
| 38 |
+
"logical_nc_config": 1,
|
| 39 |
+
"max_batch_size": 2,
|
| 40 |
+
"max_context_length": 100,
|
| 41 |
+
"max_topk": 256,
|
| 42 |
+
"mlp_kernel_enabled": false,
|
| 43 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 44 |
+
"n_active_tokens": 100,
|
| 45 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 46 |
+
"num_cores_per_group": 1,
|
| 47 |
+
"on_device_sampling": false,
|
| 48 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 49 |
+
"output_logits": false,
|
| 50 |
+
"pp_degree": 1,
|
| 51 |
+
"qkv_kernel_enabled": false,
|
| 52 |
+
"sequence_length": 100,
|
| 53 |
+
"sequence_parallel_enabled": false,
|
| 54 |
+
"speculation_length": 0,
|
| 55 |
+
"start_rank_id": 0,
|
| 56 |
+
"target": null,
|
| 57 |
+
"torch_dtype": "float16",
|
| 58 |
+
"tp_degree": 2,
|
| 59 |
+
"vocab_parallel": false
|
| 60 |
+
},
|
| 61 |
+
"norm_topk_prob": true,
|
| 62 |
+
"num_attention_heads": 2,
|
| 63 |
+
"num_experts": 8,
|
| 64 |
+
"num_experts_per_tok": 2,
|
| 65 |
+
"num_hidden_layers": 2,
|
| 66 |
+
"num_key_value_heads": 1,
|
| 67 |
+
"output_router_logits": false,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": null,
|
| 70 |
+
"rope_theta": 1000000.0,
|
| 71 |
+
"router_aux_loss_coef": 0.001,
|
| 72 |
+
"sliding_window": null,
|
| 73 |
+
"tie_word_embeddings": true,
|
| 74 |
+
"use_cache": true,
|
| 75 |
+
"use_sliding_window": false,
|
| 76 |
+
"vocab_size": 151936
|
| 77 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/bb735c1f623df03e8d42.json
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3MoeForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"decoder_sparse_step": 2,
|
| 11 |
+
"head_dim": 32,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 64,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 128,
|
| 16 |
+
"max_position_embeddings": 40960,
|
| 17 |
+
"max_window_layers": 1,
|
| 18 |
+
"mlp_only_layers": [],
|
| 19 |
+
"model_type": "qwen3_moe",
|
| 20 |
+
"moe_intermediate_size": 128,
|
| 21 |
+
"neuron": {
|
| 22 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 23 |
+
"async_mode": false,
|
| 24 |
+
"attn_kernel_enabled": false,
|
| 25 |
+
"batch_size": 1,
|
| 26 |
+
"capacity_factor": null,
|
| 27 |
+
"cc_pipeline_tiling_factor": 2,
|
| 28 |
+
"checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
| 29 |
+
"checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
|
| 30 |
+
"continuous_batching": false,
|
| 31 |
+
"enable_bucketing": false,
|
| 32 |
+
"ep_degree": 1,
|
| 33 |
+
"flash_decoding_enabled": false,
|
| 34 |
+
"fused_qkv": false,
|
| 35 |
+
"glu_mlp": true,
|
| 36 |
+
"is_chunked_prefill": false,
|
| 37 |
+
"local_ranks_size": 2,
|
| 38 |
+
"logical_nc_config": 1,
|
| 39 |
+
"max_batch_size": 1,
|
| 40 |
+
"max_context_length": 100,
|
| 41 |
+
"max_topk": 256,
|
| 42 |
+
"mlp_kernel_enabled": false,
|
| 43 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 44 |
+
"n_active_tokens": 100,
|
| 45 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 46 |
+
"num_cores_per_group": 1,
|
| 47 |
+
"on_device_sampling": false,
|
| 48 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 49 |
+
"output_logits": false,
|
| 50 |
+
"pp_degree": 1,
|
| 51 |
+
"qkv_kernel_enabled": false,
|
| 52 |
+
"sequence_length": 100,
|
| 53 |
+
"sequence_parallel_enabled": false,
|
| 54 |
+
"speculation_length": 0,
|
| 55 |
+
"start_rank_id": 0,
|
| 56 |
+
"target": null,
|
| 57 |
+
"torch_dtype": "float16",
|
| 58 |
+
"tp_degree": 2,
|
| 59 |
+
"vocab_parallel": false
|
| 60 |
+
},
|
| 61 |
+
"norm_topk_prob": true,
|
| 62 |
+
"num_attention_heads": 2,
|
| 63 |
+
"num_experts": 8,
|
| 64 |
+
"num_experts_per_tok": 2,
|
| 65 |
+
"num_hidden_layers": 2,
|
| 66 |
+
"num_key_value_heads": 1,
|
| 67 |
+
"output_router_logits": false,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": null,
|
| 70 |
+
"rope_theta": 1000000.0,
|
| 71 |
+
"router_aux_loss_coef": 0.001,
|
| 72 |
+
"sliding_window": null,
|
| 73 |
+
"tie_word_embeddings": true,
|
| 74 |
+
"use_cache": true,
|
| 75 |
+
"use_sliding_window": false,
|
| 76 |
+
"vocab_size": 151936
|
| 77 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev4/qwen3_moe/optimum-internal-testing/tiny-random-qwen3_moe/eeeb1f6469ea444de4d6.json
ADDED
|
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"Qwen3MoeForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"decoder_sparse_step": 2,
|
| 11 |
+
"head_dim": 32,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 64,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 128,
|
| 16 |
+
"max_position_embeddings": 40960,
|
| 17 |
+
"max_window_layers": 1,
|
| 18 |
+
"mlp_only_layers": [],
|
| 19 |
+
"model_type": "qwen3_moe",
|
| 20 |
+
"moe_intermediate_size": 128,
|
| 21 |
+
"neuron": {
|
| 22 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 23 |
+
"async_mode": false,
|
| 24 |
+
"attn_kernel_enabled": false,
|
| 25 |
+
"batch_size": 1,
|
| 26 |
+
"capacity_factor": null,
|
| 27 |
+
"cc_pipeline_tiling_factor": 2,
|
| 28 |
+
"checkpoint_id": "optimum-internal-testing/tiny-random-qwen3_moe",
|
| 29 |
+
"checkpoint_revision": "e0230be2839556b44b7400a233c73c74b4abb7af",
|
| 30 |
+
"continuous_batching": false,
|
| 31 |
+
"enable_bucketing": false,
|
| 32 |
+
"ep_degree": 1,
|
| 33 |
+
"flash_decoding_enabled": false,
|
| 34 |
+
"fused_qkv": false,
|
| 35 |
+
"glu_mlp": true,
|
| 36 |
+
"is_chunked_prefill": false,
|
| 37 |
+
"local_ranks_size": 2,
|
| 38 |
+
"logical_nc_config": 1,
|
| 39 |
+
"max_batch_size": 1,
|
| 40 |
+
"max_context_length": 100,
|
| 41 |
+
"max_topk": 256,
|
| 42 |
+
"mlp_kernel_enabled": false,
|
| 43 |
+
"mlp_kernel_fuse_residual_add": false,
|
| 44 |
+
"n_active_tokens": 100,
|
| 45 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 46 |
+
"num_cores_per_group": 1,
|
| 47 |
+
"on_device_sampling": false,
|
| 48 |
+
"optimum_neuron_version": "0.3.1.dev4",
|
| 49 |
+
"output_logits": false,
|
| 50 |
+
"pp_degree": 1,
|
| 51 |
+
"qkv_kernel_enabled": false,
|
| 52 |
+
"sequence_length": 100,
|
| 53 |
+
"sequence_parallel_enabled": false,
|
| 54 |
+
"speculation_length": 0,
|
| 55 |
+
"start_rank_id": 0,
|
| 56 |
+
"target": null,
|
| 57 |
+
"torch_dtype": "bfloat16",
|
| 58 |
+
"tp_degree": 2,
|
| 59 |
+
"vocab_parallel": false
|
| 60 |
+
},
|
| 61 |
+
"norm_topk_prob": true,
|
| 62 |
+
"num_attention_heads": 2,
|
| 63 |
+
"num_experts": 8,
|
| 64 |
+
"num_experts_per_tok": 2,
|
| 65 |
+
"num_hidden_layers": 2,
|
| 66 |
+
"num_key_value_heads": 1,
|
| 67 |
+
"output_router_logits": false,
|
| 68 |
+
"rms_norm_eps": 1e-06,
|
| 69 |
+
"rope_scaling": null,
|
| 70 |
+
"rope_theta": 1000000.0,
|
| 71 |
+
"router_aux_loss_coef": 0.001,
|
| 72 |
+
"sliding_window": null,
|
| 73 |
+
"tie_word_embeddings": true,
|
| 74 |
+
"use_cache": true,
|
| 75 |
+
"use_sliding_window": false,
|
| 76 |
+
"vocab_size": 151936
|
| 77 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/0800231dd65c5c505814.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"attention_multiplier": 1.0,
|
| 11 |
+
"embedding_multiplier": 1.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"logits_scaling": 1.0,
|
| 17 |
+
"max_position_embeddings": 2048,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"batch_size": 2,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 25 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
| 26 |
+
"continuous_batching": true,
|
| 27 |
+
"enable_bucketing": false,
|
| 28 |
+
"ep_degree": 1,
|
| 29 |
+
"fused_qkv": true,
|
| 30 |
+
"glu_mlp": true,
|
| 31 |
+
"local_ranks_size": 2,
|
| 32 |
+
"logical_nc_config": 1,
|
| 33 |
+
"max_batch_size": 2,
|
| 34 |
+
"max_context_length": 1024,
|
| 35 |
+
"max_topk": 256,
|
| 36 |
+
"n_active_tokens": 1024,
|
| 37 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 38 |
+
"on_device_sampling": true,
|
| 39 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 40 |
+
"output_logits": false,
|
| 41 |
+
"pp_degree": 1,
|
| 42 |
+
"sequence_length": 1024,
|
| 43 |
+
"speculation_length": 0,
|
| 44 |
+
"start_rank_id": 0,
|
| 45 |
+
"target": null,
|
| 46 |
+
"torch_dtype": "float16",
|
| 47 |
+
"tp_degree": 2
|
| 48 |
+
},
|
| 49 |
+
"num_attention_heads": 4,
|
| 50 |
+
"num_hidden_layers": 2,
|
| 51 |
+
"num_key_value_heads": 4,
|
| 52 |
+
"residual_multiplier": 1.0,
|
| 53 |
+
"rms_norm_eps": 1e-06,
|
| 54 |
+
"rope_scaling": null,
|
| 55 |
+
"rope_theta": 10000.0,
|
| 56 |
+
"tie_word_embeddings": false,
|
| 57 |
+
"use_cache": true,
|
| 58 |
+
"vocab_size": 49152
|
| 59 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/hf-internal-testing/tiny-random-GraniteForCausalLM/2ff2060437ec7c7a202a.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"attention_multiplier": 1.0,
|
| 11 |
+
"embedding_multiplier": 1.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 32,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 64,
|
| 16 |
+
"logits_scaling": 1.0,
|
| 17 |
+
"max_position_embeddings": 2048,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"checkpoint_id": "hf-internal-testing/tiny-random-GraniteForCausalLM",
|
| 25 |
+
"checkpoint_revision": "c3074ebc0ac2fe545305f5e5f6cce2cc9b2aa0c5",
|
| 26 |
+
"continuous_batching": false,
|
| 27 |
+
"enable_bucketing": false,
|
| 28 |
+
"ep_degree": 1,
|
| 29 |
+
"fused_qkv": true,
|
| 30 |
+
"glu_mlp": true,
|
| 31 |
+
"local_ranks_size": 2,
|
| 32 |
+
"logical_nc_config": 1,
|
| 33 |
+
"max_batch_size": 1,
|
| 34 |
+
"max_context_length": 1024,
|
| 35 |
+
"max_topk": 256,
|
| 36 |
+
"n_active_tokens": 1024,
|
| 37 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 38 |
+
"on_device_sampling": true,
|
| 39 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 40 |
+
"output_logits": false,
|
| 41 |
+
"pp_degree": 1,
|
| 42 |
+
"sequence_length": 1024,
|
| 43 |
+
"speculation_length": 0,
|
| 44 |
+
"start_rank_id": 0,
|
| 45 |
+
"target": null,
|
| 46 |
+
"torch_dtype": "bfloat16",
|
| 47 |
+
"tp_degree": 2
|
| 48 |
+
},
|
| 49 |
+
"num_attention_heads": 4,
|
| 50 |
+
"num_hidden_layers": 2,
|
| 51 |
+
"num_key_value_heads": 4,
|
| 52 |
+
"residual_multiplier": 1.0,
|
| 53 |
+
"rms_norm_eps": 1e-06,
|
| 54 |
+
"rope_scaling": null,
|
| 55 |
+
"rope_theta": 10000.0,
|
| 56 |
+
"tie_word_embeddings": false,
|
| 57 |
+
"use_cache": true,
|
| 58 |
+
"vocab_size": 49152
|
| 59 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/0cfa36114f700208376a.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.1,
|
| 10 |
+
"attention_multiplier": 0.015625,
|
| 11 |
+
"embedding_multiplier": 12.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"logits_scaling": 8.0,
|
| 17 |
+
"max_position_embeddings": 131072,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"async_mode": false,
|
| 23 |
+
"attn_kernel_enabled": false,
|
| 24 |
+
"batch_size": 4,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"cc_pipeline_tiling_factor": 2,
|
| 27 |
+
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 28 |
+
"checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
|
| 29 |
+
"continuous_batching": true,
|
| 30 |
+
"enable_bucketing": false,
|
| 31 |
+
"ep_degree": 1,
|
| 32 |
+
"flash_decoding_enabled": false,
|
| 33 |
+
"fused_qkv": true,
|
| 34 |
+
"glu_mlp": true,
|
| 35 |
+
"is_chunked_prefill": false,
|
| 36 |
+
"local_ranks_size": 2,
|
| 37 |
+
"logical_nc_config": 1,
|
| 38 |
+
"max_batch_size": 4,
|
| 39 |
+
"max_context_length": 4096,
|
| 40 |
+
"max_topk": 256,
|
| 41 |
+
"n_active_tokens": 4096,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"sequence_length": 4096,
|
| 49 |
+
"sequence_parallel_enabled": false,
|
| 50 |
+
"speculation_length": 0,
|
| 51 |
+
"start_rank_id": 0,
|
| 52 |
+
"target": null,
|
| 53 |
+
"torch_dtype": "bfloat16",
|
| 54 |
+
"tp_degree": 2
|
| 55 |
+
},
|
| 56 |
+
"num_attention_heads": 32,
|
| 57 |
+
"num_hidden_layers": 40,
|
| 58 |
+
"num_key_value_heads": 8,
|
| 59 |
+
"residual_multiplier": 0.22,
|
| 60 |
+
"rms_norm_eps": 1e-05,
|
| 61 |
+
"rope_scaling": null,
|
| 62 |
+
"rope_theta": 5000000.0,
|
| 63 |
+
"tie_word_embeddings": true,
|
| 64 |
+
"use_cache": true,
|
| 65 |
+
"vocab_size": 49155
|
| 66 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/1144207432b18b97200f.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.1,
|
| 10 |
+
"attention_multiplier": 0.015625,
|
| 11 |
+
"embedding_multiplier": 12.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"logits_scaling": 8.0,
|
| 17 |
+
"max_position_embeddings": 131072,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"attn_kernel_enabled": false,
|
| 23 |
+
"batch_size": 4,
|
| 24 |
+
"capacity_factor": null,
|
| 25 |
+
"cc_pipeline_tiling_factor": 2,
|
| 26 |
+
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 27 |
+
"checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
|
| 28 |
+
"continuous_batching": true,
|
| 29 |
+
"enable_bucketing": false,
|
| 30 |
+
"ep_degree": 1,
|
| 31 |
+
"flash_decoding_enabled": false,
|
| 32 |
+
"fused_qkv": true,
|
| 33 |
+
"glu_mlp": true,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 4,
|
| 37 |
+
"max_context_length": 4096,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"n_active_tokens": 4096,
|
| 40 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 41 |
+
"num_cores_per_group": 1,
|
| 42 |
+
"on_device_sampling": true,
|
| 43 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 44 |
+
"output_logits": false,
|
| 45 |
+
"pp_degree": 1,
|
| 46 |
+
"sequence_length": 4096,
|
| 47 |
+
"speculation_length": 0,
|
| 48 |
+
"start_rank_id": 0,
|
| 49 |
+
"target": null,
|
| 50 |
+
"torch_dtype": "bfloat16",
|
| 51 |
+
"tp_degree": 2
|
| 52 |
+
},
|
| 53 |
+
"num_attention_heads": 32,
|
| 54 |
+
"num_hidden_layers": 40,
|
| 55 |
+
"num_key_value_heads": 8,
|
| 56 |
+
"residual_multiplier": 0.22,
|
| 57 |
+
"rms_norm_eps": 1e-05,
|
| 58 |
+
"rope_scaling": null,
|
| 59 |
+
"rope_theta": 5000000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 49155
|
| 63 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/132e478ab06dfd6f996e.json
ADDED
|
@@ -0,0 +1,60 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.1,
|
| 10 |
+
"attention_multiplier": 0.015625,
|
| 11 |
+
"embedding_multiplier": 12.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"logits_scaling": 8.0,
|
| 17 |
+
"max_position_embeddings": 131072,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"batch_size": 4,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 26 |
+
"checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
|
| 27 |
+
"continuous_batching": true,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"fused_qkv": true,
|
| 31 |
+
"glu_mlp": true,
|
| 32 |
+
"local_ranks_size": 2,
|
| 33 |
+
"logical_nc_config": 1,
|
| 34 |
+
"max_batch_size": 4,
|
| 35 |
+
"max_context_length": 4096,
|
| 36 |
+
"max_topk": 256,
|
| 37 |
+
"n_active_tokens": 4096,
|
| 38 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 39 |
+
"on_device_sampling": true,
|
| 40 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 41 |
+
"output_logits": false,
|
| 42 |
+
"pp_degree": 1,
|
| 43 |
+
"sequence_length": 4096,
|
| 44 |
+
"speculation_length": 0,
|
| 45 |
+
"start_rank_id": 0,
|
| 46 |
+
"target": null,
|
| 47 |
+
"torch_dtype": "bfloat16",
|
| 48 |
+
"tp_degree": 2
|
| 49 |
+
},
|
| 50 |
+
"num_attention_heads": 32,
|
| 51 |
+
"num_hidden_layers": 40,
|
| 52 |
+
"num_key_value_heads": 8,
|
| 53 |
+
"residual_multiplier": 0.22,
|
| 54 |
+
"rms_norm_eps": 1e-05,
|
| 55 |
+
"rope_scaling": null,
|
| 56 |
+
"rope_theta": 5000000.0,
|
| 57 |
+
"tie_word_embeddings": true,
|
| 58 |
+
"use_cache": true,
|
| 59 |
+
"vocab_size": 49155
|
| 60 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/430083c6aa3b306e22e0.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.1,
|
| 10 |
+
"attention_multiplier": 0.015625,
|
| 11 |
+
"embedding_multiplier": 12.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"logits_scaling": 8.0,
|
| 17 |
+
"max_position_embeddings": 131072,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"async_mode": false,
|
| 23 |
+
"attn_kernel_enabled": false,
|
| 24 |
+
"batch_size": 4,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"cc_pipeline_tiling_factor": 2,
|
| 27 |
+
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 28 |
+
"checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
|
| 29 |
+
"continuous_batching": true,
|
| 30 |
+
"enable_bucketing": false,
|
| 31 |
+
"ep_degree": 1,
|
| 32 |
+
"flash_decoding_enabled": false,
|
| 33 |
+
"fused_qkv": true,
|
| 34 |
+
"glu_mlp": true,
|
| 35 |
+
"local_ranks_size": 2,
|
| 36 |
+
"logical_nc_config": 1,
|
| 37 |
+
"max_batch_size": 4,
|
| 38 |
+
"max_context_length": 4096,
|
| 39 |
+
"max_topk": 256,
|
| 40 |
+
"n_active_tokens": 4096,
|
| 41 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 42 |
+
"num_cores_per_group": 1,
|
| 43 |
+
"on_device_sampling": true,
|
| 44 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 45 |
+
"output_logits": false,
|
| 46 |
+
"pp_degree": 1,
|
| 47 |
+
"sequence_length": 4096,
|
| 48 |
+
"speculation_length": 0,
|
| 49 |
+
"start_rank_id": 0,
|
| 50 |
+
"target": null,
|
| 51 |
+
"torch_dtype": "bfloat16",
|
| 52 |
+
"tp_degree": 2
|
| 53 |
+
},
|
| 54 |
+
"num_attention_heads": 32,
|
| 55 |
+
"num_hidden_layers": 40,
|
| 56 |
+
"num_key_value_heads": 8,
|
| 57 |
+
"residual_multiplier": 0.22,
|
| 58 |
+
"rms_norm_eps": 1e-05,
|
| 59 |
+
"rope_scaling": null,
|
| 60 |
+
"rope_theta": 5000000.0,
|
| 61 |
+
"tie_word_embeddings": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 49155
|
| 64 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/granite/ibm-granite/granite-3.1-2b-instruct/53e5296dda9b797224f5.json
ADDED
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"GraniteForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.1,
|
| 10 |
+
"attention_multiplier": 0.015625,
|
| 11 |
+
"embedding_multiplier": 12.0,
|
| 12 |
+
"hidden_act": "silu",
|
| 13 |
+
"hidden_size": 2048,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"intermediate_size": 8192,
|
| 16 |
+
"logits_scaling": 8.0,
|
| 17 |
+
"max_position_embeddings": 131072,
|
| 18 |
+
"mlp_bias": false,
|
| 19 |
+
"model_type": "granite",
|
| 20 |
+
"neuron": {
|
| 21 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 22 |
+
"async_mode": false,
|
| 23 |
+
"attn_kernel_enabled": false,
|
| 24 |
+
"batch_size": 4,
|
| 25 |
+
"capacity_factor": null,
|
| 26 |
+
"cc_pipeline_tiling_factor": 2,
|
| 27 |
+
"checkpoint_id": "ibm-granite/granite-3.1-2b-instruct",
|
| 28 |
+
"checkpoint_revision": "bbc2aed595bd38bd770263dc3ab831db9794441d",
|
| 29 |
+
"continuous_batching": true,
|
| 30 |
+
"enable_bucketing": false,
|
| 31 |
+
"ep_degree": 1,
|
| 32 |
+
"flash_decoding_enabled": false,
|
| 33 |
+
"fused_qkv": true,
|
| 34 |
+
"glu_mlp": true,
|
| 35 |
+
"is_chunked_prefill": false,
|
| 36 |
+
"local_ranks_size": 2,
|
| 37 |
+
"logical_nc_config": 1,
|
| 38 |
+
"max_batch_size": 4,
|
| 39 |
+
"max_context_length": 4096,
|
| 40 |
+
"max_topk": 256,
|
| 41 |
+
"n_active_tokens": 4096,
|
| 42 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 43 |
+
"num_cores_per_group": 1,
|
| 44 |
+
"on_device_sampling": true,
|
| 45 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 46 |
+
"output_logits": false,
|
| 47 |
+
"pp_degree": 1,
|
| 48 |
+
"qkv_kernel_enabled": false,
|
| 49 |
+
"sequence_length": 4096,
|
| 50 |
+
"sequence_parallel_enabled": false,
|
| 51 |
+
"speculation_length": 0,
|
| 52 |
+
"start_rank_id": 0,
|
| 53 |
+
"target": null,
|
| 54 |
+
"torch_dtype": "bfloat16",
|
| 55 |
+
"tp_degree": 2
|
| 56 |
+
},
|
| 57 |
+
"num_attention_heads": 32,
|
| 58 |
+
"num_hidden_layers": 40,
|
| 59 |
+
"num_key_value_heads": 8,
|
| 60 |
+
"residual_multiplier": 0.22,
|
| 61 |
+
"rms_norm_eps": 1e-05,
|
| 62 |
+
"rope_scaling": null,
|
| 63 |
+
"rope_theta": 5000000.0,
|
| 64 |
+
"tie_word_embeddings": true,
|
| 65 |
+
"use_cache": true,
|
| 66 |
+
"vocab_size": 49155
|
| 67 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/07d4305cad86254ba230.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 4,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 16,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 64,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"batch_size": 2,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 23 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 24 |
+
"continuous_batching": true,
|
| 25 |
+
"enable_bucketing": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"logical_nc_config": 1,
|
| 31 |
+
"max_batch_size": 2,
|
| 32 |
+
"max_context_length": 1024,
|
| 33 |
+
"max_topk": 256,
|
| 34 |
+
"n_active_tokens": 1024,
|
| 35 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 36 |
+
"on_device_sampling": true,
|
| 37 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 38 |
+
"output_logits": false,
|
| 39 |
+
"pp_degree": 1,
|
| 40 |
+
"sequence_length": 1024,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": null,
|
| 44 |
+
"torch_dtype": "float16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 4,
|
| 48 |
+
"num_hidden_layers": 2,
|
| 49 |
+
"num_key_value_heads": 4,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/llamafactory/tiny-random-Llama-3/98c1ce6e6b6d9fc1ad3e.json
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "llamafactory/tiny-random-Llama-3",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 4,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 16,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 64,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"checkpoint_id": "llamafactory/tiny-random-Llama-3",
|
| 23 |
+
"checkpoint_revision": "bf2a2e3bf199ad2ee96f02a3c00246c608db22a8",
|
| 24 |
+
"continuous_batching": false,
|
| 25 |
+
"enable_bucketing": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"logical_nc_config": 1,
|
| 31 |
+
"max_batch_size": 1,
|
| 32 |
+
"max_context_length": 1024,
|
| 33 |
+
"max_topk": 256,
|
| 34 |
+
"n_active_tokens": 1024,
|
| 35 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 36 |
+
"on_device_sampling": true,
|
| 37 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 38 |
+
"output_logits": false,
|
| 39 |
+
"pp_degree": 1,
|
| 40 |
+
"sequence_length": 1024,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": null,
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 4,
|
| 48 |
+
"num_hidden_layers": 2,
|
| 49 |
+
"num_key_value_heads": 4,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 8.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": false,
|
| 61 |
+
"use_cache": true,
|
| 62 |
+
"vocab_size": 128256
|
| 63 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/0042866b0b29ca346a04.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"attn_kernel_enabled": false,
|
| 21 |
+
"batch_size": 4,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"cc_pipeline_tiling_factor": 2,
|
| 24 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 25 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 26 |
+
"continuous_batching": true,
|
| 27 |
+
"enable_bucketing": false,
|
| 28 |
+
"ep_degree": 1,
|
| 29 |
+
"flash_decoding_enabled": false,
|
| 30 |
+
"fused_qkv": true,
|
| 31 |
+
"glu_mlp": true,
|
| 32 |
+
"local_ranks_size": 2,
|
| 33 |
+
"logical_nc_config": 1,
|
| 34 |
+
"max_batch_size": 4,
|
| 35 |
+
"max_context_length": 4096,
|
| 36 |
+
"max_topk": 256,
|
| 37 |
+
"n_active_tokens": 4096,
|
| 38 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 39 |
+
"num_cores_per_group": 1,
|
| 40 |
+
"on_device_sampling": true,
|
| 41 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 42 |
+
"output_logits": false,
|
| 43 |
+
"pp_degree": 1,
|
| 44 |
+
"sequence_length": 4096,
|
| 45 |
+
"speculation_length": 0,
|
| 46 |
+
"start_rank_id": 0,
|
| 47 |
+
"target": null,
|
| 48 |
+
"torch_dtype": "float16",
|
| 49 |
+
"tp_degree": 2
|
| 50 |
+
},
|
| 51 |
+
"num_attention_heads": 32,
|
| 52 |
+
"num_hidden_layers": 16,
|
| 53 |
+
"num_key_value_heads": 8,
|
| 54 |
+
"pretraining_tp": 1,
|
| 55 |
+
"rms_norm_eps": 1e-05,
|
| 56 |
+
"rope_scaling": {
|
| 57 |
+
"factor": 32.0,
|
| 58 |
+
"high_freq_factor": 4.0,
|
| 59 |
+
"low_freq_factor": 1.0,
|
| 60 |
+
"original_max_position_embeddings": 8192,
|
| 61 |
+
"rope_type": "llama3"
|
| 62 |
+
},
|
| 63 |
+
"rope_theta": 500000.0,
|
| 64 |
+
"tie_word_embeddings": true,
|
| 65 |
+
"unsloth_fixed": true,
|
| 66 |
+
"use_cache": true,
|
| 67 |
+
"vocab_size": 128256
|
| 68 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/05f19cc6cfed252cbe80.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"cc_pipeline_tiling_factor": 2,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": null,
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"enable_bucketing": false,
|
| 27 |
+
"ep_degree": 1,
|
| 28 |
+
"fused_qkv": false,
|
| 29 |
+
"glu_mlp": true,
|
| 30 |
+
"local_ranks_size": 2,
|
| 31 |
+
"logical_nc_config": 1,
|
| 32 |
+
"max_batch_size": 1,
|
| 33 |
+
"max_context_length": 4096,
|
| 34 |
+
"max_topk": 256,
|
| 35 |
+
"n_active_tokens": 4096,
|
| 36 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 37 |
+
"on_device_sampling": false,
|
| 38 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 39 |
+
"output_logits": false,
|
| 40 |
+
"pp_degree": 1,
|
| 41 |
+
"sequence_length": 4096,
|
| 42 |
+
"speculation_length": 5,
|
| 43 |
+
"start_rank_id": 0,
|
| 44 |
+
"target": null,
|
| 45 |
+
"torch_dtype": "bfloat16",
|
| 46 |
+
"tp_degree": 2
|
| 47 |
+
},
|
| 48 |
+
"num_attention_heads": 32,
|
| 49 |
+
"num_hidden_layers": 16,
|
| 50 |
+
"num_key_value_heads": 8,
|
| 51 |
+
"pretraining_tp": 1,
|
| 52 |
+
"rms_norm_eps": 1e-05,
|
| 53 |
+
"rope_scaling": {
|
| 54 |
+
"factor": 32.0,
|
| 55 |
+
"high_freq_factor": 4.0,
|
| 56 |
+
"low_freq_factor": 1.0,
|
| 57 |
+
"original_max_position_embeddings": 8192,
|
| 58 |
+
"rope_type": "llama3"
|
| 59 |
+
},
|
| 60 |
+
"rope_theta": 500000.0,
|
| 61 |
+
"tie_word_embeddings": true,
|
| 62 |
+
"unsloth_fixed": true,
|
| 63 |
+
"use_cache": true,
|
| 64 |
+
"vocab_size": 128256
|
| 65 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/235e6b665a70c7f3b88b.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 4,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 27 |
+
"continuous_batching": true,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"local_ranks_size": 2,
|
| 34 |
+
"logical_nc_config": 1,
|
| 35 |
+
"max_batch_size": 4,
|
| 36 |
+
"max_context_length": 4096,
|
| 37 |
+
"max_topk": 256,
|
| 38 |
+
"n_active_tokens": 4096,
|
| 39 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 40 |
+
"num_cores_per_group": 1,
|
| 41 |
+
"on_device_sampling": true,
|
| 42 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 43 |
+
"output_logits": false,
|
| 44 |
+
"pp_degree": 1,
|
| 45 |
+
"sequence_length": 4096,
|
| 46 |
+
"speculation_length": 0,
|
| 47 |
+
"start_rank_id": 0,
|
| 48 |
+
"target": null,
|
| 49 |
+
"torch_dtype": "float16",
|
| 50 |
+
"tp_degree": 2
|
| 51 |
+
},
|
| 52 |
+
"num_attention_heads": 32,
|
| 53 |
+
"num_hidden_layers": 16,
|
| 54 |
+
"num_key_value_heads": 8,
|
| 55 |
+
"pretraining_tp": 1,
|
| 56 |
+
"rms_norm_eps": 1e-05,
|
| 57 |
+
"rope_scaling": {
|
| 58 |
+
"factor": 32.0,
|
| 59 |
+
"high_freq_factor": 4.0,
|
| 60 |
+
"low_freq_factor": 1.0,
|
| 61 |
+
"original_max_position_embeddings": 8192,
|
| 62 |
+
"rope_type": "llama3"
|
| 63 |
+
},
|
| 64 |
+
"rope_theta": 500000.0,
|
| 65 |
+
"tie_word_embeddings": true,
|
| 66 |
+
"unsloth_fixed": true,
|
| 67 |
+
"use_cache": true,
|
| 68 |
+
"vocab_size": 128256
|
| 69 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/54998903c673b03ab682.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": null,
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": false,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"local_ranks_size": 2,
|
| 34 |
+
"logical_nc_config": 1,
|
| 35 |
+
"max_batch_size": 1,
|
| 36 |
+
"max_context_length": 4096,
|
| 37 |
+
"max_topk": 256,
|
| 38 |
+
"n_active_tokens": 4096,
|
| 39 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 40 |
+
"num_cores_per_group": 1,
|
| 41 |
+
"on_device_sampling": false,
|
| 42 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 43 |
+
"output_logits": false,
|
| 44 |
+
"pp_degree": 1,
|
| 45 |
+
"sequence_length": 4096,
|
| 46 |
+
"speculation_length": 0,
|
| 47 |
+
"start_rank_id": 0,
|
| 48 |
+
"target": null,
|
| 49 |
+
"torch_dtype": "bfloat16",
|
| 50 |
+
"tp_degree": 2
|
| 51 |
+
},
|
| 52 |
+
"num_attention_heads": 32,
|
| 53 |
+
"num_hidden_layers": 16,
|
| 54 |
+
"num_key_value_heads": 8,
|
| 55 |
+
"pretraining_tp": 1,
|
| 56 |
+
"rms_norm_eps": 1e-05,
|
| 57 |
+
"rope_scaling": {
|
| 58 |
+
"factor": 32.0,
|
| 59 |
+
"high_freq_factor": 4.0,
|
| 60 |
+
"low_freq_factor": 1.0,
|
| 61 |
+
"original_max_position_embeddings": 8192,
|
| 62 |
+
"rope_type": "llama3"
|
| 63 |
+
},
|
| 64 |
+
"rope_theta": 500000.0,
|
| 65 |
+
"tie_word_embeddings": true,
|
| 66 |
+
"unsloth_fixed": true,
|
| 67 |
+
"use_cache": true,
|
| 68 |
+
"vocab_size": 128256
|
| 69 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/559712f03e12a7d3db9d.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"attn_kernel_enabled": false,
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"cc_pipeline_tiling_factor": 2,
|
| 24 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 25 |
+
"checkpoint_revision": null,
|
| 26 |
+
"continuous_batching": false,
|
| 27 |
+
"enable_bucketing": false,
|
| 28 |
+
"ep_degree": 1,
|
| 29 |
+
"flash_decoding_enabled": false,
|
| 30 |
+
"fused_qkv": false,
|
| 31 |
+
"glu_mlp": true,
|
| 32 |
+
"local_ranks_size": 2,
|
| 33 |
+
"logical_nc_config": 1,
|
| 34 |
+
"max_batch_size": 1,
|
| 35 |
+
"max_context_length": 4096,
|
| 36 |
+
"max_topk": 256,
|
| 37 |
+
"n_active_tokens": 4096,
|
| 38 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 39 |
+
"num_cores_per_group": 1,
|
| 40 |
+
"on_device_sampling": false,
|
| 41 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 42 |
+
"output_logits": false,
|
| 43 |
+
"pp_degree": 1,
|
| 44 |
+
"sequence_length": 4096,
|
| 45 |
+
"speculation_length": 5,
|
| 46 |
+
"start_rank_id": 0,
|
| 47 |
+
"target": null,
|
| 48 |
+
"torch_dtype": "bfloat16",
|
| 49 |
+
"tp_degree": 2
|
| 50 |
+
},
|
| 51 |
+
"num_attention_heads": 32,
|
| 52 |
+
"num_hidden_layers": 16,
|
| 53 |
+
"num_key_value_heads": 8,
|
| 54 |
+
"pretraining_tp": 1,
|
| 55 |
+
"rms_norm_eps": 1e-05,
|
| 56 |
+
"rope_scaling": {
|
| 57 |
+
"factor": 32.0,
|
| 58 |
+
"high_freq_factor": 4.0,
|
| 59 |
+
"low_freq_factor": 1.0,
|
| 60 |
+
"original_max_position_embeddings": 8192,
|
| 61 |
+
"rope_type": "llama3"
|
| 62 |
+
},
|
| 63 |
+
"rope_theta": 500000.0,
|
| 64 |
+
"tie_word_embeddings": true,
|
| 65 |
+
"unsloth_fixed": true,
|
| 66 |
+
"use_cache": true,
|
| 67 |
+
"vocab_size": 128256
|
| 68 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/62c9b979338956707306.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": null,
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": false,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 1,
|
| 37 |
+
"max_context_length": 4096,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"n_active_tokens": 4096,
|
| 40 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 41 |
+
"num_cores_per_group": 1,
|
| 42 |
+
"on_device_sampling": false,
|
| 43 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 44 |
+
"output_logits": false,
|
| 45 |
+
"pp_degree": 1,
|
| 46 |
+
"sequence_length": 4096,
|
| 47 |
+
"sequence_parallel_enabled": false,
|
| 48 |
+
"speculation_length": 5,
|
| 49 |
+
"start_rank_id": 0,
|
| 50 |
+
"target": null,
|
| 51 |
+
"torch_dtype": "bfloat16",
|
| 52 |
+
"tp_degree": 2
|
| 53 |
+
},
|
| 54 |
+
"num_attention_heads": 32,
|
| 55 |
+
"num_hidden_layers": 16,
|
| 56 |
+
"num_key_value_heads": 8,
|
| 57 |
+
"pretraining_tp": 1,
|
| 58 |
+
"rms_norm_eps": 1e-05,
|
| 59 |
+
"rope_scaling": {
|
| 60 |
+
"factor": 32.0,
|
| 61 |
+
"high_freq_factor": 4.0,
|
| 62 |
+
"low_freq_factor": 1.0,
|
| 63 |
+
"original_max_position_embeddings": 8192,
|
| 64 |
+
"rope_type": "llama3"
|
| 65 |
+
},
|
| 66 |
+
"rope_theta": 500000.0,
|
| 67 |
+
"tie_word_embeddings": true,
|
| 68 |
+
"unsloth_fixed": true,
|
| 69 |
+
"use_cache": true,
|
| 70 |
+
"vocab_size": 128256
|
| 71 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/7b9ae8b155ce16ab1f81.json
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"attn_kernel_enabled": false,
|
| 21 |
+
"batch_size": 1,
|
| 22 |
+
"capacity_factor": null,
|
| 23 |
+
"cc_pipeline_tiling_factor": 2,
|
| 24 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 25 |
+
"checkpoint_revision": null,
|
| 26 |
+
"continuous_batching": false,
|
| 27 |
+
"enable_bucketing": false,
|
| 28 |
+
"ep_degree": 1,
|
| 29 |
+
"flash_decoding_enabled": false,
|
| 30 |
+
"fused_qkv": false,
|
| 31 |
+
"glu_mlp": true,
|
| 32 |
+
"local_ranks_size": 2,
|
| 33 |
+
"logical_nc_config": 1,
|
| 34 |
+
"max_batch_size": 1,
|
| 35 |
+
"max_context_length": 4096,
|
| 36 |
+
"max_topk": 256,
|
| 37 |
+
"n_active_tokens": 4096,
|
| 38 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 39 |
+
"num_cores_per_group": 1,
|
| 40 |
+
"on_device_sampling": false,
|
| 41 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 42 |
+
"output_logits": false,
|
| 43 |
+
"pp_degree": 1,
|
| 44 |
+
"sequence_length": 4096,
|
| 45 |
+
"speculation_length": 0,
|
| 46 |
+
"start_rank_id": 0,
|
| 47 |
+
"target": null,
|
| 48 |
+
"torch_dtype": "bfloat16",
|
| 49 |
+
"tp_degree": 2
|
| 50 |
+
},
|
| 51 |
+
"num_attention_heads": 32,
|
| 52 |
+
"num_hidden_layers": 16,
|
| 53 |
+
"num_key_value_heads": 8,
|
| 54 |
+
"pretraining_tp": 1,
|
| 55 |
+
"rms_norm_eps": 1e-05,
|
| 56 |
+
"rope_scaling": {
|
| 57 |
+
"factor": 32.0,
|
| 58 |
+
"high_freq_factor": 4.0,
|
| 59 |
+
"low_freq_factor": 1.0,
|
| 60 |
+
"original_max_position_embeddings": 8192,
|
| 61 |
+
"rope_type": "llama3"
|
| 62 |
+
},
|
| 63 |
+
"rope_theta": 500000.0,
|
| 64 |
+
"tie_word_embeddings": true,
|
| 65 |
+
"unsloth_fixed": true,
|
| 66 |
+
"use_cache": true,
|
| 67 |
+
"vocab_size": 128256
|
| 68 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/8eae465f9a28e71d02c7.json
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 4,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 27 |
+
"continuous_batching": true,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 4,
|
| 37 |
+
"max_context_length": 4096,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"n_active_tokens": 4096,
|
| 40 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 41 |
+
"num_cores_per_group": 1,
|
| 42 |
+
"on_device_sampling": true,
|
| 43 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 44 |
+
"output_logits": false,
|
| 45 |
+
"pp_degree": 1,
|
| 46 |
+
"qkv_kernel_enabled": false,
|
| 47 |
+
"sequence_length": 4096,
|
| 48 |
+
"sequence_parallel_enabled": false,
|
| 49 |
+
"speculation_length": 0,
|
| 50 |
+
"start_rank_id": 0,
|
| 51 |
+
"target": null,
|
| 52 |
+
"torch_dtype": "float16",
|
| 53 |
+
"tp_degree": 2
|
| 54 |
+
},
|
| 55 |
+
"num_attention_heads": 32,
|
| 56 |
+
"num_hidden_layers": 16,
|
| 57 |
+
"num_key_value_heads": 8,
|
| 58 |
+
"pretraining_tp": 1,
|
| 59 |
+
"rms_norm_eps": 1e-05,
|
| 60 |
+
"rope_scaling": {
|
| 61 |
+
"factor": 32.0,
|
| 62 |
+
"high_freq_factor": 4.0,
|
| 63 |
+
"low_freq_factor": 1.0,
|
| 64 |
+
"original_max_position_embeddings": 8192,
|
| 65 |
+
"rope_type": "llama3"
|
| 66 |
+
},
|
| 67 |
+
"rope_theta": 500000.0,
|
| 68 |
+
"tie_word_embeddings": true,
|
| 69 |
+
"unsloth_fixed": true,
|
| 70 |
+
"use_cache": true,
|
| 71 |
+
"vocab_size": 128256
|
| 72 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/94b3194811206ba2227e.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"batch_size": 4,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"cc_pipeline_tiling_factor": 2,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 25 |
+
"continuous_batching": true,
|
| 26 |
+
"enable_bucketing": false,
|
| 27 |
+
"ep_degree": 1,
|
| 28 |
+
"fused_qkv": true,
|
| 29 |
+
"glu_mlp": true,
|
| 30 |
+
"local_ranks_size": 2,
|
| 31 |
+
"logical_nc_config": 1,
|
| 32 |
+
"max_batch_size": 4,
|
| 33 |
+
"max_context_length": 4096,
|
| 34 |
+
"max_topk": 256,
|
| 35 |
+
"n_active_tokens": 4096,
|
| 36 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 37 |
+
"on_device_sampling": true,
|
| 38 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 39 |
+
"output_logits": false,
|
| 40 |
+
"pp_degree": 1,
|
| 41 |
+
"sequence_length": 4096,
|
| 42 |
+
"speculation_length": 0,
|
| 43 |
+
"start_rank_id": 0,
|
| 44 |
+
"target": null,
|
| 45 |
+
"torch_dtype": "float16",
|
| 46 |
+
"tp_degree": 2
|
| 47 |
+
},
|
| 48 |
+
"num_attention_heads": 32,
|
| 49 |
+
"num_hidden_layers": 16,
|
| 50 |
+
"num_key_value_heads": 8,
|
| 51 |
+
"pretraining_tp": 1,
|
| 52 |
+
"rms_norm_eps": 1e-05,
|
| 53 |
+
"rope_scaling": {
|
| 54 |
+
"factor": 32.0,
|
| 55 |
+
"high_freq_factor": 4.0,
|
| 56 |
+
"low_freq_factor": 1.0,
|
| 57 |
+
"original_max_position_embeddings": 8192,
|
| 58 |
+
"rope_type": "llama3"
|
| 59 |
+
},
|
| 60 |
+
"rope_theta": 500000.0,
|
| 61 |
+
"tie_word_embeddings": true,
|
| 62 |
+
"unsloth_fixed": true,
|
| 63 |
+
"use_cache": true,
|
| 64 |
+
"vocab_size": 128256
|
| 65 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/bc4061b1ead7bafcdaaf.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 23 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 24 |
+
"continuous_batching": false,
|
| 25 |
+
"enable_bucketing": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 24,
|
| 30 |
+
"logical_nc_config": 1,
|
| 31 |
+
"max_batch_size": 1,
|
| 32 |
+
"max_context_length": 4096,
|
| 33 |
+
"max_topk": 256,
|
| 34 |
+
"n_active_tokens": 4096,
|
| 35 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 36 |
+
"on_device_sampling": true,
|
| 37 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 38 |
+
"output_logits": false,
|
| 39 |
+
"pp_degree": 1,
|
| 40 |
+
"sequence_length": 4096,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": null,
|
| 44 |
+
"torch_dtype": "bfloat16",
|
| 45 |
+
"tp_degree": 24
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/c6f1e5861bd12b93b78f.json
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": null,
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": false,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"local_ranks_size": 2,
|
| 34 |
+
"logical_nc_config": 1,
|
| 35 |
+
"max_batch_size": 1,
|
| 36 |
+
"max_context_length": 4096,
|
| 37 |
+
"max_topk": 256,
|
| 38 |
+
"n_active_tokens": 4096,
|
| 39 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 40 |
+
"num_cores_per_group": 1,
|
| 41 |
+
"on_device_sampling": false,
|
| 42 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 43 |
+
"output_logits": false,
|
| 44 |
+
"pp_degree": 1,
|
| 45 |
+
"sequence_length": 4096,
|
| 46 |
+
"speculation_length": 5,
|
| 47 |
+
"start_rank_id": 0,
|
| 48 |
+
"target": null,
|
| 49 |
+
"torch_dtype": "bfloat16",
|
| 50 |
+
"tp_degree": 2
|
| 51 |
+
},
|
| 52 |
+
"num_attention_heads": 32,
|
| 53 |
+
"num_hidden_layers": 16,
|
| 54 |
+
"num_key_value_heads": 8,
|
| 55 |
+
"pretraining_tp": 1,
|
| 56 |
+
"rms_norm_eps": 1e-05,
|
| 57 |
+
"rope_scaling": {
|
| 58 |
+
"factor": 32.0,
|
| 59 |
+
"high_freq_factor": 4.0,
|
| 60 |
+
"low_freq_factor": 1.0,
|
| 61 |
+
"original_max_position_embeddings": 8192,
|
| 62 |
+
"rope_type": "llama3"
|
| 63 |
+
},
|
| 64 |
+
"rope_theta": 500000.0,
|
| 65 |
+
"tie_word_embeddings": true,
|
| 66 |
+
"unsloth_fixed": true,
|
| 67 |
+
"use_cache": true,
|
| 68 |
+
"vocab_size": 128256
|
| 69 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/d03410f237213137456b.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 4,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 27 |
+
"continuous_batching": true,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": true,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 4,
|
| 37 |
+
"max_context_length": 4096,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"n_active_tokens": 4096,
|
| 40 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 41 |
+
"num_cores_per_group": 1,
|
| 42 |
+
"on_device_sampling": true,
|
| 43 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 44 |
+
"output_logits": false,
|
| 45 |
+
"pp_degree": 1,
|
| 46 |
+
"sequence_length": 4096,
|
| 47 |
+
"sequence_parallel_enabled": false,
|
| 48 |
+
"speculation_length": 0,
|
| 49 |
+
"start_rank_id": 0,
|
| 50 |
+
"target": null,
|
| 51 |
+
"torch_dtype": "float16",
|
| 52 |
+
"tp_degree": 2
|
| 53 |
+
},
|
| 54 |
+
"num_attention_heads": 32,
|
| 55 |
+
"num_hidden_layers": 16,
|
| 56 |
+
"num_key_value_heads": 8,
|
| 57 |
+
"pretraining_tp": 1,
|
| 58 |
+
"rms_norm_eps": 1e-05,
|
| 59 |
+
"rope_scaling": {
|
| 60 |
+
"factor": 32.0,
|
| 61 |
+
"high_freq_factor": 4.0,
|
| 62 |
+
"low_freq_factor": 1.0,
|
| 63 |
+
"original_max_position_embeddings": 8192,
|
| 64 |
+
"rope_type": "llama3"
|
| 65 |
+
},
|
| 66 |
+
"rope_theta": 500000.0,
|
| 67 |
+
"tie_word_embeddings": true,
|
| 68 |
+
"unsloth_fixed": true,
|
| 69 |
+
"use_cache": true,
|
| 70 |
+
"vocab_size": 128256
|
| 71 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/e15ce75e921fd9551605.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"batch_size": 4,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 23 |
+
"checkpoint_revision": "5a8abab4a5d6f164389b1079fb721cfab8d7126c",
|
| 24 |
+
"continuous_batching": true,
|
| 25 |
+
"enable_bucketing": false,
|
| 26 |
+
"ep_degree": 1,
|
| 27 |
+
"fused_qkv": true,
|
| 28 |
+
"glu_mlp": true,
|
| 29 |
+
"local_ranks_size": 2,
|
| 30 |
+
"logical_nc_config": 1,
|
| 31 |
+
"max_batch_size": 4,
|
| 32 |
+
"max_context_length": 4096,
|
| 33 |
+
"max_topk": 256,
|
| 34 |
+
"n_active_tokens": 4096,
|
| 35 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 36 |
+
"on_device_sampling": true,
|
| 37 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 38 |
+
"output_logits": false,
|
| 39 |
+
"pp_degree": 1,
|
| 40 |
+
"sequence_length": 4096,
|
| 41 |
+
"speculation_length": 0,
|
| 42 |
+
"start_rank_id": 0,
|
| 43 |
+
"target": null,
|
| 44 |
+
"torch_dtype": "float16",
|
| 45 |
+
"tp_degree": 2
|
| 46 |
+
},
|
| 47 |
+
"num_attention_heads": 32,
|
| 48 |
+
"num_hidden_layers": 16,
|
| 49 |
+
"num_key_value_heads": 8,
|
| 50 |
+
"pretraining_tp": 1,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_scaling": {
|
| 53 |
+
"factor": 32.0,
|
| 54 |
+
"high_freq_factor": 4.0,
|
| 55 |
+
"low_freq_factor": 1.0,
|
| 56 |
+
"original_max_position_embeddings": 8192,
|
| 57 |
+
"rope_type": "llama3"
|
| 58 |
+
},
|
| 59 |
+
"rope_theta": 500000.0,
|
| 60 |
+
"tie_word_embeddings": true,
|
| 61 |
+
"unsloth_fixed": true,
|
| 62 |
+
"use_cache": true,
|
| 63 |
+
"vocab_size": 128256
|
| 64 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f70dc623c263d5d225a1.json
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"batch_size": 1,
|
| 21 |
+
"capacity_factor": null,
|
| 22 |
+
"cc_pipeline_tiling_factor": 2,
|
| 23 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 24 |
+
"checkpoint_revision": null,
|
| 25 |
+
"continuous_batching": false,
|
| 26 |
+
"enable_bucketing": false,
|
| 27 |
+
"ep_degree": 1,
|
| 28 |
+
"fused_qkv": false,
|
| 29 |
+
"glu_mlp": true,
|
| 30 |
+
"local_ranks_size": 2,
|
| 31 |
+
"logical_nc_config": 1,
|
| 32 |
+
"max_batch_size": 1,
|
| 33 |
+
"max_context_length": 4096,
|
| 34 |
+
"max_topk": 256,
|
| 35 |
+
"n_active_tokens": 4096,
|
| 36 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 37 |
+
"on_device_sampling": false,
|
| 38 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 39 |
+
"output_logits": false,
|
| 40 |
+
"pp_degree": 1,
|
| 41 |
+
"sequence_length": 4096,
|
| 42 |
+
"speculation_length": 0,
|
| 43 |
+
"start_rank_id": 0,
|
| 44 |
+
"target": null,
|
| 45 |
+
"torch_dtype": "bfloat16",
|
| 46 |
+
"tp_degree": 2
|
| 47 |
+
},
|
| 48 |
+
"num_attention_heads": 32,
|
| 49 |
+
"num_hidden_layers": 16,
|
| 50 |
+
"num_key_value_heads": 8,
|
| 51 |
+
"pretraining_tp": 1,
|
| 52 |
+
"rms_norm_eps": 1e-05,
|
| 53 |
+
"rope_scaling": {
|
| 54 |
+
"factor": 32.0,
|
| 55 |
+
"high_freq_factor": 4.0,
|
| 56 |
+
"low_freq_factor": 1.0,
|
| 57 |
+
"original_max_position_embeddings": 8192,
|
| 58 |
+
"rope_type": "llama3"
|
| 59 |
+
},
|
| 60 |
+
"rope_theta": 500000.0,
|
| 61 |
+
"tie_word_embeddings": true,
|
| 62 |
+
"unsloth_fixed": true,
|
| 63 |
+
"use_cache": true,
|
| 64 |
+
"vocab_size": 128256
|
| 65 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/llama/unsloth/Llama-3.2-1B-Instruct/f98ea9d9fe79ee8c6c52.json
ADDED
|
@@ -0,0 +1,71 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"LlamaForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_bias": false,
|
| 9 |
+
"attention_dropout": 0.0,
|
| 10 |
+
"head_dim": 64,
|
| 11 |
+
"hidden_act": "silu",
|
| 12 |
+
"hidden_size": 2048,
|
| 13 |
+
"initializer_range": 0.02,
|
| 14 |
+
"intermediate_size": 8192,
|
| 15 |
+
"max_position_embeddings": 131072,
|
| 16 |
+
"mlp_bias": false,
|
| 17 |
+
"model_type": "llama",
|
| 18 |
+
"neuron": {
|
| 19 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 20 |
+
"async_mode": false,
|
| 21 |
+
"attn_kernel_enabled": false,
|
| 22 |
+
"batch_size": 1,
|
| 23 |
+
"capacity_factor": null,
|
| 24 |
+
"cc_pipeline_tiling_factor": 2,
|
| 25 |
+
"checkpoint_id": "unsloth/Llama-3.2-1B-Instruct",
|
| 26 |
+
"checkpoint_revision": null,
|
| 27 |
+
"continuous_batching": false,
|
| 28 |
+
"enable_bucketing": false,
|
| 29 |
+
"ep_degree": 1,
|
| 30 |
+
"flash_decoding_enabled": false,
|
| 31 |
+
"fused_qkv": false,
|
| 32 |
+
"glu_mlp": true,
|
| 33 |
+
"is_chunked_prefill": false,
|
| 34 |
+
"local_ranks_size": 2,
|
| 35 |
+
"logical_nc_config": 1,
|
| 36 |
+
"max_batch_size": 1,
|
| 37 |
+
"max_context_length": 4096,
|
| 38 |
+
"max_topk": 256,
|
| 39 |
+
"n_active_tokens": 4096,
|
| 40 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 41 |
+
"num_cores_per_group": 1,
|
| 42 |
+
"on_device_sampling": false,
|
| 43 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 44 |
+
"output_logits": false,
|
| 45 |
+
"pp_degree": 1,
|
| 46 |
+
"sequence_length": 4096,
|
| 47 |
+
"sequence_parallel_enabled": false,
|
| 48 |
+
"speculation_length": 0,
|
| 49 |
+
"start_rank_id": 0,
|
| 50 |
+
"target": null,
|
| 51 |
+
"torch_dtype": "bfloat16",
|
| 52 |
+
"tp_degree": 2
|
| 53 |
+
},
|
| 54 |
+
"num_attention_heads": 32,
|
| 55 |
+
"num_hidden_layers": 16,
|
| 56 |
+
"num_key_value_heads": 8,
|
| 57 |
+
"pretraining_tp": 1,
|
| 58 |
+
"rms_norm_eps": 1e-05,
|
| 59 |
+
"rope_scaling": {
|
| 60 |
+
"factor": 32.0,
|
| 61 |
+
"high_freq_factor": 4.0,
|
| 62 |
+
"low_freq_factor": 1.0,
|
| 63 |
+
"original_max_position_embeddings": 8192,
|
| 64 |
+
"rope_type": "llama3"
|
| 65 |
+
},
|
| 66 |
+
"rope_theta": 500000.0,
|
| 67 |
+
"tie_word_embeddings": true,
|
| 68 |
+
"unsloth_fixed": true,
|
| 69 |
+
"use_cache": true,
|
| 70 |
+
"vocab_size": 128256
|
| 71 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev5/mixtral/dacorvo/Mixtral-tiny/039cd3c8f5f1a95e9368.json
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_entry_class": "SingleModelCacheEntry",
|
| 3 |
+
"_model_id": "dacorvo/Mixtral-tiny",
|
| 4 |
+
"_task": "text-generation",
|
| 5 |
+
"architectures": [
|
| 6 |
+
"MixtralForCausalLM"
|
| 7 |
+
],
|
| 8 |
+
"attention_dropout": 0.0,
|
| 9 |
+
"head_dim": 32,
|
| 10 |
+
"hidden_act": "silu",
|
| 11 |
+
"hidden_size": 1024,
|
| 12 |
+
"initializer_range": 0.02,
|
| 13 |
+
"intermediate_size": 3584,
|
| 14 |
+
"max_position_embeddings": 1024,
|
| 15 |
+
"model_type": "mixtral",
|
| 16 |
+
"neuron": {
|
| 17 |
+
"_serialized_key": "NxDNeuronConfig",
|
| 18 |
+
"batch_size": 1,
|
| 19 |
+
"capacity_factor": null,
|
| 20 |
+
"checkpoint_id": "dacorvo/Mixtral-tiny",
|
| 21 |
+
"checkpoint_revision": "c557ba205ddff6ea911f4719e0d543d6c08356b6",
|
| 22 |
+
"continuous_batching": false,
|
| 23 |
+
"enable_bucketing": false,
|
| 24 |
+
"ep_degree": 1,
|
| 25 |
+
"fused_qkv": false,
|
| 26 |
+
"glu_mlp": true,
|
| 27 |
+
"local_ranks_size": 2,
|
| 28 |
+
"logical_nc_config": 1,
|
| 29 |
+
"max_batch_size": 1,
|
| 30 |
+
"max_context_length": 1024,
|
| 31 |
+
"max_topk": 256,
|
| 32 |
+
"n_active_tokens": 1024,
|
| 33 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
| 34 |
+
"on_device_sampling": false,
|
| 35 |
+
"optimum_neuron_version": "0.3.1.dev5",
|
| 36 |
+
"output_logits": false,
|
| 37 |
+
"pp_degree": 1,
|
| 38 |
+
"sequence_length": 1024,
|
| 39 |
+
"speculation_length": 0,
|
| 40 |
+
"start_rank_id": 0,
|
| 41 |
+
"target": null,
|
| 42 |
+
"torch_dtype": "bfloat16",
|
| 43 |
+
"tp_degree": 2
|
| 44 |
+
},
|
| 45 |
+
"num_attention_heads": 32,
|
| 46 |
+
"num_experts_per_tok": 2,
|
| 47 |
+
"num_hidden_layers": 2,
|
| 48 |
+
"num_key_value_heads": 8,
|
| 49 |
+
"num_local_experts": 8,
|
| 50 |
+
"output_router_logits": false,
|
| 51 |
+
"rms_norm_eps": 1e-05,
|
| 52 |
+
"rope_theta": 10000.0,
|
| 53 |
+
"router_aux_loss_coef": 0.001,
|
| 54 |
+
"router_jitter_noise": 0.0,
|
| 55 |
+
"sliding_window": 4096,
|
| 56 |
+
"tie_word_embeddings": false,
|
| 57 |
+
"use_cache": true,
|
| 58 |
+
"vocab_size": 32000
|
| 59 |
+
}
|