JunHowie committed on
Commit
870f3ec
·
verified ·
1 Parent(s): 19cdb6f

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -64,7 +64,7 @@ export OMP_NUM_THREADS=4
64
 
65
  CONTEXT_LENGTH=32768
66
  vllm serve \
67
- __YOUR_PATH__/tclf90/DeepSeek-V3.2-Speciale-AWQ \
68
  --served-model-name MY_MODEL_NAME \
69
  --enable-auto-tool-choice \
70
  --tool-call-parser deepseek_v31 \
@@ -75,7 +75,7 @@ vllm serve \
75
  --gpu-memory-utilization 0.9 \
76
  --tensor-parallel-size 8 \
77
  --enable-expert-parallel \ # optional
78
- --speculative-config '{"model": "__YOUR_PATH__/tclf90/DeepSeek-V3.2-Speciale-AWQ", "num_speculative_tokens": 1}' \ # optional, 50%+- throughput increase is observed
79
  --trust-remote-code \
80
  --host 0.0.0.0 \
81
  --port 8000
@@ -95,7 +95,7 @@ vllm serve \
95
  ### 【Model Download】
96
  ```python
97
  from modelscope import snapshot_download
98
- snapshot_download('tclf90/DeepSeek-V3.2-Speciale-AWQ', cache_dir="your_local_path")
99
  ```
100
 
101
  ### 【Overview】
 
64
 
65
  CONTEXT_LENGTH=32768
66
  vllm serve \
67
+ __YOUR_PATH__/QuantTrio/DeepSeek-V3.2-Speciale-AWQ \
68
  --served-model-name MY_MODEL_NAME \
69
  --enable-auto-tool-choice \
70
  --tool-call-parser deepseek_v31 \
 
75
  --gpu-memory-utilization 0.9 \
76
  --tensor-parallel-size 8 \
77
  --enable-expert-parallel \ # optional
78
+ --speculative-config '{"model": "__YOUR_PATH__/QuantTrio/DeepSeek-V3.2-Speciale-AWQ", "num_speculative_tokens": 1}' \ # optional, 50%+- throughput increase is observed
79
  --trust-remote-code \
80
  --host 0.0.0.0 \
81
  --port 8000
 
95
  ### 【Model Download】
96
  ```python
97
  from modelscope import snapshot_download
98
+ snapshot_download('QuantTrio/DeepSeek-V3.2-Speciale-AWQ', cache_dir="your_local_path")
99
  ```
100
 
101
  ### 【Overview】