Morgan Funtowicz
committed on
Commit
·
0727aa0
1
Parent(s):
36406e7
feat(text-generation): default to transformers backend as much as we can
Browse files
- Dockerfile +1 -0
- entrypoint.sh +1 -0
Dockerfile
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
FROM lmsysorg/sglang:latest
|
| 2 |
|
| 3 |
ENV MODEL_ID="/repository"
|
|
|
|
| 4 |
ENV KV_CACHE_DTYPE="auto"
|
| 5 |
ENV TP_SIZE="1"
|
| 6 |
ENV QUANT_METHOD="w8a8_int8"
|
|
|
|
| 1 |
FROM lmsysorg/sglang:latest
|
| 2 |
|
| 3 |
ENV MODEL_ID="/repository"
|
| 4 |
+
ENV MODEL_IMPL="transformers"
|
| 5 |
ENV KV_CACHE_DTYPE="auto"
|
| 6 |
ENV TP_SIZE="1"
|
| 7 |
ENV QUANT_METHOD="w8a8_int8"
|
entrypoint.sh
CHANGED
|
@@ -8,6 +8,7 @@ python3 -m sglang.launch_server \
|
|
| 8 |
--quantization $QUANT_METHOD \
|
| 9 |
--enable-torch-compile \
|
| 10 |
--enable-ep-moe \
|
|
|
|
| 11 |
--tool-call-parser qwen25 \
|
| 12 |
--host 0.0.0.0 \
|
| 13 |
--port 80
|
|
|
|
| 8 |
--quantization $QUANT_METHOD \
|
| 9 |
--enable-torch-compile \
|
| 10 |
--enable-ep-moe \
|
| 11 |
+
--model-impl $MODEL_IMPL \
|
| 12 |
--tool-call-parser qwen25 \
|
| 13 |
--host 0.0.0.0 \
|
| 14 |
--port 80
|