hfendpoints-images
/

text-generation-sglang-gpu

Morgan Funtowicz committed on Apr 30

Commit

0727aa0

1 Parent(s): 36406e7

feat(text-generation): default to transformers backend as much as we can

Files changed (2) hide show

Dockerfile CHANGED Viewed

@@ -1,6 +1,7 @@
 FROM lmsysorg/sglang:latest
 ENV MODEL_ID="/repository"
 ENV KV_CACHE_DTYPE="auto"
 ENV TP_SIZE="1"
 ENV QUANT_METHOD="w8a8_int8"

 FROM lmsysorg/sglang:latest
 ENV MODEL_ID="/repository"
+ENV MODEL_IMPL="transformers"
 ENV KV_CACHE_DTYPE="auto"
 ENV TP_SIZE="1"
 ENV QUANT_METHOD="w8a8_int8"

entrypoint.sh CHANGED Viewed

@@ -8,6 +8,7 @@ python3 -m sglang.launch_server \
   --quantization $QUANT_METHOD \
   --enable-torch-compile \
   --enable-ep-moe \
   --tool-call-parser qwen25 \
   --host 0.0.0.0 \
   --port 80

   --quantization $QUANT_METHOD \
   --enable-torch-compile \
   --enable-ep-moe \
+  --model-impl $MODEL_IMPL \
   --tool-call-parser qwen25 \
   --host 0.0.0.0 \
   --port 80