Spaces:
Paused
Paused
fix(runner.sh, run-llama.sh, run-sailor.sh): explicitly disable enforce_eager
Browse files
- run-llama.sh +1 -0
- run-sailor.sh +1 -0
- runner.sh +1 -0
run-llama.sh
CHANGED
|
@@ -27,4 +27,5 @@ python -u /app/openai_compatible_api_server.py \
|
|
| 27 |
--max-num-batched-tokens 32768 \
|
| 28 |
--max-model-len 32768 \
|
| 29 |
--dtype float16 \
|
|
|
|
| 30 |
--gpu-memory-utilization 0.85
|
|
|
|
| 27 |
--max-num-batched-tokens 32768 \
|
| 28 |
--max-model-len 32768 \
|
| 29 |
--dtype float16 \
|
| 30 |
+
--enforce-eager false \
|
| 31 |
--gpu-memory-utilization 0.85
|
run-sailor.sh
CHANGED
|
@@ -29,4 +29,5 @@ python -u /app/openai_compatible_api_server.py \
|
|
| 29 |
--max-num-batched-tokens 32768 \
|
| 30 |
--max-model-len 32768 \
|
| 31 |
--dtype float16 \
|
|
|
|
| 32 |
--gpu-memory-utilization 0.85
|
|
|
|
| 29 |
--max-num-batched-tokens 32768 \
|
| 30 |
--max-model-len 32768 \
|
| 31 |
--dtype float16 \
|
| 32 |
+
--enforce-eager false \
|
| 33 |
--gpu-memory-utilization 0.85
|
runner.sh
CHANGED
|
@@ -51,4 +51,5 @@ python -u /app/openai_compatible_api_server.py \
|
|
| 51 |
--max-num-batched-tokens 32768 \
|
| 52 |
--max-model-len 32768 \
|
| 53 |
--dtype float16 \
|
|
|
|
| 54 |
--gpu-memory-utilization 0.9
|
|
|
|
| 51 |
--max-num-batched-tokens 32768 \
|
| 52 |
--max-model-len 32768 \
|
| 53 |
--dtype float16 \
|
| 54 |
+
--enforce-eager false \
|
| 55 |
--gpu-memory-utilization 0.9
|