geoffmunn's picture
Add Q2–Q8_0 quantized models with per-model cards, MODELFILE, CLI examples, and auto-upload
8653bef verified
raw
history blame contribute delete
518 Bytes
# MODELFILE for Qwen3Guard-Stream-8B
# Used by LM Studio, OpenWebUI, etc.
context_length: 8192
embedding: false
f16: cpu

# Prompt template for real-time streaming classification.
# NOTE: block-scalar content must be indented deeper than its key; an
# unindented continuation would make prompt_template empty and turn
# "AnalyzeStream: ..." into a separate top-level key.
prompt_template: >-
  AnalyzeStream: {prompt}

# Output format: {"safe": true/false, "categories": [...], "partial": bool, "confidence": float}

# Default parameters for reliable (deterministic) streaming classification
temperature: 0.0
top_p: 1.0
top_k: 40
repeat_penalty: 1.0
num_keep: 1
max_tokens: 128

# Stop tokens (optional). YAML forbids repeating a key — a duplicated
# "stop:" would silently keep only the last value on most parsers — so
# both tokens live in one sequence.
stop:
  - "{"
  - "}"