import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch
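# NOTE: this script assumes gradio, torch, transformers, accelerate, and bitsandbytes
# are installed, e.g. `pip install gradio torch transformers accelerate bitsandbytes`.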
# --- MODEL CONFIG ---
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3" # You can swap for Llama 3, Qwen, etc.
# For better speed on free GPUs, you can use a quantized GGUF build such as:
# MODEL_NAME = "TheBloke/Mistral-7B-Instruct-v0.2-GGUF"  # GGUF + llama.cpp (requires a different loader)
# For simplicity and HF compatibility, we load the HF weights with 4-bit quantization instead.
# Load tokenizer and model with 4-bit quantization for low memory usage
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    # Passing load_in_4bit directly is deprecated; use a BitsAndBytesConfig instead
    # (4-bit loading requires the bitsandbytes package and a CUDA GPU)
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16,
    ),
    trust_remote_code=True,
)
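# With device_map="auto", accelerate decides where each layer lives;
# model.hf_device_map shows the resulting placement (useful when debugging OOM errors).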
# System prompt to guide behavior (like HuggingChat)
SYSTEM_PROMPT = """You are RunAshChat, a helpful, honest, and harmless AI assistant.
You are open-source, privacy-respecting, and do not store any user data.
Answer clearly, concisely, and thoughtfully. Avoid harmful, unethical, or biased content.
If you don't know something, say so."""
def format_prompt(message, history):
    # Mistral-Instruct format: [INST] prompt [/INST] answer</s>[INST] next prompt [/INST]
    # (</s> closes each completed assistant turn)
    full_prompt = f"[INST] {SYSTEM_PROMPT}\n\n"
    for user_msg, bot_msg in history:
        full_prompt += f"{user_msg} [/INST] {bot_msg}</s>[INST] "
    full_prompt += f"{message} [/INST]"
    return full_prompt
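# Example with one prior turn:
#   format_prompt("And Spain?", [("Capital of France?", "Paris.")])
#   -> "[INST] <SYSTEM_PROMPT>\n\nCapital of France? [/INST] Paris.</s>[INST] And Spain? [/INST]"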
def respond(message, history):
    history = history or []
    prompt = format_prompt(message, history)
    # model.device is the first device chosen by device_map="auto"
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    # The decoded text includes the prompt; keep only the text after the last [/INST]
    response = response.split("[/INST]")[-1].strip()
    # Return the updated history for the Chatbot and an empty string to clear the textbox
    history.append((message, response))
    return history, ""
# --- GRADIO INTERFACE ---
with gr.Blocks(theme=gr.themes.Soft(), title="RunAshChat") as demo:
    gr.Markdown("""
    # RunAshChat
    *Your open-source, privacy-first AI chat companion, inspired by HuggingChat.*
    """)
    chatbot = gr.Chatbot(
        height=600,
        bubble_full_width=False,
        avatar_images=(None, "https://huggingface.co/datasets/huggingface/branding/resolve/main/huggingface-logo.svg"),
    )
    msg = gr.Textbox(
        placeholder="Ask me anything... (e.g., 'Explain quantum computing like I'm 10')",
        label="Your message",
        container=False,
    )
    with gr.Row():
        clear = gr.Button("Clear Chat")
        export = gr.Button("Export Chat")
    def clear_chat():
        return None, ""

    def export_chat(chat_history):
        if not chat_history:
            return "No conversation to export."
        export_text = "\n\n".join([f"You: {q}\nRunAshChat: {a}" for q, a in chat_history])
        return export_text
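    # export_chat produces plain text, one turn per block:
    #   You: <your question>
    #   RunAshChat: <the model's reply>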
    export_box = gr.Textbox(label="Exported Chat", lines=15)

    # respond returns (updated_history, "") so the textbox is cleared after each send
    msg.submit(respond, [msg, chatbot], [chatbot, msg])
    clear.click(clear_chat, None, [chatbot, msg])
    export.click(export_chat, chatbot, export_box)

demo.launch()