rammurmu committed · verified
Commit 7648e04 · 1 Parent(s): 5f62831
Files changed (1)
app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ import torch
+
+ # --- MODEL CONFIG ---
+ MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"  # You can swap in Llama 3, Qwen, etc.
+ # For better speed on free GPUs, use a quantized version like:
+ # MODEL_NAME = "TheBloke/Mistral-7B-Instruct-v0.3-GGUF"  # GGUF + llama.cpp (requires a different loader)
+ # But for simplicity & HF compatibility, we'll use the HF version with 4-bit quantization.
+
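+ # A rough sketch of that GGUF route, assuming the llama-cpp-python package is
+ # installed and a matching GGUF repo exists; the repo id (taken from the comment
+ # above) and the filename pattern are illustrative, not verified:
+ #   from llama_cpp import Llama
+ #   llm = Llama.from_pretrained(
+ #       repo_id="TheBloke/Mistral-7B-Instruct-v0.3-GGUF",  # hypothetical repo
+ #       filename="*Q4_K_M.gguf",  # glob pattern for a 4-bit quant file
+ #       n_ctx=4096,
+ #   )
+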
+ # Load tokenizer and model with 4-bit quantization for low memory usage.
+ # Recent transformers versions expect a BitsAndBytesConfig passed via
+ # quantization_config rather than a bare load_in_4bit=True kwarg.
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME,
+     device_map="auto",
+     quantization_config=BitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_compute_dtype=torch.float16,
+     ),
+     trust_remote_code=True,
+ )
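+ # Assumed environment: 4-bit loading additionally requires the `bitsandbytes`
+ # package, and device_map="auto" requires `accelerate`.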
+
+ # System prompt to guide behavior (like HuggingChat)
+ SYSTEM_PROMPT = """You are RunAshChat, a helpful, honest, and harmless AI assistant.
+ You are open-source, privacy-respecting, and do not store any user data.
+ Answer clearly, concisely, and thoughtfully. Avoid harmful, unethical, or biased content.
+ If you don't know something, say so."""
+
+ def format_prompt(message, history):
+     # Mistral-Instruct format: <s>[INST] prompt [/INST] answer</s>
+     full_prompt = f"<s>[INST] {SYSTEM_PROMPT}\n\n"
+     for user_msg, bot_msg in history:
+         full_prompt += f"{user_msg} [/INST] {bot_msg}</s><s>[INST] "
+     full_prompt += f"{message} [/INST]"
+     return full_prompt
+
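+ # For illustration, with history=[("Hi", "Hello!")] and message="How are you?",
+ # format_prompt yields (system prompt elided):
+ #   <s>[INST] ...system prompt...
+ #
+ #   Hi [/INST] Hello!</s><s>[INST] How are you? [/INST]
+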
+ def respond(message, history):
+     prompt = format_prompt(message, history)
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=512,
+             temperature=0.7,
+             top_p=0.9,
+             repetition_penalty=1.1,
+             do_sample=True,
+             pad_token_id=tokenizer.eos_token_id,
+         )
+
+     # Decode only the newly generated tokens, i.e. the assistant's reply.
+     # Slicing by input length is more robust than splitting on "[/INST]",
+     # which skip_special_tokens may have stripped from the decoded text.
+     new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
+     response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+     # The Chatbot output expects the full history, so append the new turn
+     # and return an empty string to clear the textbox.
+     return history + [(message, response)], ""
+
+ # --- GRADIO INTERFACE ---
+ with gr.Blocks(theme=gr.themes.Soft(), title="RunAshChat") as demo:
+     gr.Markdown("""
+     # 🚀 RunAshChat
+     *Your open-source, privacy-first AI chat companion — inspired by HuggingChat.*
+     """)
+
+     chatbot = gr.Chatbot(
+         height=600,
+         bubble_full_width=False,
+         avatar_images=(None, "https://huggingface.co/datasets/huggingface/branding/resolve/main/huggingface-logo.svg"),
+     )
+
+     msg = gr.Textbox(
+         placeholder="Ask me anything... (e.g., 'Explain quantum computing like I'm 10')",
+         label="Your message",
+         container=False,
+     )
+
+     with gr.Row():
+         clear = gr.Button("🧹 Clear Chat")
+         export = gr.Button("💾 Export Chat")
+
+     # Define the export target up front so the click handler has a component
+     # to write into.
+     export_box = gr.Textbox(label="Exported Chat", lines=15)
+
+     def clear_chat():
+         return None, ""
+
+     def export_chat(chat_history):
+         if not chat_history:
+             return "No conversation to export."
+         return "\n\n".join(f"👤 You: {q}\n🤖 RunAshChat: {a}" for q, a in chat_history)
+
+     # respond returns (updated history, "") to refresh the chat and clear the box
+     msg.submit(respond, [msg, chatbot], [chatbot, msg])
+     clear.click(clear_chat, None, [chatbot, msg])
+     export.click(export_chat, chatbot, export_box)
+
+ demo.launch()
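+ # Optional: on Spaces, enabling request queuing may help under load:
+ #   demo.queue().launch()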