rammurmu committed · verified
Commit 7648e04 · 1 Parent(s): 5f62831
Files changed (1)
app.py +91 -0
app.py ADDED
@@ -0,0 +1,91 @@
+ import gradio as gr
+ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+ import torch
+
+ # --- MODEL CONFIG ---
+ MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.3"  # You can swap in Llama 3, Qwen, etc.
+ # For better speed on free GPUs, use a quantized version like:
+ # MODEL_NAME = "TheBloke/Mistral-7B-Instruct-v0.3-GGUF"  # GGUF + llama.cpp (requires a different loader)
+ # But for simplicity & HF compatibility, we'll use the HF version with 4-bit quantization.
+
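+ # A rough sketch of that GGUF route, assuming the llama-cpp-python package is
+ # installed and a matching GGUF repo exists; the repo id (taken from the comment
+ # above) and the filename pattern are illustrative, not verified:
+ #   from llama_cpp import Llama
+ #   llm = Llama.from_pretrained(
+ #       repo_id="TheBloke/Mistral-7B-Instruct-v0.3-GGUF",  # hypothetical repo
+ #       filename="*Q4_K_M.gguf",  # glob pattern for a 4-bit quant file
+ #       n_ctx=4096,
+ #   )
+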
+ # Load tokenizer and model with 4-bit quantization for low memory usage.
+ # Recent transformers versions expect a BitsAndBytesConfig passed via
+ # quantization_config rather than a bare load_in_4bit=True kwarg.
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(
+     MODEL_NAME,
+     device_map="auto",
+     quantization_config=BitsAndBytesConfig(
+         load_in_4bit=True,
+         bnb_4bit_compute_dtype=torch.float16,
+     ),
+     trust_remote_code=True,
+ )
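+ # Assumed environment: 4-bit loading additionally requires the `bitsandbytes`
+ # package, and device_map="auto" requires `accelerate`.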
+
+ # System prompt to guide behavior (like HuggingChat)
+ SYSTEM_PROMPT = """You are RunAshChat, a helpful, honest, and harmless AI assistant.
+ You are open-source, privacy-respecting, and do not store any user data.
+ Answer clearly, concisely, and thoughtfully. Avoid harmful, unethical, or biased content.
+ If you don't know something, say so."""
+
+ def format_prompt(message, history):
+     # Mistral-Instruct format: <s>[INST] prompt [/INST] answer</s>
+     full_prompt = f"<s>[INST] {SYSTEM_PROMPT}\n\n"
+     for user_msg, bot_msg in history:
+         full_prompt += f"{user_msg} [/INST] {bot_msg}</s><s>[INST] "
+     full_prompt += f"{message} [/INST]"
+     return full_prompt
+
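+ # For illustration, with history=[("Hi", "Hello!")] and message="How are you?",
+ # format_prompt yields (system prompt elided):
+ #   <s>[INST] ...system prompt...
+ #
+ #   Hi [/INST] Hello!</s><s>[INST] How are you? [/INST]
+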
+ def respond(message, history):
+     prompt = format_prompt(message, history)
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=512,
+             temperature=0.7,
+             top_p=0.9,
+             repetition_penalty=1.1,
+             do_sample=True,
+             pad_token_id=tokenizer.eos_token_id,
+         )
+
+     # Decode only the newly generated tokens, i.e. the assistant's reply.
+     # Slicing by input length is more robust than splitting on "[/INST]",
+     # which skip_special_tokens may have stripped from the decoded text.
+     new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
+     response = tokenizer.decode(new_tokens, skip_special_tokens=True).strip()
+     # The Chatbot output expects the full history, so append the new turn
+     # and return an empty string to clear the textbox.
+     return history + [(message, response)], ""
+
+ # --- GRADIO INTERFACE ---
+ with gr.Blocks(theme=gr.themes.Soft(), title="RunAshChat") as demo:
+     gr.Markdown("""
+     # 🚀 RunAshChat
+     *Your open-source, privacy-first AI chat companion — inspired by HuggingChat.*
+     """)
+
+     chatbot = gr.Chatbot(
+         height=600,
+         bubble_full_width=False,
+         avatar_images=(None, "https://huggingface.co/datasets/huggingface/branding/resolve/main/huggingface-logo.svg"),
+     )
+
+     msg = gr.Textbox(
+         placeholder="Ask me anything... (e.g., 'Explain quantum computing like I'm 10')",
+         label="Your message",
+         container=False,
+     )
+
+     with gr.Row():
+         clear = gr.Button("🧹 Clear Chat")
+         export = gr.Button("💾 Export Chat")
+
+     # Define the export target up front so the click handler has a component
+     # to write into.
+     export_box = gr.Textbox(label="Exported Chat", lines=15)
+
+     def clear_chat():
+         return None, ""
+
+     def export_chat(chat_history):
+         if not chat_history:
+             return "No conversation to export."
+         return "\n\n".join(f"👤 You: {q}\n🤖 RunAshChat: {a}" for q, a in chat_history)
+
+     # respond returns (updated history, "") to refresh the chat and clear the box
+     msg.submit(respond, [msg, chatbot], [chatbot, msg])
+     clear.click(clear_chat, None, [chatbot, msg])
+     export.click(export_chat, chatbot, export_box)
+
+ demo.launch()
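+ # Optional: on Spaces, enabling request queuing may help under load:
+ #   demo.queue().launch()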