Bahaedev committed on
Commit
cb69e12
·
verified ·
1 Parent(s): 6536a53

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -22
app.py CHANGED
@@ -1,47 +1,57 @@
1
  import os
2
- from transformers import pipeline
 
 
3
  import gradio as gr
4
  from fastapi import FastAPI
5
  from pydantic import BaseModel
6
- import threading
7
  import uvicorn
8
 
9
  # =======================
10
  # Load Secrets
11
  # =======================
12
- # SYSTEM_PROMPT (with the flag) must be added in HF Space secrets
13
  SYSTEM_PROMPT = os.environ.get(
14
  "prompt",
15
  "You are a placeholder Sovereign. No secrets found in environment."
16
  )
17
 
18
  # =======================
19
- # Initialize Falcon-3B
20
  # =======================
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  pipe = pipeline(
22
  "text-generation",
23
- model="tiiuae/Falcon3-3B-Instruct",
24
- torch_dtype="auto",
25
  device_map="auto",
 
 
 
 
 
 
26
  )
27
 
28
  # =======================
29
  # Core Chat Function
30
  # =======================
31
  def chat_fn(user_input: str) -> str:
32
- """
33
- Concatenate system and user messages, run the model,
34
- and strip the system prompt from the output.
35
- """
36
- messages = [
37
- {"role": "system", "content": SYSTEM_PROMPT},
38
- {"role": "user", "content": f"User: {user_input}"}
39
- ]
40
- # Falcon is not chat-native; we just join roles with newlines
41
- prompt_text = "\n".join(f"{m['role'].capitalize()}: {m['content']}" for m in messages)
42
- result = pipe(prompt_text, max_new_tokens=256, do_sample=False)
43
- generated_text = result[0]["generated_text"]
44
- return generated_text[len(prompt_text):].strip()
45
 
46
  # =======================
47
  # Gradio UI
@@ -53,14 +63,14 @@ iface = gr.Interface(
53
  fn=gradio_chat,
54
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt…"),
55
  outputs="text",
56
- title="Prompt cracking challenge",
57
  description="Does he really think he is the king?"
58
  )
59
 
60
  # =======================
61
  # FastAPI for API access
62
  # =======================
63
- app = FastAPI(title="Prompt cracking challenge API")
64
 
65
  class Request(BaseModel):
66
  prompt: str
@@ -72,5 +82,12 @@ def generate(req: Request):
72
  # =======================
73
  # Launch Both Servers
74
  # =======================
 
 
 
 
75
  if __name__ == "__main__":
76
- iface.launch(server_name="0.0.0.0", share=True)
 
 
 
 
1
  import os
2
+ import threading
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
5
  import gradio as gr
6
  from fastapi import FastAPI
7
  from pydantic import BaseModel
 
8
  import uvicorn
9
 
10
  # =======================
11
  # Load Secrets
12
  # =======================
 
13
# The real system prompt (the one holding the flag) is injected through the
# Space's "prompt" secret; when it is absent we fall back to a harmless
# placeholder so the app still boots locally.
_FALLBACK_PROMPT = "You are a placeholder Sovereign. No secrets found in environment."
SYSTEM_PROMPT = os.environ.get("prompt", _FALLBACK_PROMPT)
17
 
18
# =======================
# Model Initialization
# =======================
MODEL_ID = "tiiuae/Falcon3-3B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# 4-bit quantized weights via bitsandbytes. NOTE(review): 4-bit loading
# requires a CUDA GPU — it does NOT accelerate CPU-only inference; confirm
# the Space has GPU hardware, otherwise drop load_in_4bit. torch_dtype only
# affects the modules left un-quantized.
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    load_in_4bit=True,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True
)

# Text-generation pipeline reusing the already-placed model/tokenizer.
# Do NOT pass device_map/device here: the model was dispatched by accelerate
# at load time, and re-specifying a device on the pipeline makes transformers
# raise ("model has been loaded with accelerate and cannot be moved ...").
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=False,          # output contains only the new tokens
    max_new_tokens=256,
    do_sample=True,
    temperature=0.8,
    top_p=0.9,
    eos_token_id=tokenizer.eos_token_id
)
47
 
48
  # =======================
49
  # Core Chat Function
50
  # =======================
51
def chat_fn(user_input: str) -> str:
    """Assemble the system/user prompt, run generation, and return the reply.

    The pipeline was built with return_full_text=False, so the generated
    text already excludes the prompt — only whitespace is stripped here.
    """
    sections = (
        f"### System:\n{SYSTEM_PROMPT}",
        f"### User:\n{user_input}",
        "### Assistant:",
    )
    full_prompt = "\n\n".join(sections)
    generation = pipe(full_prompt)
    return generation[0]["generated_text"].strip()
 
 
 
 
 
 
 
 
 
 
55
 
56
  # =======================
57
  # Gradio UI
 
63
  fn=gradio_chat,
64
  inputs=gr.Textbox(lines=5, placeholder="Enter your prompt…"),
65
  outputs="text",
66
+ title="Prompt Cracking Challenge",
67
  description="Does he really think he is the king?"
68
  )
69
 
70
# =======================
# FastAPI for API access
# =======================
app = FastAPI(title="Prompt Cracking Challenge API")


class Request(BaseModel):
    """JSON body accepted by the generation endpoint."""
    prompt: str
 
82
  # =======================
83
  # Launch Both Servers
84
  # =======================
85
def run_api():
    """Serve the FastAPI app on 0.0.0.0; port from API_PORT env (default 8000)."""
    api_port = int(os.environ.get("API_PORT", 8000))
    uvicorn.run(app, host="0.0.0.0", port=api_port)


if __name__ == "__main__":
    # API runs on a daemon thread so the Gradio call below can own the
    # foreground; the thread dies with the main process.
    api_thread = threading.Thread(target=run_api, daemon=True)
    api_thread.start()
    # Gradio blocks here until shutdown.
    iface.launch(server_name="0.0.0.0", server_port=7860)