Fixing issues
app.py
CHANGED
@@ -119,17 +119,18 @@ def generate_response(prompt, max_tokens=None, temperature=None, top_p=None):
     try:
         # Format prompt based on model type
         if "flan-t5" in current_model_name.lower() or "t5" in current_model_name.lower():
-            # …
-            …
+            # Use a concise instruction prefix for T5
+            instruction = "You are a friendly medical assistant. Answer with short, clear health info. Use emojis like 😊. For serious issues, suggest seeing a doctor."
+            full_input = f"{instruction}\nQuestion: {prompt} Answer:"
         else:
             # Causal LM format
-            …
+            full_input = f"{MEDICAL_SYSTEM_PROMPT}\n\nPatient/User: {prompt}\n"

-        print(f"Full …
+        print(f"Full input: {full_input}")

-        # Tokenize input with proper truncation
+        # Tokenize input with proper truncation (reduced max_length for T5)
         inputs = tokenizer(
-            …
+            full_input,
             return_tensors="pt",
             truncation=True,
             max_length=512,
@@ -140,28 +141,19 @@ def generate_response(prompt, max_tokens=None, temperature=None, top_p=None):
         device = next(model.parameters()).device
         inputs = {k: v.to(device) for k, v in inputs.items()}

-        # Generation parameters - …
-        …
-        # …
-        …
-        generation_kwargs = {
-            "max_new_tokens": min(max_tokens, 1024),
-            "temperature": temperature,
-            "top_p": top_p,
-            "do_sample": GENERATION_DEFAULTS["do_sample"],
-            "pad_token_id": tokenizer.eos_token_id,
-            "repetition_penalty": GENERATION_DEFAULTS["repetition_penalty"],
-            "no_repeat_ngram_size": GENERATION_DEFAULTS["no_repeat_ngram_size"]
-        }
+        # Generation parameters - optimized for T5
+        generation_kwargs = {
+            "max_new_tokens": min(max_tokens, 256),  # Reduced to 256 for control
+            "temperature": temperature,
+            "top_p": top_p,
+            "do_sample": GENERATION_DEFAULTS["do_sample"],
+            "repetition_penalty": GENERATION_DEFAULTS["repetition_penalty"],
+            "no_repeat_ngram_size": GENERATION_DEFAULTS["no_repeat_ngram_size"]
+        }
+
+        # Add pad_token_id for non-T5 models
+        if not ("flan-t5" in current_model_name.lower() or "t5" in current_model_name.lower()):
+            generation_kwargs["pad_token_id"] = tokenizer.eos_token_id

         print(f"Generating with kwargs: {generation_kwargs}")

@@ -183,7 +175,7 @@ def generate_response(prompt, max_tokens=None, temperature=None, top_p=None):
         else:
             # Causal models generate prompt + answer, need to remove prompt
             full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            response = full_response.replace(…
+            response = full_response.replace(full_input, "").strip()

             print(f"Generated response: {response}")
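For reference, a minimal self-contained sketch of the causal-LM path this diff sets up, including the conditional pad_token_id handling. The checkpoint name and prompt text are illustrative, and unlike the diff (which matches on the model name) this sketch detects a missing pad token by inspecting the tokenizer directly:

# Sketch only, not the Space's app.py; assumes microsoft/DialoGPT-medium
# as a stand-in causal checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

prompt = "I have a mild headache. What should I do?"
full_input = f"You are a friendly medical assistant.\n\nPatient/User: {prompt}\n"

inputs = tokenizer(full_input, return_tensors="pt", truncation=True, max_length=512)

generation_kwargs = {
    "max_new_tokens": 256,
    "temperature": 0.7,
    "top_p": 0.9,
    "do_sample": True,
    "repetition_penalty": 1.5,
    "no_repeat_ngram_size": 3,
}
# GPT-2-family tokenizers (DialoGPT included) ship without a pad token,
# so reuse EOS for padding, as the diff does for non-T5 models.
if tokenizer.pad_token_id is None:
    generation_kwargs["pad_token_id"] = tokenizer.eos_token_id

outputs = model.generate(**inputs, **generation_kwargs)
full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Causal models echo the prompt, so strip it off as app.py does.
response = full_response.replace(full_input, "").strip()
print(response)

Guarding the pad_token_id assignment matters because T5 tokenizers define their own pad token, so forcing eos_token_id as the pad id is only needed (and only correct) for the causal branch.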
config.py
CHANGED
@@ -16,8 +16,8 @@ MODEL_CONFIGS = {
     }
 }

-# Default model to use - …
-DEFAULT_MODEL = "…
+# Default model to use - reliable for medical chat
+DEFAULT_MODEL = "dialogpt_medium"

 # Model loading settings (optimized for CPU)
 MODEL_SETTINGS = {
@@ -29,45 +29,18 @@ MODEL_SETTINGS = {
     "device_map": "cpu"  # Force CPU to avoid device mapping issues
 }

-# Generation settings (…
+# Generation settings (optimized for T5 output)
 GENERATION_DEFAULTS = {
-    "max_new_tokens": …
-    "temperature": 0.7,
-    "top_p": 0.9,
+    "max_new_tokens": 256,
+    "temperature": 0.7,
+    "top_p": 0.9,
     "do_sample": True,
-    "repetition_penalty": 1.5,
+    "repetition_penalty": 1.5,
     "no_repeat_ngram_size": 3
 }

-# …
-MEDICAL_SYSTEM_PROMPT = """…
-
-Your answers should:
-- Stay focused. No long essays or extra fluff.
-- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
-- For any serious or unclear issues, remind the user to see a doctor - but do it briefly and naturally.
-- Keep responses concise and under 4 sentences when possible.
-
-Tone:
-- Friendly, supportive, and calm.
-- No robotic warnings unless needed. Keep it real and human.
-- Use emojis like 😊 or 🙂 occasionally to appear friendly.
-
-Important rules:
-- NEVER include text in parentheses in your responses.
-- NEVER include any meta-instructions in your responses.
-- NEVER include reminders about what you should do in future responses.
-- DO NOT include phrases like "We're here to help" or "I'm just an AI".
-- DO NOT include any text that instructs you what to do or how to behave.
-- DO NOT include any sentences that start with "If the user asks..." or "Remember..."
-- DO NOT include "(smile)" - instead, use actual emojis like 😊 or 🙂 when appropriate.
-- DO NOT include numbered references like [1], [2], etc. in your responses.
-- DO NOT include any text that explains what your response is doing.
-- DO NOT include "user:" or "assistant:" prefixes in your responses.
-- DO NOT include hypothetical user questions in your responses.
-- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
-- Don't give exact dosages or diagnoses.
-- Be consistent in your responses regardless of the user's role."""
+# Simplified medical prompt for T5
+MEDICAL_SYSTEM_PROMPT = "You are a friendly medical assistant. Answer with short, clear health info. Use emojis like 😊. For serious issues, suggest seeing a doctor."

 # UI settings
 UI_CONFIG = {
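As a usage note, generate_response's None-able arguments (max_tokens, temperature, top_p) are presumably resolved against GENERATION_DEFAULTS. A small sketch of that fallback pattern; the helper name is hypothetical, and the 256 cap mirrors min(max_tokens, 256) in app.py:

# Hypothetical helper, not part of the Space's code; assumes config.py
# (with GENERATION_DEFAULTS as above) is importable.
from config import GENERATION_DEFAULTS

def resolve_generation_kwargs(max_tokens=None, temperature=None, top_p=None):
    """Fill unset per-request values from the config defaults."""
    d = GENERATION_DEFAULTS
    max_new = max_tokens if max_tokens is not None else d["max_new_tokens"]
    return {
        "max_new_tokens": min(max_new, 256),  # cap mirrors app.py
        "temperature": temperature if temperature is not None else d["temperature"],
        "top_p": top_p if top_p is not None else d["top_p"],
        "do_sample": d["do_sample"],
        "repetition_penalty": d["repetition_penalty"],
        "no_repeat_ngram_size": d["no_repeat_ngram_size"],
    }

# Overrides win, everything else comes from GENERATION_DEFAULTS:
print(resolve_generation_kwargs(temperature=0.4))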