Fixing issues
app.py
CHANGED
@@ -119,17 +119,18 @@ def generate_response(prompt, max_tokens=None, temperature=None, top_p=None):
     try:
         # Format prompt based on model type
         if "flan-t5" in current_model_name.lower() or "t5" in current_model_name.lower():
-            # …
-            …
+            # Use a concise instruction prefix for T5
+            instruction = "You are a friendly medical assistant. Answer with short, clear health info. Use emojis like 😊. For serious issues, suggest seeing a doctor."
+            full_input = f"{instruction}\nQuestion: {prompt} Answer:"
         else:
             # Causal LM format
-            …
+            full_input = f"{MEDICAL_SYSTEM_PROMPT}\n\nPatient/User: {prompt}\n"

-        print(f"Full …
+        print(f"Full input: {full_input}")

-        # Tokenize input with proper truncation
+        # Tokenize input with proper truncation (reduced max_length for T5)
         inputs = tokenizer(
-            …
+            full_input,
             return_tensors="pt",
             truncation=True,
             max_length=512,
@@ -140,28 +141,19 @@ def generate_response(prompt, max_tokens=None, temperature=None, top_p=None):
         device = next(model.parameters()).device
         inputs = {k: v.to(device) for k, v in inputs.items()}

-        # Generation parameters - …
-        …
-        # …
-        …
-        generation_kwargs = {
-            "max_new_tokens": min(max_tokens, 1024),
-            "temperature": temperature,
-            "top_p": top_p,
-            "do_sample": GENERATION_DEFAULTS["do_sample"],
-            "pad_token_id": tokenizer.eos_token_id,
-            "repetition_penalty": GENERATION_DEFAULTS["repetition_penalty"],
-            "no_repeat_ngram_size": GENERATION_DEFAULTS["no_repeat_ngram_size"]
-        }
+        # Generation parameters - optimized for T5
+        generation_kwargs = {
+            "max_new_tokens": min(max_tokens, 256),  # Reduced to 256 for control
+            "temperature": temperature,
+            "top_p": top_p,
+            "do_sample": GENERATION_DEFAULTS["do_sample"],
+            "repetition_penalty": GENERATION_DEFAULTS["repetition_penalty"],
+            "no_repeat_ngram_size": GENERATION_DEFAULTS["no_repeat_ngram_size"]
+        }
+
+        # Add pad_token_id for non-T5 models
+        if not ("flan-t5" in current_model_name.lower() or "t5" in current_model_name.lower()):
+            generation_kwargs["pad_token_id"] = tokenizer.eos_token_id

         print(f"Generating with kwargs: {generation_kwargs}")

@@ -183,7 +175,7 @@ def generate_response(prompt, max_tokens=None, temperature=None, top_p=None):
         else:
             # Causal models generate prompt + answer, need to remove prompt
             full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            response = full_response.replace(…
+            response = full_response.replace(full_input, "").strip()

             print(f"Generated response: {response}")
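For reference, a minimal self-contained sketch of the causal-LM path this diff sets up, including the conditional pad_token_id handling. The checkpoint name and prompt text are illustrative, and unlike the diff (which matches on the model name) this sketch detects a missing pad token by inspecting the tokenizer directly:

# Sketch only, not the Space's app.py; assumes microsoft/DialoGPT-medium
# as a stand-in causal checkpoint.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "microsoft/DialoGPT-medium"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

prompt = "I have a mild headache. What should I do?"
full_input = f"You are a friendly medical assistant.\n\nPatient/User: {prompt}\n"

inputs = tokenizer(full_input, return_tensors="pt", truncation=True, max_length=512)

generation_kwargs = {
    "max_new_tokens": 256,
    "temperature": 0.7,
    "top_p": 0.9,
    "do_sample": True,
    "repetition_penalty": 1.5,
    "no_repeat_ngram_size": 3,
}
# GPT-2-family tokenizers (DialoGPT included) ship without a pad token,
# so reuse EOS for padding, as the diff does for non-T5 models.
if tokenizer.pad_token_id is None:
    generation_kwargs["pad_token_id"] = tokenizer.eos_token_id

outputs = model.generate(**inputs, **generation_kwargs)
full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
# Causal models echo the prompt, so strip it off as app.py does.
response = full_response.replace(full_input, "").strip()
print(response)

Guarding the pad_token_id assignment matters because T5 tokenizers define their own pad token, so forcing eos_token_id as the pad id is only needed (and only correct) for the causal branch.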
config.py
CHANGED
@@ -16,8 +16,8 @@ MODEL_CONFIGS = {
     }
 }

-# Default model to use - …
-DEFAULT_MODEL = "…
+# Default model to use - reliable for medical chat
+DEFAULT_MODEL = "dialogpt_medium"

 # Model loading settings (optimized for CPU)
 MODEL_SETTINGS = {
@@ -29,45 +29,18 @@ MODEL_SETTINGS = {
     "device_map": "cpu"  # Force CPU to avoid device mapping issues
 }

-# Generation settings (…
+# Generation settings (optimized for T5 output)
 GENERATION_DEFAULTS = {
-    "max_new_tokens": …
-    "temperature": 0.7,
-    "top_p": 0.9,
+    "max_new_tokens": 256,
+    "temperature": 0.7,
+    "top_p": 0.9,
     "do_sample": True,
-    "repetition_penalty": 1.5,
+    "repetition_penalty": 1.5,
     "no_repeat_ngram_size": 3
 }

-# …
-MEDICAL_SYSTEM_PROMPT = """…
-
-Your answers should:
-- Stay focused. No long essays or extra fluff.
-- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
-- For any serious or unclear issues, remind the user to see a doctor - but do it briefly and naturally.
-- Keep responses concise and under 4 sentences when possible.
-
-Tone:
-- Friendly, supportive, and calm.
-- No robotic warnings unless needed. Keep it real and human.
-- Use emojis like 😊 or 🙂 occasionally to appear friendly.
-
-Important rules:
-- NEVER include text in parentheses in your responses.
-- NEVER include any meta-instructions in your responses.
-- NEVER include reminders about what you should do in future responses.
-- DO NOT include phrases like "We're here to help" or "I'm just an AI".
-- DO NOT include any text that instructs you what to do or how to behave.
-- DO NOT include any sentences that start with "If the user asks..." or "Remember..."
-- DO NOT include "(smile)" - instead, use actual emojis like 😊 or 🙂 when appropriate.
-- DO NOT include numbered references like [1], [2], etc. in your responses.
-- DO NOT include any text that explains what your response is doing.
-- DO NOT include "user:" or "assistant:" prefixes in your responses.
-- DO NOT include hypothetical user questions in your responses.
-- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
-- Don't give exact dosages or diagnoses.
-- Be consistent in your responses regardless of the user's role."""
+# Simplified medical prompt for T5
+MEDICAL_SYSTEM_PROMPT = "You are a friendly medical assistant. Answer with short, clear health info. Use emojis like 😊. For serious issues, suggest seeing a doctor."

 # UI settings
 UI_CONFIG = {
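As a usage note, generate_response's None-able arguments (max_tokens, temperature, top_p) are presumably resolved against GENERATION_DEFAULTS. A small sketch of that fallback pattern; the helper name is hypothetical, and the 256 cap mirrors min(max_tokens, 256) in app.py:

# Hypothetical helper, not part of the Space's code; assumes config.py
# (with GENERATION_DEFAULTS as above) is importable.
from config import GENERATION_DEFAULTS

def resolve_generation_kwargs(max_tokens=None, temperature=None, top_p=None):
    """Fill unset per-request values from the config defaults."""
    d = GENERATION_DEFAULTS
    max_new = max_tokens if max_tokens is not None else d["max_new_tokens"]
    return {
        "max_new_tokens": min(max_new, 256),  # cap mirrors app.py
        "temperature": temperature if temperature is not None else d["temperature"],
        "top_p": top_p if top_p is not None else d["top_p"],
        "do_sample": d["do_sample"],
        "repetition_penalty": d["repetition_penalty"],
        "no_repeat_ngram_size": d["no_repeat_ngram_size"],
    }

# Overrides win, everything else comes from GENERATION_DEFAULTS:
print(resolve_generation_kwargs(temperature=0.4))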