Deva1211 committed on
Commit
eb53fd2
·
1 Parent(s): 5bb3d19

Fixing issues

Files changed (2)
  1. app.py +21 -29
  2. config.py +9 -36
app.py CHANGED
@@ -119,17 +119,18 @@ def generate_response(prompt, max_tokens=None, temperature=None, top_p=None):
     try:
         # Format prompt based on model type
         if "flan-t5" in current_model_name.lower() or "t5" in current_model_name.lower():
-            # T5 instruction format
-            full_prompt = f"{MEDICAL_SYSTEM_PROMPT}\n\nQuestion: {prompt}\nAnswer:"
+            # Use a concise instruction prefix for T5
+            instruction = "You are a friendly medical assistant. Answer with short, clear health info. Use emojis like 😊. For serious issues, suggest seeing a doctor."
+            full_input = f"{instruction}\nQuestion: {prompt} Answer:"
         else:
             # Causal LM format
-            full_prompt = f"{MEDICAL_SYSTEM_PROMPT}\n\nPatient/User: {prompt}\n"
+            full_input = f"{MEDICAL_SYSTEM_PROMPT}\n\nPatient/User: {prompt}\n"

-        print(f"Full prompt: {full_prompt}")
+        print(f"Full input: {full_input}")

-        # Tokenize input with proper truncation
+        # Tokenize input with proper truncation (reduced max_length for T5)
         inputs = tokenizer(
-            full_prompt,
+            full_input,
             return_tensors="pt",
             truncation=True,
             max_length=512,
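
For context on the T5 branch above: Flan-T5 is a seq2seq model, so the instruction-plus-question string goes in as encoder input and the answer comes back on its own, without echoing the prompt. A minimal standalone sketch of that flow (the checkpoint name and question are illustrative, not taken from this repo's MODEL_CONFIGS):

    from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

    # Illustrative checkpoint; the app resolves its real model via MODEL_CONFIGS.
    tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-small")
    model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-small")

    instruction = "You are a friendly medical assistant. Answer with short, clear health info."
    question = "What helps with a mild headache?"
    full_input = f"{instruction}\nQuestion: {question} Answer:"

    # Same tokenize-then-generate shape as the code in the hunk above.
    inputs = tokenizer(full_input, return_tensors="pt", truncation=True, max_length=512)
    outputs = model.generate(**inputs, max_new_tokens=64)
    print(tokenizer.decode(outputs[0], skip_special_tokens=True))
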
@@ -140,28 +141,19 @@ def generate_response(prompt, max_tokens=None, temperature=None, top_p=None):
         device = next(model.parameters()).device
         inputs = {k: v.to(device) for k, v in inputs.items()}

-        # Generation parameters - different for T5 vs causal models
-        if "flan-t5" in current_model_name.lower() or "t5" in current_model_name.lower():
-            # T5 seq2seq generation parameters
-            generation_kwargs = {
-                "max_new_tokens": min(max_tokens, 100),
-                "temperature": temperature,
-                "top_p": top_p,
-                "do_sample": GENERATION_DEFAULTS["do_sample"],
-                "repetition_penalty": GENERATION_DEFAULTS["repetition_penalty"],
-                "early_stopping": True
-            }
-        else:
-            # Causal LM generation parameters
-            generation_kwargs = {
-                "max_new_tokens": min(max_tokens, 1024),
-                "temperature": temperature,
-                "top_p": top_p,
-                "do_sample": GENERATION_DEFAULTS["do_sample"],
-                "pad_token_id": tokenizer.eos_token_id,
-                "repetition_penalty": GENERATION_DEFAULTS["repetition_penalty"],
-                "no_repeat_ngram_size": GENERATION_DEFAULTS["no_repeat_ngram_size"]
-            }
+        # Generation parameters - optimized for T5
+        generation_kwargs = {
+            "max_new_tokens": min(max_tokens, 256),  # Reduced to 256 for control
+            "temperature": temperature,
+            "top_p": top_p,
+            "do_sample": GENERATION_DEFAULTS["do_sample"],
+            "repetition_penalty": GENERATION_DEFAULTS["repetition_penalty"],
+            "no_repeat_ngram_size": GENERATION_DEFAULTS["no_repeat_ngram_size"]
+        }
+
+        # Add pad_token_id for non-T5 models
+        if not ("flan-t5" in current_model_name.lower() or "t5" in current_model_name.lower()):
+            generation_kwargs["pad_token_id"] = tokenizer.eos_token_id

         print(f"Generating with kwargs: {generation_kwargs}")

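The unified path above replaces two near-identical kwargs dicts with one shared dict and adds pad_token_id only for causal models: GPT-2-family tokenizers (DialoGPT included) ship without a pad token, so EOS is reused to keep generate() quiet. A hypothetical refactor of the same logic into a helper, reusing this file's names:

    def build_generation_kwargs(max_tokens, temperature, top_p, tokenizer, model_name):
        # Assumes GENERATION_DEFAULTS from config.py is in scope.
        # Shared sampling parameters for both T5 and causal models.
        kwargs = {
            "max_new_tokens": min(max_tokens, 256),
            "temperature": temperature,
            "top_p": top_p,
            "do_sample": GENERATION_DEFAULTS["do_sample"],
            "repetition_penalty": GENERATION_DEFAULTS["repetition_penalty"],
            "no_repeat_ngram_size": GENERATION_DEFAULTS["no_repeat_ngram_size"],
        }
        # Causal LMs have no dedicated pad token, so reuse EOS for padding.
        if "t5" not in model_name.lower():
            kwargs["pad_token_id"] = tokenizer.eos_token_id
        return kwargs
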
@@ -183,7 +175,7 @@ def generate_response(prompt, max_tokens=None, temperature=None, top_p=None):
         else:
             # Causal models generate prompt + answer, need to remove prompt
             full_response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            response = full_response.replace(full_prompt, "").strip()
+            response = full_response.replace(full_input, "").strip()

         print(f"Generated response: {response}")

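One caveat with the replace()-based cleanup kept here: it only works while decode() reproduces full_input character-for-character, which tokenizer round-trips do not guarantee (whitespace and special characters can shift). A more robust alternative (a sketch, not what this commit does) slices the prompt off by token count:

    # Assumes `inputs`, `outputs`, and `tokenizer` from the surrounding function.
    prompt_length = inputs["input_ids"].shape[1]
    response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
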
config.py CHANGED
@@ -16,8 +16,8 @@ MODEL_CONFIGS = {
     }
 }

-# Default model to use - lightweight for 16GB memory limit
-DEFAULT_MODEL = "flan_t5_small"
+# Default model to use - reliable for medical chat
+DEFAULT_MODEL = "dialogpt_medium"

 # Model loading settings (optimized for CPU)
 MODEL_SETTINGS = {
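
For the new default to resolve, MODEL_CONFIGS (defined just above this hunk) must already contain a "dialogpt_medium" entry. Its exact shape is outside the diff; a hypothetical entry might look like:

    MODEL_CONFIGS = {
        "dialogpt_medium": {
            # Hypothetical fields; the real entry is defined above this hunk.
            "model_id": "microsoft/DialoGPT-medium",
            "type": "causal",
        },
    }
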
@@ -29,45 +29,18 @@ MODEL_SETTINGS = {
     "device_map": "cpu" # Force CPU to avoid device mapping issues
 }

-# Generation settings (constrained for better output)
+# Generation settings (optimized for T5 output)
 GENERATION_DEFAULTS = {
-    "max_new_tokens": 100, # Reduce to limit response length
-    "temperature": 0.7, # Moderate creativity
-    "top_p": 0.9, # Focus on relevant tokens
+    "max_new_tokens": 256,
+    "temperature": 0.7,
+    "top_p": 0.9,
     "do_sample": True,
-    "repetition_penalty": 1.5, # Prevent repetition
+    "repetition_penalty": 1.5,
     "no_repeat_ngram_size": 3
 }

-# Medical system prompt from CareConnect - detailed specifications
-MEDICAL_SYSTEM_PROMPT = """You are a friendly and smart medical assistant. Your job is to give short, clear, and helpful health information.
-
-Your answers should:
-- Stay focused. No long essays or extra fluff.
-- Give basic helpful steps for common symptoms like fever, cough, or headache (e.g., rest, drink fluids, take paracetamol if needed).
-- For any serious or unclear issues, remind the user to see a doctor — but do it briefly and naturally.
-- Keep responses concise and under 4 sentences when possible.
-
-Tone:
-- Friendly, supportive, and calm.
-- No robotic warnings unless needed. Keep it real and human.
-- Use emojis like 😊 or 👍 occasionally to appear friendly.
-
-Important rules:
-- NEVER include text in parentheses in your responses.
-- NEVER include any meta-instructions in your responses.
-- NEVER include reminders about what you should do in future responses.
-- DO NOT include phrases like "We're here to help" or "I'm just an AI".
-- DO NOT include any text that instructs you what to do or how to behave.
-- DO NOT include any sentences that start with "If the user asks..." or "Remember..."
-- DO NOT include "(smile)" - instead, use actual emojis like 😊 or 👍 when appropriate.
-- DO NOT include numbered references like [1], [2], etc. in your responses.
-- DO NOT include any text that explains what your response is doing.
-- DO NOT include "user:" or "assistant:" prefixes in your responses.
-- DO NOT include hypothetical user questions in your responses.
-- DO NOT refuse to answer harmless non-medical questions like jokes or general knowledge.
-- Don't give exact dosages or diagnoses.
-- Be consistent in your responses regardless of the user's role."""
+# Simplified medical prompt for T5
+MEDICAL_SYSTEM_PROMPT = "You are a friendly medical assistant. Answer with short, clear health info. Use emojis like 😊. For serious issues, suggest seeing a doctor."

 # UI settings
 UI_CONFIG = {
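
Since generate_response() accepts max_tokens, temperature, and top_p as None, GENERATION_DEFAULTS presumably backfills whatever the caller leaves unset. A minimal sketch of that pattern (assumed; the actual fallback code sits outside this diff):

    from config import GENERATION_DEFAULTS

    def resolve_sampling_args(max_tokens=None, temperature=None, top_p=None):
        # Fall back to the config defaults for any argument left as None.
        if max_tokens is None:
            max_tokens = GENERATION_DEFAULTS["max_new_tokens"]
        if temperature is None:
            temperature = GENERATION_DEFAULTS["temperature"]
        if top_p is None:
            top_p = GENERATION_DEFAULTS["top_p"]
        return max_tokens, temperature, top_p
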