Spaces:

Shreyas94
/

Sentinel02

Sleeping

App Files Community

Shreyas94 committed on Aug 4

Commit

8675311

verified ·

1 Parent(s): cf5225f

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -17

app.py CHANGED Viewed

@@ -594,16 +594,16 @@ class LLMSummarizer:
         return """You are an expert research assistant. Your task is to analyze search results and provide a comprehensive, accurate summary that directly answers the user's query.
 CRITICAL INSTRUCTIONS:
-1. ONLY use information that is directly relevant to the user's query
-2. If the search results don't contain relevant information, explicitly state this
-3. Don't make up information or provide generic advice
-4. Synthesize information from multiple sources when available
 5. Include specific facts, dates, numbers, and quotes when present
 6. If information is contradictory between sources, mention this
 7. Cite sources by mentioning the publication or website name
-8. Be specific and detailed rather than vague
-If the search results are not relevant to the query, respond with: "The search results do not contain sufficient relevant information to answer your query about [topic]. The results primarily contained [brief description of what was actually found]."
 Format your response as a comprehensive summary, not bullet points."""
@@ -612,10 +612,35 @@ Format your response as a comprehensive summary, not bullet points."""
         valid_results = []
         validation_info = []
-        query_keywords = set(query.lower().split())
         for result in search_results:
-            if not result.content or len(result.content.strip()) < 100:
                 validation_info.append(f"Skipped '{result.title}' - insufficient content")
                 continue
@@ -623,23 +648,45 @@ Format your response as a comprehensive summary, not bullet points."""
             content_lower = result.content.lower()
             title_lower = result.title.lower()
             snippet_lower = result.snippet.lower()
-            # Count relevant keywords
             relevant_score = 0
-            for keyword in query_keywords:
-                if len(keyword) > 2:  # Skip very short words
                     if keyword in content_lower:
                         relevant_score += 2
                     elif keyword in title_lower:
-                        relevant_score += 1
                     elif keyword in snippet_lower:
-                        relevant_score += 0.5
-            if relevant_score > 0:
                 valid_results.append(result)
-                validation_info.append(f"✓ '{result.title}' - relevance score: {relevant_score}")
             else:
-                validation_info.append(f"Skipped '{result.title}' - not relevant to query")
         validation_summary = "\n".join(validation_info)
         return valid_results, validation_summary
@@ -655,7 +702,7 @@ Format your response as a comprehensive summary, not bullet points."""
 VALIDATION RESULTS:
 {validation_info}
-No search results contained relevant content for this query. Please provide a response indicating that insufficient relevant information was found."""
         content_parts = [f'User Query: "{query}"\n']
         content_parts.append(f"Number of relevant sources found: {len(valid_results)}\n")

         return """You are an expert research assistant. Your task is to analyze search results and provide a comprehensive, accurate summary that directly answers the user's query.
 CRITICAL INSTRUCTIONS:
+1. Analyze ALL provided content carefully - even if it seems only tangentially related
+2. Look for connections between the query and the content, even if not immediately obvious
+3. If content is about a parent company/organization mentioned in the query, include relevant information
+4. Extract and synthesize any information that could be relevant to answering the user's question
 5. Include specific facts, dates, numbers, and quotes when present
 6. If information is contradictory between sources, mention this
 7. Cite sources by mentioning the publication or website name
+8. Be thorough and detailed rather than dismissive
+ONLY state that results are not relevant if they are completely unrelated to any aspect of the query. If there is ANY connection (like parent company info, related business segments, etc.), include that information.
 Format your response as a comprehensive summary, not bullet points."""
         valid_results = []
         validation_info = []
+        # More intelligent keyword extraction
+        query_lower = query.lower()
+        # Extract key entities and terms
+        important_keywords = []
+        # Split query into words and extract meaningful terms
+        words = query_lower.split()
+        for word in words:
+            if len(word) > 2 and word not in ['news', 'latest', 'recent', 'update', 'information', 'about']:
+                important_keywords.append(word)
+        # Also look for multi-word entities (like company names)
+        # Extract potential company/entity names from query
+        entity_patterns = [
+            r'\b[A-Z][a-z]+ [A-Z][a-z]+(?:\s+[A-Z][a-z]+)*\b',  # Proper names
+            r'\b[A-Z]{2,}(?:\s+[A-Z][a-z]+)*\b',  # Acronyms
+        ]
+        for pattern in entity_patterns:
+            matches = re.findall(pattern, query)
+            for match in matches:
+                important_keywords.extend(match.lower().split())
+        # Remove duplicates
+        important_keywords = list(set(important_keywords))
         for result in search_results:
+            if not result.content or len(result.content.strip()) < 50:  # Lowered threshold
                 validation_info.append(f"Skipped '{result.title}' - insufficient content")
                 continue
             content_lower = result.content.lower()
             title_lower = result.title.lower()
             snippet_lower = result.snippet.lower()
+            combined_text = f"{title_lower} {snippet_lower} {content_lower}"
+            # More flexible relevance scoring
             relevant_score = 0
+            matched_keywords = []
+            for keyword in important_keywords:
+                if keyword in combined_text:
                     if keyword in content_lower:
                         relevant_score += 2
+                        matched_keywords.append(keyword)
                     elif keyword in title_lower:
+                        relevant_score += 3  # Title matches are very important
+                        matched_keywords.append(keyword)
                     elif keyword in snippet_lower:
+                        relevant_score += 1
+                        matched_keywords.append(keyword)
+            # Special handling for acronyms and company names
+            # If query contains a company acronym (like KKR), be more lenient
+            has_company_match = any(len(kw) <= 4 and kw.isupper() for kw in query.split())
+            if has_company_match:
+                relevant_score += 1  # Boost score for company-related queries
+            # Lower the threshold and accept more results
+            if relevant_score >= 1 or len(matched_keywords) >= 1:
                 valid_results.append(result)
+                validation_info.append(f"✓ '{result.title}' - score: {relevant_score}, matched: {matched_keywords}")
             else:
+                validation_info.append(f"Skipped '{result.title}' - no relevant keywords found")
+        # If we filtered out too many results, be more lenient
+        if len(valid_results) < len(search_results) * 0.3:  # If we filtered out more than 70%
+            validation_info.append("⚠️ Too many results filtered, being more lenient...")
+            # Add back results that have any content
+            for result in search_results:
+                if result not in valid_results and result.content.strip():
+                    valid_results.append(result)
+                    validation_info.append(f"✓ '{result.title}' - added back (lenient mode)")
         validation_summary = "\n".join(validation_info)
         return valid_results, validation_summary
 VALIDATION RESULTS:
 {validation_info}
+The search results did not pass the initial relevance filter, but this might be overly restrictive. Please analyze the raw content provided and extract any information that could be relevant to answering the user's query, even if the connection is not immediately obvious."""
         content_parts = [f'User Query: "{query}"\n']
         content_parts.append(f"Number of relevant sources found: {len(valid_results)}\n")