Spaces:

NLong
/

FakeNews_Detector

Running

App Files Files Community

NLong committed on Sep 28

Commit

95ca342

verified ·

1 Parent(s): c20ba74

Upload app.py

Browse files

Files changed (1) hide show

app.py +245 -42

app.py CHANGED Viewed

@@ -7,6 +7,10 @@ from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import re
 import os
 import numpy as np
 GOOGLE_API_KEY = "AIzaSyASwqVh3ELFVKH-W3WuHtmjg3XgtwjJQKg"
 SEARCH_ENGINE_ID = "f34f8a4816771488b"
@@ -15,6 +19,11 @@ MODEL_PATH = "./vietnamese_fake_news_model"
 genai.configure(api_key=GEMINI_API_KEY)
 print("Loading the DistilBERT model we trained...")
 try:
     if os.path.exists(MODEL_PATH):
@@ -44,6 +53,166 @@ except Exception as e:
         tokenizer = None
         model = None
 CREDIBLE_SOURCES = {
     'vnexpress.net': 0.95,
     'tuoitre.vn': 0.95,
@@ -219,7 +388,7 @@ def google_search(news_text):
             result = service.cse().list(
                 q=search_query,
                 cx=SEARCH_ENGINE_ID,
-                num=10
             ).execute()
             print(f"API Response keys: {list(result.keys())}")
@@ -351,6 +520,13 @@ def analyze_source_support(news_text, search_results):
 def analyze_with_gemini(news_text, search_results, distilbert_prediction, distilbert_confidence):
     """Use Gemini AI to analyze the news and compare with our model results"""
     try:
         # Try to use the latest Gemini model available
         try:
             model = genai.GenerativeModel('gemini-2.0-flash-exp')
@@ -363,11 +539,11 @@ def analyze_with_gemini(news_text, search_results, distilbert_prediction, distil
                 except:
                     model = genai.GenerativeModel('gemini-1.5-flash')
-        # Format the search results for Gemini
         search_summary = ""
         if search_results:
             search_summary = "Kết quả tìm kiếm Google:\n"
-            for i, result in enumerate(search_results[:5], 1):
                 search_summary += f"{i}. {result['title']}\n   {result['snippet']}\n   Nguồn: {result['link']}\n\n"
         else:
             search_summary = "Không tìm thấy kết quả tìm kiếm Google cho tin tức này. Điều này có thể do API bị giới hạn hoặc tin tức quá mới/chưa được đăng tải."
@@ -381,35 +557,37 @@ Bạn là một chuyên gia phân tích tin tức chuyên nghiệp. Hãy phân t
 {search_summary}
 Hãy thực hiện phân tích toàn diện theo các tiêu chí sau:
-1. **Phân tích nội dung**: Kiểm tra tính logic, mâu thuẫn, ngôn ngữ cảm xúc thái quá
-2. **Phân tích nguồn tin**: Đánh giá uy tín và độ tin cậy của nguồn
-3. **Phân tích ngữ cảnh**: So sánh với thông tin có sẵn và kiến thức thực tế
-4. **Phân tích ngôn ngữ**: Tìm dấu hiệu của tin giả như từ ngữ gây sốc, cảm xúc
-5. **Phân tích thời gian**: Kiểm tra tính hợp lý về mặt thời gian
 Trả lời theo định dạng sau (chỉ bằng tiếng Việt, viết chi tiết và chuyên nghiệp):
-**1. KẾT LUẬN:** [THẬT/GIẢ/KHÔNG XÁC ĐỊNH]
-**2. ĐỘ TIN CẬY:** [X%/Y%] (Trong đó X% là độ tin cậy tin THẬT, Y% là độ tin cậy tin GIẢ, X+Y=100%)
-**3. PHÂN TÍCH CHI TIẾT:**
-- **Nội dung:** [Phân tích chi tiết về nội dung tin tức]
-- **Nguồn tin:** [Đánh giá về nguồn và độ tin cậy]
-- **Ngữ cảnh:** [So sánh với thông tin có sẵn]
-- **Ngôn ngữ:** [Phân tích cách sử dụng từ ngữ]
-- **Thời gian:** [Kiểm tra tính hợp lý về mặt thời gian]
-**4. CÁC DẤU HIỆU CẢNH BÁO:** [Liệt kê các dấu hiệu đáng ngờ nếu có]
-**5. KHUYẾN NGHỊ CHO NGƯỜI ĐỌC:**
 - [Hướng dẫn cụ thể để kiểm chứng thông tin]
 - [Các nguồn tin đáng tin cậy để tham khảo]
 - [Cách phân biệt tin thật và tin giả]
-QUAN TRỌNG: Trong phần "ĐỘ TIN CẬY", hãy cung cấp tỷ lệ phần trăm chính xác dựa trên phân tích của bạn. Ví dụ: "95%/5%" nghĩa là 95% tin tức này là THẬT, 5% là GIẢ.
 Viết chi tiết, chuyên nghiệp và hữu ích cho người đọc.
 """
@@ -420,12 +598,12 @@ Viết chi tiết, chuyên nghiệp và hữu ích cho người đọc.
         if search_results:
             print(f"DEBUG - First search result title: {search_results[0].get('title', 'No title')}")
-        # Use settings optimized for detailed analysis
         generation_config = genai.types.GenerationConfig(
-            temperature=0.3,  # Slightly higher for more creative analysis
-            top_p=0.9,        # Allow more diverse vocabulary
-            top_k=40,         # More vocabulary choices for detailed writing
-            max_output_tokens=2000  # Allow much longer responses
         )
         response = model.generate_content(prompt, generation_config=generation_config)
         print("Gemini API response received successfully")
@@ -473,25 +651,25 @@ Viết chi tiết, chuyên nghiệp và hữu ích cho người đọc.
             if len(news_text) < 100:
                 warning_signs.append("Tin tức quá ngắn, thiếu thông tin chi tiết")
-            fallback_analysis = f"""**1. KẾT LUẬN:** {conclusion}
-**2. ĐỘ TIN CẬY:** {'5%/95%' if conclusion == 'GIẢ' else '95%/5%' if conclusion == 'THẬT' else '50%/50%'}
-**3. PHÂN TÍCH CHI TIẾT:**
-- **Nội dung:** {'Tin tức có vẻ hợp lý' if distilbert_prediction == 'REAL' else 'Tin tức có nhiều dấu hiệu đáng ngờ' if distilbert_prediction == 'FAKE' else 'Nội dung không rõ ràng'}
-- **Nguồn tin:** Google Search không khả dụng (hết quota) - không thể kiểm tra nguồn
-- **Ngữ cảnh:** Phân tích từ khóa: {confidence_boost}
-- **Ngôn ngữ:** {'Ngôn ngữ trung tính' if fake_score == real_score else 'Có dấu hiệu cảm xúc thái quá' if fake_score > real_score else 'Ngôn ngữ khách quan'}
-- **Thời gian:** Không thể xác minh do thiếu thông tin bổ sung
-**4. CÁC DẤU HIỆU CẢNH BÁO:**
 {chr(10).join([f"- {sign}" for sign in warning_signs]) if warning_signs else "- Không phát hiện dấu hiệu cảnh báo rõ ràng"}
-**5. KHUYẾN NGHỊ CHO NGƯỜI ĐỌC:**
-- **Kiểm tra nguồn:** Tìm kiếm thông tin tương tự trên các trang báo uy tín như VnExpress, Tuổi Trẻ, Thanh Niên
-- **Xác minh thời gian:** Kiểm tra xem tin tức có được đăng tải đồng thời trên nhiều nguồn không
-- **Đánh giá ngôn ngữ:** Tránh chia sẻ tin tức có ngôn ngữ cảm xúc thái quá hoặc tạo cảm giác cấp bách
-- **Lưu ý:** Do hệ thống API tạm thời không khả dụng, kết quả phân tích có thể không hoàn toàn chính xác"""
             return fallback_analysis
         # For other errors, see what models are available
@@ -513,8 +691,8 @@ def extract_gemini_percentage(gemini_analysis):
         # Look for the confidence percentage pattern
         import re
-        # Pattern to match "X%/Y%" format
-        percentage_pattern = r'độ tin cậy.*?(\d+)%/(\d+)%'
         match = re.search(percentage_pattern, gemini_lower)
         if match:
@@ -647,10 +825,10 @@ def calculate_combined_confidence(distilbert_prediction, distilbert_confidence,
     else:
         # Fallback to conclusion analysis
         conclusion_score = 0.5  # Default neutral
-        if "**kết luận:** giả" in gemini_lower or "kết luận:** fake" in gemini_lower or "kết luận:** giả" in gemini_lower:
             conclusion_score = 0.1  # Very low for FAKE
             print("Gemini Conclusion: FAKE")
-        elif "**kết luận:** thật" in gemini_lower or "kết luận:** real" in gemini_lower or "kết luận:** thật" in gemini_lower:
             conclusion_score = 0.9  # Very high for REAL
             print("Gemini Conclusion: REAL")
         elif "giả" in gemini_lower and "kết luận" in gemini_lower:
@@ -814,6 +992,26 @@ def analyze_news(news_text):
         # Step 6: Format the final results
         real_confidence = combined_confidence
         fake_confidence = 1 - combined_confidence
         # Build the detailed report with better formatting
         # Use combined_confidence to determine the final classification (not just DistilBERT)
@@ -953,6 +1151,11 @@ def create_interface():
         <div style="background: #f8f9fa; padding: 15px; border-radius: 10px; border-left: 4px solid #17a2b8; margin: 20px 0;">
         <p style="margin: 0; color: #495057;"><strong>💡 Lưu ý:</strong> Kết quả có thể thay đổi nhẹ giữa các lần phân tích do tính chất AI của Gemini, nhưng độ chính xác tổng thể vẫn được đảm bảo.</p>
         </div>
         </div>
         """)

 import re
 import os
 import numpy as np
+import json
+import sqlite3
+from datetime import datetime
+import hashlib
 GOOGLE_API_KEY = "AIzaSyASwqVh3ELFVKH-W3WuHtmjg3XgtwjJQKg"
 SEARCH_ENGINE_ID = "f34f8a4816771488b"
 genai.configure(api_key=GEMINI_API_KEY)
+# Knowledge Base Configuration
+KNOWLEDGE_BASE_DB = "knowledge_base.db"
+CONFIDENCE_THRESHOLD = 0.95  # 95% threshold for auto-updating knowledge base
+ENABLE_KNOWLEDGE_BASE_SEARCH = False  # Set to True to enable knowledge base search (slower)
 print("Loading the DistilBERT model we trained...")
 try:
     if os.path.exists(MODEL_PATH):
         tokenizer = None
         model = None
+# --- KNOWLEDGE BASE MANAGEMENT ---
+def init_knowledge_base():
+    """Create the SQLite knowledge-base table if it does not exist yet.
+
+    Opens the database file named by KNOWLEDGE_BASE_DB, issues
+    ``CREATE TABLE IF NOT EXISTS knowledge_entries`` and commits.
+    The connection is closed even when table creation fails; any
+    sqlite3 error propagates to the caller.
+    """
+    conn = sqlite3.connect(KNOWLEDGE_BASE_DB)
+    try:
+        cursor = conn.cursor()
+        cursor.execute('''
+        CREATE TABLE IF NOT EXISTS knowledge_entries (
+            id INTEGER PRIMARY KEY AUTOINCREMENT,
+            content_hash TEXT UNIQUE,
+            news_text TEXT,
+            prediction TEXT,
+            confidence REAL,
+            search_results TEXT,
+            gemini_analysis TEXT,
+            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            last_accessed TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
+            access_count INTEGER DEFAULT 1
+        )
+    ''')
+        conn.commit()
+    finally:
+        # Release the file handle on every path, including a failed CREATE.
+        conn.close()
+    print("Knowledge base initialized successfully!")
+def add_to_knowledge_base(news_text, prediction, confidence, search_results, gemini_analysis):
+    """Store one analysis result in the knowledge base, skipping duplicates.
+
+    Args:
+        news_text: The analysed news text; its MD5 digest is the dedup key.
+        prediction: Label stored in the ``prediction`` column.
+        confidence: Confidence as a fraction (logged with ``:.1%``).
+        search_results: Search results, serialised to JSON before storage.
+        gemini_analysis: Analysis text stored as-is.
+
+    Returns:
+        True when a new row was inserted. False when a row with the same
+        content hash already exists or when any error occurred (the error
+        is printed, not raised).
+    """
+    try:
+        # MD5 serves only as a deduplication key here, not as a security hash.
+        content_hash = hashlib.md5(news_text.encode('utf-8')).hexdigest()
+        conn = sqlite3.connect(KNOWLEDGE_BASE_DB)
+        try:
+            cursor = conn.cursor()
+            # Check if entry already exists
+            cursor.execute('SELECT id FROM knowledge_entries WHERE content_hash = ?', (content_hash,))
+            if cursor.fetchone():
+                print(f"Entry already exists in knowledge base (hash: {content_hash[:8]}...)")
+                return False
+            # Insert new entry
+            cursor.execute('''
+            INSERT INTO knowledge_entries
+            (content_hash, news_text, prediction, confidence, search_results, gemini_analysis)
+            VALUES (?, ?, ?, ?, ?, ?)
+        ''', (
+                content_hash,
+                news_text,
+                prediction,
+                confidence,
+                json.dumps(search_results, ensure_ascii=False),
+                gemini_analysis
+            ))
+            conn.commit()
+        finally:
+            # Close on every path (duplicate, success, or failure) so the
+            # connection is never leaked when a statement raises.
+            conn.close()
+        print(f"✅ Added high-confidence result to knowledge base (confidence: {confidence:.1%})")
+        print(f"   Hash: {content_hash[:8]}...")
+        print(f"   Prediction: {prediction}")
+        return True
+    except Exception as e:
+        print(f"Error adding to knowledge base: {e}")
+        return False
+def search_knowledge_base(query_text, limit=5):
+    """Search the knowledge base for entries containing the start of a text.
+
+    The first 50 characters of ``query_text`` are matched as a literal
+    substring against ``news_text`` and ``gemini_analysis``. Every returned
+    row has its ``access_count`` incremented and ``last_accessed`` refreshed.
+
+    Args:
+        query_text: News text to look up.
+        limit: Maximum number of rows to return.
+
+    Returns:
+        A list of 7-tuples ``(news_text, prediction, confidence,
+        search_results, gemini_analysis, created_at, access_count)`` ordered
+        by confidence, then access count, both descending. An empty list is
+        returned for an empty query, when nothing matches, or on any error
+        (the error is printed, not raised).
+    """
+    # An empty query would become the pattern '%%' and match every row.
+    if not query_text or not query_text.strip():
+        return []
+    try:
+        # Escape LIKE metacharacters so '%' and '_' in the text match literally.
+        escaped = query_text[:50].replace('\\', '\\\\').replace('%', '\\%').replace('_', '\\_')
+        pattern = f'%{escaped}%'
+        conn = sqlite3.connect(KNOWLEDGE_BASE_DB)
+        try:
+            cursor = conn.cursor()
+            # Simple text similarity search (you can enhance this with embeddings later)
+            cursor.execute('''
+            SELECT id, news_text, prediction, confidence, search_results, gemini_analysis,
+                   created_at, access_count
+            FROM knowledge_entries
+            WHERE news_text LIKE ? ESCAPE '\\' OR gemini_analysis LIKE ? ESCAPE '\\'
+            ORDER BY confidence DESC, access_count DESC
+            LIMIT ?
+        ''', (pattern, pattern, limit))
+            rows = cursor.fetchall()
+            # Update access statistics by primary key rather than by comparing
+            # the full news text of every row.
+            cursor.executemany('''
+                UPDATE knowledge_entries
+                SET access_count = access_count + 1, last_accessed = CURRENT_TIMESTAMP
+                WHERE id = ?
+            ''', [(row[0],) for row in rows])
+            conn.commit()
+        finally:
+            conn.close()
+        # Drop the id column so callers still receive the original 7-tuple shape.
+        results = [row[1:] for row in rows]
+        if results:
+            print(f"📚 Found {len(results)} similar entries in knowledge base")
+        return results
+    except Exception as e:
+        print(f"Error searching knowledge base: {e}")
+        return []
+def format_knowledge_for_rag(knowledge_results):
+    """Render knowledge-base rows as a text section for prompt augmentation.
+
+    Args:
+        knowledge_results: Iterable of 7-tuples ``(news_text, prediction,
+            confidence, search_results, gemini_analysis, created_at,
+            access_count)``. A falsy value yields an empty string.
+
+    Returns:
+        A header line, one numbered entry per row (confidence, access count,
+        the first 200 characters of the text, the prediction and the creation
+        time) and a closing separator line; ``""`` when there are no rows.
+    """
+    if not knowledge_results:
+        return ""
+    sections = ["\n=== KIẾN THỨC TƯƠNG TỰ TỪ CƠ SỞ DỮ LIỆU ===\n"]
+    for position, row in enumerate(knowledge_results, 1):
+        # The stored search results and analysis are unpacked but not rendered.
+        text, label, score, _stored_search, _stored_analysis, timestamp, hits = row
+        sections.append(f"\n{position}. Tin tức tương tự (Độ tin cậy: {score:.1%}, Lần truy cập: {hits}):\n")
+        sections.append(f"   Nội dung: {text[:200]}...\n")
+        sections.append(f"   Kết luận: {label}\n")
+        sections.append(f"   Thời gian: {timestamp}\n")
+    sections.append("\n==========================================\n")
+    return "".join(sections)
+def get_knowledge_base_stats():
+    """Collect summary statistics about the knowledge base.
+
+    Returns:
+        A dict with ``total_entries`` (row count), ``prediction_counts``
+        (prediction label -> row count), ``avg_confidence`` (0 when the
+        table is empty) and ``top_accessed`` (up to three
+        ``(news_text, access_count)`` tuples, most accessed first).
+        None when any error occurred (the error is printed, not raised).
+    """
+    try:
+        conn = sqlite3.connect(KNOWLEDGE_BASE_DB)
+        try:
+            cursor = conn.cursor()
+            # Get total entries
+            cursor.execute('SELECT COUNT(*) FROM knowledge_entries')
+            total_entries = cursor.fetchone()[0]
+            # Get entries by prediction
+            cursor.execute('SELECT prediction, COUNT(*) FROM knowledge_entries GROUP BY prediction')
+            prediction_counts = dict(cursor.fetchall())
+            # Get average confidence; AVG() yields NULL on an empty table, hence "or 0"
+            cursor.execute('SELECT AVG(confidence) FROM knowledge_entries')
+            avg_confidence = cursor.fetchone()[0] or 0
+            # Get most accessed entries
+            cursor.execute('SELECT news_text, access_count FROM knowledge_entries ORDER BY access_count DESC LIMIT 3')
+            top_accessed = cursor.fetchall()
+        finally:
+            # Close even when one of the queries raises, so the handle is not leaked.
+            conn.close()
+        return {
+            'total_entries': total_entries,
+            'prediction_counts': prediction_counts,
+            'avg_confidence': avg_confidence,
+            'top_accessed': top_accessed
+        }
+    except Exception as e:
+        print(f"Error getting knowledge base stats: {e}")
+        return None
+# Initialize knowledge base on startup
+init_knowledge_base()
 CREDIBLE_SOURCES = {
     'vnexpress.net': 0.95,
     'tuoitre.vn': 0.95,
             result = service.cse().list(
                 q=search_query,
                 cx=SEARCH_ENGINE_ID,
+                num=5  # Reduced from 10 to 5 for faster processing
             ).execute()
             print(f"API Response keys: {list(result.keys())}")
 def analyze_with_gemini(news_text, search_results, distilbert_prediction, distilbert_confidence):
     """Use Gemini AI to analyze the news and compare with our model results"""
     try:
+        # Knowledge base search (optional for faster performance)
+        if ENABLE_KNOWLEDGE_BASE_SEARCH:
+            print("🔍 Searching knowledge base for similar entries...")
+            knowledge_results = search_knowledge_base(news_text, limit=3)
+            knowledge_context = format_knowledge_for_rag(knowledge_results)
+        else:
+            knowledge_context = ""
         # Try to use the latest Gemini model available
         try:
             model = genai.GenerativeModel('gemini-2.0-flash-exp')
                 except:
                     model = genai.GenerativeModel('gemini-1.5-flash')
+        # Format the search results for Gemini (limit to top 3 for speed)
         search_summary = ""
         if search_results:
             search_summary = "Kết quả tìm kiếm Google:\n"
+            for i, result in enumerate(search_results[:3], 1):  # Reduced from 5 to 3
                 search_summary += f"{i}. {result['title']}\n   {result['snippet']}\n   Nguồn: {result['link']}\n\n"
         else:
             search_summary = "Không tìm thấy kết quả tìm kiếm Google cho tin tức này. Điều này có thể do API bị giới hạn hoặc tin tức quá mới/chưa được đăng tải."
 {search_summary}
+{knowledge_context}
 Hãy thực hiện phân tích toàn diện theo các tiêu chí sau:
+1. Phân tích nội dung: Kiểm tra tính logic, mâu thuẫn, ngôn ngữ cảm xúc thái quá
+2. Phân tích nguồn tin: Đánh giá uy tín và độ tin cậy của nguồn
+3. Phân tích ngữ cảnh: So sánh với thông tin có sẵn và kiến thức thực tế
+4. Phân tích ngôn ngữ: Tìm dấu hiệu của tin giả như từ ngữ gây sốc, cảm xúc
+5. Phân tích thời gian: Kiểm tra tính hợp lý về mặt thời gian
 Trả lời theo định dạng sau (chỉ bằng tiếng Việt, viết chi tiết và chuyên nghiệp):
+1. KẾT LUẬN: [THẬT/GIẢ/KHÔNG XÁC ĐỊNH]
+2. ĐỘ TIN CẬY: [THẬT: X% / GIẢ: Y%] (Trong đó X% là độ tin cậy tin THẬT, Y% là độ tin cậy tin GIẢ, X+Y=100%)
+3. PHÂN TÍCH CHI TIẾT:
+- Nội dung: [Phân tích chi tiết về nội dung tin tức]
+- Nguồn tin: [Đánh giá về nguồn và độ tin cậy]
+- Ngữ cảnh: [So sánh với thông tin có sẵn]
+- Ngôn ngữ: [Phân tích cách sử dụng từ ngữ]
+- Thời gian: [Kiểm tra tính hợp lý về mặt thời gian]
+4. CÁC DẤU HIỆU CẢNH BÁO: [Liệt kê các dấu hiệu đáng ngờ nếu có]
+5. KHUYẾN NGHỊ CHO NGƯỜI ĐỌC:
 - [Hướng dẫn cụ thể để kiểm chứng thông tin]
 - [Các nguồn tin đáng tin cậy để tham khảo]
 - [Cách phân biệt tin thật và tin giả]
+QUAN TRỌNG: Trong phần "ĐỘ TIN CẬY", hãy cung cấp tỷ lệ phần trăm chính xác dựa trên phân tích của bạn. Ví dụ: "THẬT: 95% / GIẢ: 5%" nghĩa là 95% tin tức này là THẬT, 5% là GIẢ.
 Viết chi tiết, chuyên nghiệp và hữu ích cho người đọc.
 """
         if search_results:
             print(f"DEBUG - First search result title: {search_results[0].get('title', 'No title')}")
+        # Use settings optimized for faster processing
         generation_config = genai.types.GenerationConfig(
+            temperature=0.3,  # Lower for more consistent results
+            top_p=0.8,        # Reduced for faster processing
+            top_k=20,         # Reduced for faster processing
+            max_output_tokens=1000  # Reduced for faster responses
         )
         response = model.generate_content(prompt, generation_config=generation_config)
         print("Gemini API response received successfully")
             if len(news_text) < 100:
                 warning_signs.append("Tin tức quá ngắn, thiếu thông tin chi tiết")
+            fallback_analysis = f"""1. KẾT LUẬN: {conclusion}
+2. ĐỘ TIN CẬY: {'THẬT: 5% / GIẢ: 95%' if conclusion == 'GIẢ' else 'THẬT: 95% / GIẢ: 5%' if conclusion == 'THẬT' else 'THẬT: 50% / GIẢ: 50%'}
+3. PHÂN TÍCH CHI TIẾT:
+- Nội dung: {'Tin tức có vẻ hợp lý' if distilbert_prediction == 'REAL' else 'Tin tức có nhiều dấu hiệu đáng ngờ' if distilbert_prediction == 'FAKE' else 'Nội dung không rõ ràng'}
+- Nguồn tin: Google Search không khả dụng (hết quota) - không thể kiểm tra nguồn
+- Ngữ cảnh: Phân tích từ khóa: {confidence_boost}
+- Ngôn ngữ: {'Ngôn ngữ trung tính' if fake_score == real_score else 'Có dấu hiệu cảm xúc thái quá' if fake_score > real_score else 'Ngôn ngữ khách quan'}
+- Thời gian: Không thể xác minh do thiếu thông tin bổ sung
+4. CÁC DẤU HIỆU CẢNH BÁO:
 {chr(10).join([f"- {sign}" for sign in warning_signs]) if warning_signs else "- Không phát hiện dấu hiệu cảnh báo rõ ràng"}
+5. KHUYẾN NGHỊ CHO NGƯỜI ĐỌC:
+- Kiểm tra nguồn: Tìm kiếm thông tin tương tự trên các trang báo uy tín như VnExpress, Tuổi Trẻ, Thanh Niên
+- Xác minh thời gian: Kiểm tra xem tin tức có được đăng tải đồng thời trên nhiều nguồn không
+- Đánh giá ngôn ngữ: Tránh chia sẻ tin tức có ngôn ngữ cảm xúc thái quá hoặc tạo cảm giác cấp bách
+- Lưu ý: Do hệ thống API tạm thời không khả dụng, kết quả phân tích có thể không hoàn toàn chính xác"""
             return fallback_analysis
         # For other errors, see what models are available
         # Look for the confidence percentage pattern
         import re
+        # Pattern to match "THẬT: X% / GIẢ: Y%" format
+        percentage_pattern = r'độ tin cậy.*?thật.*?(\d+)%.*?giả.*?(\d+)%'
         match = re.search(percentage_pattern, gemini_lower)
         if match:
     else:
         # Fallback to conclusion analysis
         conclusion_score = 0.5  # Default neutral
+        if "kết luận: giả" in gemini_lower or "kết luận: fake" in gemini_lower:
             conclusion_score = 0.1  # Very low for FAKE
             print("Gemini Conclusion: FAKE")
+        elif "kết luận: thật" in gemini_lower or "kết luận: real" in gemini_lower:
             conclusion_score = 0.9  # Very high for REAL
             print("Gemini Conclusion: REAL")
         elif "giả" in gemini_lower and "kết luận" in gemini_lower:
         # Step 6: Format the final results
         real_confidence = combined_confidence
         fake_confidence = 1 - combined_confidence
+        # Step 7: Check if result should be added to knowledge base
+        max_confidence = max(real_confidence, fake_confidence)
+        if max_confidence > CONFIDENCE_THRESHOLD:
+            print(f"🚀 High confidence result detected ({max_confidence:.1%}) - adding to knowledge base...")
+            final_prediction = "REAL" if real_confidence > fake_confidence else "FAKE"
+            # Add to knowledge base
+            success = add_to_knowledge_base(
+                news_text=news_text,
+                prediction=final_prediction,
+                confidence=max_confidence,
+                search_results=search_results,
+                gemini_analysis=gemini_analysis
+            )
+            if success:
+                print("✅ Successfully added to knowledge base for future RAG retrieval!")
+            else:
+                print("⚠️ Failed to add to knowledge base (duplicate or error)")
         # Build the detailed report with better formatting
         # Use combined_confidence to determine the final classification (not just DistilBERT)
         <div style="background: #f8f9fa; padding: 15px; border-radius: 10px; border-left: 4px solid #17a2b8; margin: 20px 0;">
         <p style="margin: 0; color: #495057;"><strong>💡 Lưu ý:</strong> Kết quả có thể thay đổi nhẹ giữa các lần phân tích do tính chất AI của Gemini, nhưng độ chính xác tổng thể vẫn được đảm bảo.</p>
         </div>
+        <div style="background: linear-gradient(135deg, #a8edea 0%, #fed6e3 100%); padding: 15px; border-radius: 10px; margin: 20px 0;">
+        <h4 style="margin: 0 0 10px 0; color: #333;">🧠 Hệ thống RAG với Cơ sở Tri thức Tự động</h4>
+        <p style="margin: 0; color: #555; font-size: 14px;">Khi độ tin cậy > 95%, hệ thống sẽ tự động lưu kết quả vào cơ sở tri thức để sử dụng cho các phân tích tương lai.</p>
+        </div>
         </div>
         """)