#!/usr/bin/env python3
"""
Textilindo AI Assistant - Hugging Face Spaces
"""
from flask import Flask, request, jsonify, render_template
import os
import json
import requests
from difflib import SequenceMatcher
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)


def load_system_prompt(default_text):
    """Load system prompt from configs/system_prompt.md if available.

    The markdown file may wrap the prompt in a triple-quoted block; if so,
    only that block is used. Otherwise markdown headings are stripped and
    the remaining text is used. Falls back to *default_text* when the file
    is missing, empty, or unreadable.
    """
    try:
        base_dir = os.path.dirname(__file__)
        md_path = os.path.join(base_dir, 'configs', 'system_prompt.md')
        if not os.path.exists(md_path):
            return default_text
        with open(md_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Prefer the text between the first and last triple-quote markers.
        start = content.find('"""')
        end = content.rfind('"""')
        if start != -1 and end != -1 and end > start:
            return content[start + 3:end].strip()
        # Otherwise drop heading lines and keep the rest.
        lines = [line for line in content.splitlines()
                 if not line.strip().startswith('#')]
        cleaned = '\n'.join(lines).strip()
        return cleaned or default_text
    except Exception:
        # Best-effort loader: any failure falls back to the default prompt.
        return default_text


class TextilindoAI:
    """Simple retrieval-augmented assistant backed by JSONL example datasets."""

    def __init__(self):
        # SYSTEM_PROMPT env var overrides the prompt file / built-in default.
        self.system_prompt = os.getenv(
            'SYSTEM_PROMPT',
            load_system_prompt(
                "You are Textilindo AI Assistant. "
                "Be concise, helpful, and use Indonesian."
            )
        )
        self.dataset = self.load_all_datasets()

    def load_all_datasets(self):
        """Load every *.jsonl file from the first data directory found.

        Returns a flat list of parsed JSON objects; malformed lines are
        logged and skipped so one bad record cannot abort loading.
        """
        dataset = []

        # Try multiple possible data directory paths (local dev vs container).
        possible_data_dirs = [
            "data",
            "./data",
            "/app/data",
            os.path.join(os.path.dirname(__file__), "data"),
        ]
        data_dir = None
        for dir_path in possible_data_dirs:
            if os.path.exists(dir_path):
                data_dir = dir_path
                logger.info(f"Found data directory: {data_dir}")
                break

        if not data_dir:
            logger.warning("No data directory found in any of the expected locations")
            return dataset

        try:
            for filename in os.listdir(data_dir):
                if not filename.endswith('.jsonl'):
                    continue
                filepath = os.path.join(data_dir, filename)
                # Remember size before this file so we can report a true
                # per-file count (the old code re-counted the whole dataset).
                loaded_before = len(dataset)
                try:
                    with open(filepath, 'r', encoding='utf-8') as f:
                        for line_num, line in enumerate(f, 1):
                            line = line.strip()
                            if not line:
                                continue
                            try:
                                dataset.append(json.loads(line))
                            except json.JSONDecodeError as e:
                                # Fixed: include the real filename (was "(unknown)").
                                logger.warning(
                                    f"Invalid JSON in {filename} line {line_num}: {e}"
                                )
                    logger.info(
                        f"Loaded {filename}: {len(dataset) - loaded_before} examples"
                    )
                except Exception as e:
                    logger.error(f"Error loading {filename}: {e}")
        except Exception as e:
            logger.error(f"Error reading data directory {data_dir}: {e}")

        logger.info(f"Total examples loaded: {len(dataset)}")
        return dataset

    def find_relevant_context(self, user_query, top_k=3):
        """Return up to *top_k* dataset examples most similar to the query.

        Similarity is a weighted blend of fuzzy-match ratios against each
        example's instruction (70%) and output (30%); matches scoring at
        or below 0.1 are discarded.
        """
        if not self.dataset:
            return []

        query = user_query.lower()
        scores = []
        for idx, example in enumerate(self.dataset):
            instruction = example.get('instruction', '').lower()
            output = example.get('output', '').lower()
            instruction_score = SequenceMatcher(None, query, instruction).ratio()
            output_score = SequenceMatcher(None, query, output).ratio()
            combined_score = (instruction_score * 0.7) + (output_score * 0.3)
            scores.append((combined_score, idx))

        scores.sort(reverse=True)
        return [self.dataset[idx] for score, idx in scores[:top_k] if score > 0.1]

    def create_context_prompt(self, user_query, relevant_examples):
        """Build a few-shot prompt embedding *relevant_examples* before the query.

        Returns *user_query* unchanged when there are no examples.
        """
        if not relevant_examples:
            return user_query

        context_parts = [
            "Berikut adalah beberapa contoh pertanyaan dan jawaban tentang Textilindo:",
            "",
        ]
        for i, example in enumerate(relevant_examples, 1):
            context_parts.append(f"Contoh {i}:")
            context_parts.append(f"Pertanyaan: {example.get('instruction', '')}")
            context_parts.append(f"Jawaban: {example.get('output', '')}")
            context_parts.append("")
        context_parts.append("Berdasarkan contoh di atas, jawab pertanyaan berikut:")
        context_parts.append(f"Pertanyaan: {user_query}")
        context_parts.append("Jawaban:")
        return "\n".join(context_parts)

    def chat(self, message, max_tokens=300, temperature=0.7):
        """Generate a response for *message*.

        NOTE: this is a placeholder — `max_tokens` and `temperature` are
        accepted for API compatibility but not yet used; a real inference
        call (e.g. to an HF Space endpoint) would consume the enhanced
        prompt and these parameters.
        """
        relevant_examples = self.find_relevant_context(message, 3)
        if relevant_examples:
            enhanced_prompt = self.create_context_prompt(message, relevant_examples)
            context_used = True
        else:
            enhanced_prompt = message
            context_used = False

        # For now, return a simple canned response.
        # In production, this would call your HF Space inference endpoint.
        response = (
            f"Terima kasih atas pertanyaan Anda: {message}. "
            f"Saya akan membantu Anda dengan informasi tentang Textilindo."
        )
        return {
            "success": True,
            "response": response,
            "context_used": context_used,
            "relevant_examples_count": len(relevant_examples),
        }


# Initialize AI
ai = TextilindoAI()


@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint."""
    return jsonify({
        "status": "healthy",
        "service": "Textilindo AI Assistant",
        "dataset_loaded": len(ai.dataset) > 0,
        "dataset_size": len(ai.dataset),
    })


@app.route('/chat', methods=['POST'])
def chat():
    """Main chat endpoint: expects JSON with a required 'message' field."""
    try:
        data = request.get_json()
        if not data:
            return jsonify({
                "success": False,
                "error": "No JSON data provided",
            }), 400

        message = data.get('message', '').strip()
        if not message:
            return jsonify({
                "success": False,
                "error": "Message is required",
            }), 400

        # Optional generation parameters (forwarded to the AI backend).
        max_tokens = data.get('max_tokens', 300)
        temperature = data.get('temperature', 0.7)

        result = ai.chat(message, max_tokens, temperature)
        if result["success"]:
            return jsonify(result)
        return jsonify(result), 500
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}")
        return jsonify({
            "success": False,
            "error": f"Internal server error: {str(e)}",
        }), 500


@app.route('/stats', methods=['GET'])
def get_stats():
    """Get dataset and system statistics (topic counts from example metadata)."""
    try:
        topics = {}
        for example in ai.dataset:
            metadata = example.get('metadata', {})
            topic = metadata.get('topic', 'unknown')
            topics[topic] = topics.get(topic, 0) + 1

        return jsonify({
            "success": True,
            "dataset": {
                "total_examples": len(ai.dataset),
                "topics": topics,
                "topics_count": len(topics),
            },
            "system": {
                "api_version": "1.0.0",
                "status": "operational",
            },
        })
    except Exception as e:
        logger.error(f"Error in stats endpoint: {e}")
        return jsonify({
            "success": False,
            "error": f"Internal server error: {str(e)}",
        }), 500


@app.route('/', methods=['GET'])
def root():
    """API root endpoint with documentation."""
    return jsonify({
        "service": "Textilindo AI Assistant",
        "version": "1.0.0",
        "description": "AI-powered customer service for Textilindo",
        "endpoints": {
            "GET /": "API documentation (this endpoint)",
            "GET /health": "Health check",
            "POST /chat": "Chat with AI",
            "GET /stats": "Dataset and system statistics",
        },
        "usage": {
            "chat": {
                "method": "POST",
                "url": "/chat",
                "body": {
                    "message": "string (required)",
                    "max_tokens": "integer (optional, default: 300)",
                    "temperature": "float (optional, default: 0.7)",
                },
            },
        },
        "dataset_size": len(ai.dataset),
    })


if __name__ == '__main__':
    logger.info("Starting Textilindo AI Assistant...")
    logger.info(f"Dataset loaded: {len(ai.dataset)} examples")
    app.run(
        debug=False,
        host='0.0.0.0',
        port=8080,
    )