#!/usr/bin/env python3
"""
Textilindo AI API Server
Clean API-only implementation
"""
from flask import Flask, request, jsonify
from flask_cors import CORS
import os
import json
import requests
from difflib import SequenceMatcher
import logging

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)  # Enable CORS for all routes
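
# Configuration comes from environment variables (names taken from the code below):
#   NOVITA_API_KEY - required; Novita AI API key used for chat completions
#   SYSTEM_PROMPT  - optional; overrides the prompt loaded from configs/system_prompt.md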

def load_system_prompt(default_text):
    """Load the system prompt from configs/system_prompt.md, falling back to default_text."""
    try:
        base_dir = os.path.dirname(__file__)
        md_path = os.path.join(base_dir, 'configs', 'system_prompt.md')
        if not os.path.exists(md_path):
            return default_text
        with open(md_path, 'r', encoding='utf-8') as f:
            content = f.read()
        # Prefer the text enclosed in triple quotes, if present
        start = content.find('"""')
        end = content.rfind('"""')
        if start != -1 and end != -1 and end > start:
            return content[start+3:end].strip()
        # Otherwise drop markdown headings and use the remaining text
        lines = []
        for line in content.splitlines():
            if line.strip().startswith('#'):
                continue
            lines.append(line)
        cleaned = '\n'.join(lines).strip()
        return cleaned or default_text
    except Exception:
        return default_text

class TextilindoAI:
    def __init__(self, api_key):
        self.api_key = api_key
        self.base_url = "https://api.novita.ai/openai"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json"
        }
        self.model = "qwen/qwen3-235b-a22b-instruct-2507"
        self.system_prompt = os.getenv(
            'SYSTEM_PROMPT',
            load_system_prompt("You are Textilindo AI Assistant. Be concise, helpful, and use Indonesian.")
        )
        self.dataset = self.load_dataset()
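
    # The training dataset is expected as JSON Lines, one object per line, e.g.
    # {"instruction": "...", "output": "...", "metadata": {"topic": "..."}}
    # (field names inferred from how the records are read in the methods below).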
    def load_dataset(self):
        """Load the training dataset"""
        dataset = []
        dataset_path = "data/textilindo_training_data.jsonl"
        if os.path.exists(dataset_path):
            try:
                with open(dataset_path, 'r', encoding='utf-8') as f:
                    for line in f:
                        line = line.strip()
                        if line:
                            data = json.loads(line)
                            dataset.append(data)
                logger.info(f"Loaded {len(dataset)} examples from dataset")
            except Exception as e:
                logger.error(f"Error loading dataset: {e}")
        return dataset

    def find_relevant_context(self, user_query, top_k=3):
        """Find most relevant examples from dataset"""
        if not self.dataset:
            return []
        scores = []
        for i, example in enumerate(self.dataset):
            instruction = example.get('instruction', '').lower()
            output = example.get('output', '').lower()
            query = user_query.lower()
            # Weight similarity to the instruction more heavily than to the output
            instruction_score = SequenceMatcher(None, query, instruction).ratio()
            output_score = SequenceMatcher(None, query, output).ratio()
            combined_score = (instruction_score * 0.7) + (output_score * 0.3)
            scores.append((combined_score, i))
        scores.sort(reverse=True)
        relevant_examples = []
        for score, idx in scores[:top_k]:
            if score > 0.1:  # Ignore weak matches
                relevant_examples.append(self.dataset[idx])
        return relevant_examples

    def create_context_prompt(self, user_query, relevant_examples):
        """Create a prompt with relevant context"""
        if not relevant_examples:
            return user_query
        context_parts = []
        context_parts.append("Berikut adalah beberapa contoh pertanyaan dan jawaban tentang Textilindo:")
        context_parts.append("")
        for i, example in enumerate(relevant_examples, 1):
            instruction = example.get('instruction', '')
            output = example.get('output', '')
            context_parts.append(f"Contoh {i}:")
            context_parts.append(f"Pertanyaan: {instruction}")
            context_parts.append(f"Jawaban: {output}")
            context_parts.append("")
        context_parts.append("Berdasarkan contoh di atas, jawab pertanyaan berikut:")
        context_parts.append(f"Pertanyaan: {user_query}")
        context_parts.append("Jawaban:")
        return "\n".join(context_parts)

    def chat(self, message, max_tokens=300, temperature=0.7, system_prompt_override=None):
        """Send message to Novita AI with RAG context"""
        relevant_examples = self.find_relevant_context(message, 3)
        if relevant_examples:
            enhanced_prompt = self.create_context_prompt(message, relevant_examples)
            context_used = True
        else:
            enhanced_prompt = message
            context_used = False
        system_message = {
            "role": "system",
            "content": (system_prompt_override or self.system_prompt)
        }
        payload = {
            "model": self.model,
            "messages": [system_message, {"role": "user", "content": enhanced_prompt}],
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": 0.9
        }
        try:
            response = requests.post(
                f"{self.base_url}/chat/completions",
                headers=self.headers,
                json=payload,
                timeout=30
            )
            if response.status_code == 200:
                result = response.json()
                ai_response = result.get('choices', [{}])[0].get('message', {}).get('content', '')
                return {
                    "success": True,
                    "response": ai_response,
                    "context_used": context_used,
                    "relevant_examples_count": len(relevant_examples),
                    "model": self.model,
                    "tokens_used": result.get('usage', {}).get('total_tokens', 0)
                }
            else:
                return {
                    "success": False,
                    "error": f"API Error: {response.status_code}",
                    "details": response.text
                }
        except Exception as e:
            return {
                "success": False,
                "error": f"Request Error: {str(e)}"
            }

# Initialize AI
api_key = os.getenv('NOVITA_API_KEY')
if not api_key:
    logger.error("NOVITA_API_KEY not found in environment variables")
    exit(1)

ai = TextilindoAI(api_key)

@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint"""
    return jsonify({
        "status": "healthy",
        "service": "Textilindo AI API",
        "model": ai.model,
        "dataset_loaded": len(ai.dataset) > 0,
        "dataset_size": len(ai.dataset)
    })

@app.route('/chat', methods=['POST'])
def chat():
    """Main chat endpoint"""
    try:
        data = request.get_json()
        if not data:
            return jsonify({
                "success": False,
                "error": "No JSON data provided"
            }), 400
        message = data.get('message', '').strip()
        if not message:
            return jsonify({
                "success": False,
                "error": "Message is required"
            }), 400
        # Optional parameters
        max_tokens = data.get('max_tokens', 300)
        temperature = data.get('temperature', 0.7)
        system_prompt = data.get('system_prompt')
        # Validate parameters
        if not isinstance(max_tokens, int) or max_tokens < 1 or max_tokens > 1000:
            return jsonify({
                "success": False,
                "error": "max_tokens must be between 1 and 1000"
            }), 400
        if not isinstance(temperature, (int, float)) or temperature < 0 or temperature > 2:
            return jsonify({
                "success": False,
                "error": "temperature must be between 0 and 2"
            }), 400
        # Process chat
        result = ai.chat(message, max_tokens, temperature, system_prompt_override=system_prompt)
        if result["success"]:
            return jsonify(result)
        else:
            return jsonify(result), 500
    except Exception as e:
        logger.error(f"Error in chat endpoint: {e}")
        return jsonify({
            "success": False,
            "error": f"Internal server error: {str(e)}"
        }), 500

@app.route('/stats', methods=['GET'])
def get_stats():
    """Get dataset and system statistics"""
    try:
        # Count examples per topic from the metadata field
        topics = {}
        for example in ai.dataset:
            metadata = example.get('metadata', {})
            topic = metadata.get('topic', 'unknown')
            topics[topic] = topics.get(topic, 0) + 1
        return jsonify({
            "success": True,
            "dataset": {
                "total_examples": len(ai.dataset),
                "topics": topics,
                "topics_count": len(topics)
            },
            "model": {
                "name": ai.model,
                "provider": "Novita AI"
            },
            "system": {
                "api_version": "1.0.0",
                "status": "operational"
            }
        })
    except Exception as e:
        logger.error(f"Error in stats endpoint: {e}")
        return jsonify({
            "success": False,
            "error": f"Internal server error: {str(e)}"
        }), 500

@app.route('/examples', methods=['GET'])
def get_examples():
    """Get sample questions from dataset"""
    try:
        limit = request.args.get('limit', 10, type=int)
        limit = min(limit, 50)  # Max 50 examples
        examples = []
        for example in ai.dataset[:limit]:
            examples.append({
                "instruction": example.get('instruction', ''),
                "output": example.get('output', ''),
                "topic": example.get('metadata', {}).get('topic', 'unknown')
            })
        return jsonify({
            "success": True,
            "examples": examples,
            "total_returned": len(examples),
            "total_available": len(ai.dataset)
        })
    except Exception as e:
        logger.error(f"Error in examples endpoint: {e}")
        return jsonify({
            "success": False,
            "error": f"Internal server error: {str(e)}"
        }), 500

@app.route('/', methods=['GET'])
def root():
    """API root endpoint with documentation"""
    return jsonify({
        "service": "Textilindo AI API",
        "version": "1.0.0",
        "description": "AI-powered customer service for Textilindo",
        "endpoints": {
            "GET /": "API documentation (this endpoint)",
            "GET /health": "Health check",
            "POST /chat": "Chat with AI",
            "GET /stats": "Dataset and system statistics",
            "GET /examples": "Sample questions from dataset"
        },
        "usage": {
            "chat": {
                "method": "POST",
                "url": "/chat",
                "body": {
                    "message": "string (required)",
                    "max_tokens": "integer (optional, default: 300)",
                    "temperature": "float (optional, default: 0.7)"
                }
            }
        },
        "model": ai.model,
        "dataset_size": len(ai.dataset)
    })

if __name__ == '__main__':
    logger.info("Starting Textilindo AI API Server...")
    logger.info(f"Model: {ai.model}")
    logger.info(f"Dataset loaded: {len(ai.dataset)} examples")
    app.run(
        debug=False,  # Set to False for production
        host='0.0.0.0',
        port=8082
    )
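
# Illustrative client call (assumes the server is reachable on localhost:8082,
# as configured above; the body fields mirror the /chat endpoint's documented usage):
#   curl -X POST http://localhost:8082/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Halo", "max_tokens": 300, "temperature": 0.7}'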