Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| SAAP colossus Server Integration Test Script | |
| Quick-Test für colossus Server Integration und Performance-Benchmark | |
| Usage: | |
| python test_colossus_integration.py | |
| Author: Hanan Wandji Danga | |
| """ | |
| import asyncio | |
| import sys | |
| import os | |
| import json | |
| import time | |
| from dotenv import load_dotenv | |
| # Load environment variables | |
| load_dotenv() | |
| # Add parent directory to path for imports | |
| sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) | |
| from agents.colossus_saap_agent import ColossusSAAPAgent, ColossusBenchmark, create_saap_colossus_agents | |
# colossus server configuration.
# The API key is read from the environment (load_dotenv() above has already
# loaded the .env file); the empty-string default means "not configured" and
# is rejected at the start of main().
API_KEY = os.getenv("COLOSSUS_API_KEY", "")
# Server URL: passed to the agent in the connection test and printed in the banner.
BASE_URL = "https://ai.adrian-schupp.de"
# Model name: in this file it is only printed in the banner, never passed to an agent.
MODEL_NAME = "mistral-small3.2:24b-instruct-2506"
async def test_colossus_connection():
    """
    Test 1: basic connectivity and API functionality.

    Creates a Coordinator agent pointed at BASE_URL, awaits its health check
    and prints the reported status and response time.

    Returns:
        bool: False when the health-check result carries a truthy ``error``
        entry, True otherwise.
    """
    print("🔗 TEST 1: colossus Server Connection Test")
    print("=" * 50)

    agent = ColossusSAAPAgent("test_coordinator", "Coordinator", API_KEY, BASE_URL)

    # Health Check
    print("📊 Running health check...")
    health = await agent.health_check()

    # Read the status defensively, like the other fields: a failure payload
    # without a status key must still reach the error report below instead of
    # aborting with a KeyError.
    print(f"Status: {health.get('colossus_status', 'unknown')}")
    print(f"Response Time: {health.get('response_time', 'N/A')}s")

    if health.get('error'):
        print(f"❌ Error: {health['error']}")
        return False

    print("✅ colossus Server is healthy and reachable")
    return True
async def test_saap_agent_roles():
    """
    Test 2: role-specific responses of the SAAP agents.

    Builds the agents via create_saap_colossus_agents, sends one question to
    each of the Coordinator, Developer and Analyst agents, and prints either
    the response time, the first 200 characters of the answer and the token
    count, or the reported error. Returns nothing.
    """
    print("\n🤖 TEST 2: SAAP Agent Role-Specific Responses")
    print("=" * 50)

    # Create the three base agents from the plan
    agents = create_saap_colossus_agents(API_KEY)

    cases = [
        {
            "agent": "agent_coordinator",
            "role": "Coordinator",
            "question": "Wie koordinierst du mehrere Agenten in einem SAAP-System?"
        },
        {
            "agent": "agent_developer",
            "role": "Developer",
            "question": "Welche Python-Bibliotheken empfiehlst du für Multi-Agent-Systeme?"
        },
        {
            "agent": "agent_analyst",
            "role": "Analyst",
            "question": "Analysiere die Performance-Vorteile von On-Premise vs Cloud AI."
        }
    ]

    for case in cases:
        print(f"\n👤 Testing {case['role']} Agent...")

        # Look up the agent whose name matches this case (first match wins)
        matched = None
        for candidate in agents:
            if candidate.agent_name == case['agent']:
                matched = candidate
                break

        if not matched:
            print(f"❌ Agent {case['agent']} not found")
            continue

        # Send the request
        result = await matched.send_request_to_colossus(case['question'])

        if not result['success']:
            print(f"❌ Failed: {result.get('error')}")
            continue

        print(f"✅ Response Time: {result['response_time']}s")
        print(f"📝 Response: {result['response'][:200]}...")
        print(f"🔢 Tokens: {result['token_count']}")
async def test_performance_benchmark():
    """
    Test 3: performance benchmark against the < 2s response-time target.

    Runs five short prompts through ColossusBenchmark and prints the summary.

    Returns:
        The result object produced by run_performance_benchmark, unchanged.
    """
    print("\n⚡ TEST 3: Performance Benchmark (Target: < 2s)")
    print("=" * 50)

    runner = ColossusBenchmark(API_KEY)

    # SAAP-specific test prompts
    prompts = [
        "Was ist SAAP?",
        "Erkläre Multi-Agent Systeme kurz.",
        "Vorteile von On-Premise AI?",
        "Wie funktioniert Agent-Koordination?",
        "Python für AI-Entwicklung?"
    ]

    print(f"🧪 Running {len(prompts)} performance tests...")
    results = await runner.run_performance_benchmark(prompts)

    print(f"\n📊 BENCHMARK RESULTS:")
    print(f"Total Tests: {results['total_tests']}")
    print(f"Successful Tests: {results['successful_tests']}")
    print(f"Success Rate: {results['success_rate']:.1f}%")

    # A missing or falsy average is reported as "no test succeeded"
    if not results.get('average_response_time'):
        print("❌ No successful tests - check connection and API key")
        return results

    print(f"Average Response Time: {results['average_response_time']}s")
    print(f"Average Token Count: {results['average_token_count']}")

    # Performance target check
    if results['performance_target_met']:
        symbol, verdict = "✅", "MET"
        needs_optimization = False
    else:
        symbol, verdict = "❌", "NOT MET"
        needs_optimization = True
    print(f"{symbol} Performance Target (< 2s): {verdict}")

    if needs_optimization:
        print("💡 Optimization suggestions will be provided for performance improvement")

    return results
async def test_multi_agent_communication():
    """
    Test 4: multi-agent communication via Redis.

    Connects to a Redis server on localhost:6379, lets a Coordinator agent
    send one message to the "test_developer" agent and checks that the list
    "saap_agent_test_developer" grew as a result.

    Every failure is reported on stdout; nothing is raised to the caller and
    nothing is returned.
    """
    print("\n💬 TEST 4: Multi-Agent Communication Test")
    print("=" * 50)

    # Imported lazily so the remaining tests still run without redis-py.
    try:
        import redis
    except ImportError:
        print("❌ Redis not available - install with: pip install redis")
        return

    queue_key = "saap_agent_test_developer"

    # Connection problems get their own handler so that later failures are
    # not mislabelled as "Redis connection failed".
    try:
        redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True)
        # Test Redis connection
        redis_client.ping()
    except Exception as e:
        print(f"❌ Redis connection failed: {e}")
        print("💡 Make sure Redis/Valkey is running: systemctl start valkey")
        return
    print("✅ Redis connection successful")

    try:
        # Baseline length: entries left over from earlier runs must not make
        # this run look successful.
        queue_before = redis_client.llen(queue_key)

        # Create 2 test agents
        coordinator = ColossusSAAPAgent("test_coordinator", "Coordinator", API_KEY)
        # NOTE(review): the developer agent is only constructed, never used;
        # kept in case construction sets up the receiving side - confirm.
        developer = ColossusSAAPAgent("test_developer", "Developer", API_KEY)

        # Send message from Coordinator to Developer
        test_message = "Entwickle ein Python-Script für Agent-Kommunikation"
        print(f"📤 Coordinator → Developer: {test_message}")

        # NOTE(review): called without await - assumes send_message_to_agent
        # is a plain synchronous method; confirm against the agent class.
        coordinator.send_message_to_agent("test_developer", test_message)

        # Simulate the developer's processing time
        print("🔄 Developer processing message...")
        await asyncio.sleep(1)

        # Check Redis queue
        queue_after = redis_client.llen(queue_key)
        print(f"📊 Developer message queue length: {queue_after}")

        if queue_after > queue_before:
            print("✅ Multi-Agent communication working")
        else:
            print("⚠️ No new message in queue - check Redis setup")
    except Exception as e:
        # Best-effort test: report the problem and let the remaining steps run.
        print(f"❌ Multi-agent communication test failed: {e}")
def print_next_steps(performance_results):
    """
    Print the next steps based on the benchmark results.

    Args:
        performance_results: Mapping with the benchmark summary; the keys
            ``success_rate`` and ``average_response_time`` are read. A falsy
            value (e.g. ``None``) is treated as "no results".

    The success branch is taken when ``success_rate`` is above 80; a missing
    or ``None`` rate counts as 0. A missing or ``None`` average response time
    is reported as unavailable instead of being compared against the target.
    Nothing is returned; all output goes to stdout.
    """
    print("\n🎯 NEXT STEPS - Week 1-2 Infrastructure Foundation")
    print("=" * 60)

    results = performance_results or {}
    # `or 0` also covers an explicit None, which cannot be compared to 80.
    success_rate = results.get('success_rate') or 0

    if success_rate > 80:
        print("✅ colossus Integration erfolgreich!")
        print("🚀 Ready für Phase 1 Implementation:")
        print("   1. Integriere colossus Agents in bestehendes SAAP-System")
        print("   2. Update Vue.js Dashboard für colossus Agent-Monitoring")
        print("   3. Teste 3 Basis-Agenten (Coordinator, Developer, Analyst)")
        print("   4. OpenRouter GLM 4.5 Air als Fallback konfigurieren")
        print("   5. Performance-Optimierung wenn Response-Zeit > 2s")

        print(f"\n📊 Performance Status:")
        avg_time = results.get('average_response_time')
        if avg_time is None:
            # Without a measurement the target can be neither met nor missed.
            print("   ⚠️ No average response time reported - performance target not evaluated")
        elif avg_time < 2.0:
            print(f"   ✅ Target erfüllt: {avg_time}s < 2s")
        else:
            print(f"   ⚠️ Optimierung nötig: {avg_time}s > 2s")
            print("   💡 Optimierungen: Model-Caching, Connection-Pooling, Prompt-Optimization")
    else:
        print("❌ colossus Integration needs fixes:")
        print("   1. Check API Key and URL")
        print("   2. Verify network connectivity")
        print("   3. Test model availability")
        print("   4. Check server status")
async def main():
    """
    Main function: runs all tests sequentially.

    Order: API-key validation, connection test, role-specific responses,
    performance benchmark, multi-agent communication, then the next-steps
    summary.

    Returns:
        bool: True when the benchmark reports a success rate above 80 (the
        threshold print_next_steps uses); False when the API key is missing,
        the connection test fails, or the success rate is not above 80.
    """
    print("🚀 SAAP colossus Server Integration Tests")
    print("=" * 60)
    print(f"Target: {BASE_URL}")
    print(f"Model: {MODEL_NAME}")
    print(f"Performance Goal: < 2s response time")
    print("")

    # Validate API key
    if not API_KEY:
        print("❌ Error: COLOSSUS_API_KEY environment variable not set")
        print("Please add COLOSSUS_API_KEY to backend/.env file")
        return False

    # Test 1: Basic Connectivity
    connection_ok = await test_colossus_connection()
    if not connection_ok:
        print("\n❌ Basic connection failed. Check server and API key.")
        return False

    # Test 2: Role-specific responses
    await test_saap_agent_roles()

    # Test 3: Performance benchmark
    performance_results = await test_performance_benchmark()

    # Test 4: Multi-Agent communication
    await test_multi_agent_communication()

    # Summary and next steps
    print_next_steps(performance_results)

    # Only announce readiness when the benchmark actually succeeded; the
    # banner must not contradict the summary printed just above.
    success_rate = (performance_results or {}).get('success_rate') or 0
    integration_ok = success_rate > 80

    if integration_ok:
        print(f"\n🎓 Integration Status: Ready for SAAP Phase 1 Development")
        print("📚 Master Thesis Progress: Infrastructure Foundation Complete")
    else:
        print("\n⚠️ Integration Status: Not ready - resolve the issues listed above")

    return integration_ok
if __name__ == "__main__":
    # Run all tests. Exit non-zero only on an explicit False from main(), so
    # a main() that returns nothing keeps exiting with status 0.
    if asyncio.run(main()) is False:
        sys.exit(1)