saap-plattform / backend /scripts /test_colossus_integration.py
Hwandji's picture
feat: initial HuggingFace Space deployment
4343907
#!/usr/bin/env python3
"""
SAAP colossus Server Integration Test Script
Quick-Test für colossus Server Integration und Performance-Benchmark
Usage:
python test_colossus_integration.py
Author: Hanan Wandji Danga
"""
import asyncio
import sys
import os
import json
import time
from dotenv import load_dotenv
# Load environment variables
load_dotenv()
# Add parent directory to path for imports
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from agents.colossus_saap_agent import ColossusSAAPAgent, ColossusBenchmark, create_saap_colossus_agents
# colossus server configuration.
# The API key is read from the environment (load_dotenv() above loads a .env
# file first); it falls back to an empty string when COLOSSUS_API_KEY is unset.
API_KEY = os.getenv("COLOSSUS_API_KEY", "")
# Base URL handed to the agent in the connection test and shown in the banner.
BASE_URL = "https://ai.adrian-schupp.de"
# Model name shown in the banner.
# NOTE(review): in the visible script this value is only printed, never passed
# to an agent - presumably the agent module selects the model itself; confirm.
MODEL_NAME = "mistral-small3.2:24b-instruct-2506"
async def test_colossus_connection():
    """
    Test 1: basic connectivity and API functionality.

    Builds a coordinator agent against BASE_URL, awaits its health check and
    prints the reported status and response time.

    Returns:
        bool: False when the health report contains a truthy 'error' entry,
        True otherwise.
    """
    for banner_line in ("🔗 TEST 1: colossus Server Connection Test", "=" * 50):
        print(banner_line)

    probe = ColossusSAAPAgent("test_coordinator", "Coordinator", API_KEY, BASE_URL)

    # Health check
    print("📊 Running health check...")
    report = await probe.health_check()

    status = report['colossus_status']
    print(f"Status: {status}")
    elapsed = report.get('response_time', 'N/A')
    print(f"Response Time: {elapsed}s")

    # Success path first; anything with an error entry falls through below.
    if not report.get('error'):
        print("✅ colossus Server is healthy and reachable")
        return True

    print(f"❌ Error: {report['error']}")
    return False
async def test_saap_agent_roles():
    """
    Test 2: role-specific responses of the SAAP agents.

    Creates the agents via create_saap_colossus_agents, then sends one
    role-appropriate question to the agent whose ``agent_name`` matches each
    test case and prints response time, a 200-character excerpt of the
    response, and the token count (or the error on failure).
    """
    print("\n🤖 TEST 2: SAAP Agent Role-Specific Responses")
    print("=" * 50)

    # Create the three base agents from the plan
    agents = create_saap_colossus_agents(API_KEY)

    # One (agent name, role label, question) triple per role under test
    role_cases = (
        (
            "agent_coordinator",
            "Coordinator",
            "Wie koordinierst du mehrere Agenten in einem SAAP-System?",
        ),
        (
            "agent_developer",
            "Developer",
            "Welche Python-Bibliotheken empfiehlst du für Multi-Agent-Systeme?",
        ),
        (
            "agent_analyst",
            "Analyst",
            "Analysiere die Performance-Vorteile von On-Premise vs Cloud AI.",
        ),
    )

    for wanted_name, role, question in role_cases:
        print(f"\n👤 Testing {role} Agent...")

        # Pick the first agent whose name matches this test case
        selected = None
        for candidate in agents:
            if candidate.agent_name == wanted_name:
                selected = candidate
                break

        if not selected:
            print(f"❌ Agent {wanted_name} not found")
            continue

        # Send the request
        reply = await selected.send_request_to_colossus(question)
        if not reply['success']:
            print(f"❌ Failed: {reply.get('error')}")
            continue

        print(f"✅ Response Time: {reply['response_time']}s")
        print(f"📝 Response: {reply['response'][:200]}...")
        print(f"🔢 Tokens: {reply['token_count']}")
async def test_performance_benchmark():
    """
    Test 3: performance benchmark against the < 2s response-time goal.

    Runs ColossusBenchmark.run_performance_benchmark over a fixed list of
    short SAAP-related prompts and prints the summary it returns.

    Returns:
        The result mapping returned by run_performance_benchmark, unchanged.
    """
    print("\n⚡ TEST 3: Performance Benchmark (Target: < 2s)")
    print("=" * 50)

    runner = ColossusBenchmark(API_KEY)

    # SAAP-specific test prompts
    prompts = [
        "Was ist SAAP?",
        "Erkläre Multi-Agent Systeme kurz.",
        "Vorteile von On-Premise AI?",
        "Wie funktioniert Agent-Koordination?",
        "Python für AI-Entwicklung?",
    ]

    print(f"🧪 Running {len(prompts)} performance tests...")
    results = await runner.run_performance_benchmark(prompts)

    print("\n📊 BENCHMARK RESULTS:")
    print(f"Total Tests: {results['total_tests']}")
    print(f"Successful Tests: {results['successful_tests']}")
    print(f"Success Rate: {results['success_rate']:.1f}%")

    # Without a (truthy) average there is nothing more to report.
    if not results.get('average_response_time'):
        print("❌ No successful tests - check connection and API key")
        return results

    print(f"Average Response Time: {results['average_response_time']}s")
    print(f"Average Token Count: {results['average_token_count']}")

    # Performance target check
    target_met = results['performance_target_met']
    if target_met:
        symbol, verdict = "✅", "MET"
    else:
        symbol, verdict = "❌", "NOT MET"
    print(f"{symbol} Performance Target (< 2s): {verdict}")

    if not target_met:
        print("💡 Optimization suggestions will be provided for performance improvement")

    return results
async def test_multi_agent_communication():
    """
    Test 4: multi-agent communication via Redis.

    Connects to a Redis server on localhost:6379, creates a coordinator and a
    developer agent, sends one message from the coordinator to the developer
    and then checks that the developer's queue grew.

    The queue length is sampled before and after sending, so messages left
    over from an earlier run cannot make the check pass on their own.

    All failures are reported on stdout; nothing is raised to the caller.
    """
    print("\n💬 TEST 4: Multi-Agent Communication Test")
    print("=" * 50)

    # Redis list inspected by this test.
    # NOTE(review): presumably the key send_message_to_agent pushes to for
    # the agent named "test_developer" - confirm against the agent module.
    queue_key = "saap_agent_test_developer"

    try:
        import redis
        redis_client = redis.Redis(host='localhost', port=6379, decode_responses=True)

        # Test Redis connection
        redis_client.ping()
        print("✅ Redis connection successful")

        # Create two test agents. The developer instance is not used
        # directly below; it is still constructed as in the original flow.
        coordinator = ColossusSAAPAgent("test_coordinator", "Coordinator", API_KEY)
        developer = ColossusSAAPAgent("test_developer", "Developer", API_KEY)

        # Baseline: entries already in the queue before this test sends anything
        baseline_length = redis_client.llen(queue_key)

        # Send a message from coordinator to developer
        test_message = "Entwickle ein Python-Script für Agent-Kommunikation"
        print(f"📤 Coordinator → Developer: {test_message}")
        coordinator.send_message_to_agent("test_developer", test_message)

        # Simulate developer processing time
        print("🔄 Developer processing message...")
        await asyncio.sleep(1)

        # Check Redis queue
        queue_length = redis_client.llen(queue_key)
        print(f"📊 Developer message queue length: {queue_length}")

        # Only growth relative to the baseline proves that this run's message
        # arrived; a non-empty queue alone may just be stale data.
        if queue_length > baseline_length:
            print("✅ Multi-Agent communication working")
        else:
            print("⚠️ No messages in queue - check Redis setup")

    except ImportError:
        print("❌ Redis not available - install with: pip install redis")
    except Exception as e:
        print(f"❌ Redis connection failed: {e}")
        print("💡 Make sure Redis/Valkey is running: systemctl start valkey")
def print_next_steps(performance_results):
    """
    Print next steps based on the test results.

    Args:
        performance_results: Benchmark result mapping, or None / empty when
            no benchmark ran. The keys 'success_rate' and
            'average_response_time' are read; either may be missing or None.

    A success rate above 80 prints the Phase 1 checklist followed by a
    performance status line; anything else prints the troubleshooting list.
    When the success rate passes but no average response time is available,
    this is reported explicitly instead of being shown as "0s < 2s".
    """
    print("\n🎯 NEXT STEPS - Week 1-2 Infrastructure Foundation")
    print("=" * 60)

    # A missing or None success rate counts as 0 so the comparison cannot raise.
    success_rate = 0
    if performance_results:
        success_rate = performance_results.get('success_rate') or 0

    if success_rate <= 80:
        print("❌ colossus Integration needs fixes:")
        print(" 1. Check API Key and URL")
        print(" 2. Verify network connectivity")
        print(" 3. Test model availability")
        print(" 4. Check server status")
        return

    print("✅ colossus Integration erfolgreich!")
    print("🚀 Ready für Phase 1 Implementation:")
    print(" 1. Integriere colossus Agents in bestehendes SAAP-System")
    print(" 2. Update Vue.js Dashboard für colossus Agent-Monitoring")
    print(" 3. Teste 3 Basis-Agenten (Coordinator, Developer, Analyst)")
    print(" 4. OpenRouter GLM 4.5 Air als Fallback konfigurieren")
    print(" 5. Performance-Optimierung wenn Response-Zeit > 2s")
    print(f"\n📊 Performance Status:")

    # No default here: an absent measurement must not be reported as 0s.
    avg_time = performance_results.get('average_response_time')
    if avg_time is None:
        print(" ⚠️ Keine durchschnittliche Response-Zeit verfügbar")
    elif avg_time < 2.0:
        print(f" ✅ Target erfüllt: {avg_time}s < 2s")
    else:
        print(f" ⚠️ Optimierung nötig: {avg_time}s > 2s")
        print(" 💡 Optimierungen: Model-Caching, Connection-Pooling, Prompt-Optimization")
async def main():
    """
    Entry point: runs all tests sequentially.

    Order: API key validation, connection test, role-specific responses,
    performance benchmark, multi-agent communication, then the next-steps
    summary. Returns early (without running further tests) when the API key
    is missing or the connection test fails.

    The closing "ready" status is only printed when the benchmark success
    rate is above 80, the same threshold print_next_steps uses, so the final
    lines never contradict the summary printed just before them.
    """
    print("🚀 SAAP colossus Server Integration Tests")
    print("=" * 60)
    print(f"Target: {BASE_URL}")
    print(f"Model: {MODEL_NAME}")
    print(f"Performance Goal: < 2s response time")
    print("")

    # Validate API key
    if not API_KEY:
        print("❌ Error: COLOSSUS_API_KEY environment variable not set")
        print("Please add COLOSSUS_API_KEY to backend/.env file")
        return

    # Test 1: Basic connectivity
    connection_ok = await test_colossus_connection()
    if not connection_ok:
        print("\n❌ Basic connection failed. Check server and API key.")
        return

    # Test 2: Role-specific responses
    await test_saap_agent_roles()

    # Test 3: Performance benchmark
    performance_results = await test_performance_benchmark()

    # Test 4: Multi-agent communication
    await test_multi_agent_communication()

    # Summary and next steps
    print_next_steps(performance_results)

    # Missing or None success rate counts as 0 (i.e. not ready).
    success_rate = (performance_results or {}).get('success_rate') or 0
    if success_rate > 80:
        print(f"\n🎓 Integration Status: Ready for SAAP Phase 1 Development")
        print("📚 Master Thesis Progress: Infrastructure Foundation Complete")
    else:
        print("\n⚠️ Integration Status: Not ready - resolve the issues listed above")
if __name__ == "__main__":
    # Script entry point: run the async main() to completion on a fresh event loop.
    asyncio.run(main())