Spaces:

Hwandji
/

saap-plattform

Sleeping

App Files Files Community

saap-plattform / backend /scripts /test_colossus_integration.py

Hwandji

feat: initial HuggingFace Space deployment

4343907 8 days ago

raw

history blame contribute delete

9.01 kB

	#!/usr/bin/env python3
	"""
	SAAP colossus Server Integration Test Script
	Quick-Test für colossus Server Integration und Performance-Benchmark

	Usage:
	python test_colossus_integration.py

	Author: Hanan Wandji Danga
	"""

	import asyncio
	import sys
	import os
	import json
	import time
	from dotenv import load_dotenv

	# Load environment variables via python-dotenv before any os.getenv() lookup
	# further down (presumably from backend/.env — verify the file location).
	load_dotenv()

	# Put the parent of this script's directory on sys.path so that the
	# `agents` package imported right after this resolves when the script
	# is executed directly.
	sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

	from agents.colossus_saap_agent import ColossusSAAPAgent, ColossusBenchmark, create_saap_colossus_agents

	# Settings for the colossus server used by every test in this script.
	# Model name and endpoint are fixed; the API key is read from the
	# environment and falls back to an empty string when it is not set.
	MODEL_NAME = "mistral-small3.2:24b-instruct-2506"
	BASE_URL = "https://ai.adrian-schupp.de"
	API_KEY = os.environ.get("COLOSSUS_API_KEY", "")

	async def test_colossus_connection():
	    """
	    Test 1: basic connectivity and API functionality.

	    Builds a coordinator agent against BASE_URL, awaits its health check
	    and prints the reported status and response time.

	    Returns:
	        False when the health report contains a truthy 'error' entry,
	        True otherwise.
	    """
	    print("🔗 TEST 1: colossus Server Connection Test")
	    print("=" * 50)

	    probe = ColossusSAAPAgent("test_coordinator", "Coordinator", API_KEY, BASE_URL)

	    # Health check
	    print("📊 Running health check...")
	    report = await probe.health_check()

	    status_line = f"Status: {report['colossus_status']}"
	    print(status_line)
	    timing_line = f"Response Time: {report.get('response_time', 'N/A')}s"
	    print(timing_line)

	    # A truthy 'error' entry marks the health check as failed
	    failure = report.get('error')
	    if failure:
	        print(f"❌ Error: {failure}")
	        return False

	    print("✅ colossus Server is healthy and reachable")
	    return True

	async def test_saap_agent_roles():
	    """
	    Test 2: role-specific responses of the SAAP agents.

	    Creates the agents via create_saap_colossus_agents, sends one question
	    to each of the coordinator, developer and analyst agents, and prints
	    either a summary of the response or the reported error.
	    """
	    print("\n🤖 TEST 2: SAAP Agent Role-Specific Responses")
	    print("=" * 50)

	    # Create the three base agents from the plan
	    roster = create_saap_colossus_agents(API_KEY)

	    cases = [
	        {
	            "agent": "agent_coordinator",
	            "role": "Coordinator",
	            "question": "Wie koordinierst du mehrere Agenten in einem SAAP-System?"
	        },
	        {
	            "agent": "agent_developer",
	            "role": "Developer",
	            "question": "Welche Python-Bibliotheken empfiehlst du für Multi-Agent-Systeme?"
	        },
	        {
	            "agent": "agent_analyst",
	            "role": "Analyst",
	            "question": "Analysiere die Performance-Vorteile von On-Premise vs Cloud AI."
	        }
	    ]

	    for case in cases:
	        print(f"\n👤 Testing {case['role']} Agent...")

	        # Pick the first agent whose name matches the test case
	        selected = None
	        for candidate in roster:
	            if candidate.agent_name == case['agent']:
	                selected = candidate
	                break
	        if not selected:
	            print(f"❌ Agent {case['agent']} not found")
	            continue

	        # Send the request
	        outcome = await selected.send_request_to_colossus(case['question'])

	        if not outcome['success']:
	            print(f"❌ Failed: {outcome.get('error')}")
	            continue

	        print(f"✅ Response Time: {outcome['response_time']}s")
	        # Only the first 200 characters of the response are shown
	        print(f"📝 Response: {outcome['response'][:200]}...")
	        print(f"🔢 Tokens: {outcome['token_count']}")

	async def test_performance_benchmark():
	    """
	    Test 3: performance benchmark against the < 2s response-time target.

	    Runs five short prompts through ColossusBenchmark and prints the
	    aggregated figures from the returned result mapping.

	    Returns:
	        The result mapping produced by run_performance_benchmark.
	    """
	    print("\n⚡ TEST 3: Performance Benchmark (Target: < 2s)")
	    print("=" * 50)

	    runner = ColossusBenchmark(API_KEY)

	    # SAAP-specific test prompts
	    prompts = [
	        "Was ist SAAP?",
	        "Erkläre Multi-Agent Systeme kurz.",
	        "Vorteile von On-Premise AI?",
	        "Wie funktioniert Agent-Koordination?",
	        "Python für AI-Entwicklung?"
	    ]

	    print(f"🧪 Running {len(prompts)} performance tests...")
	    summary = await runner.run_performance_benchmark(prompts)

	    print("\n📊 BENCHMARK RESULTS:")
	    print(f"Total Tests: {summary['total_tests']}")
	    print(f"Successful Tests: {summary['successful_tests']}")
	    print(f"Success Rate: {summary['success_rate']:.1f}%")

	    # A missing or falsy average is reported as "no successful tests"
	    if not summary.get('average_response_time'):
	        print("❌ No successful tests - check connection and API key")
	        return summary

	    print(f"Average Response Time: {summary['average_response_time']}s")
	    print(f"Average Token Count: {summary['average_token_count']}")

	    # Performance target check
	    on_target = summary['performance_target_met']
	    if on_target:
	        symbol, verdict = "✅", "MET"
	    else:
	        symbol, verdict = "❌", "NOT MET"
	    print(f"{symbol} Performance Target (< 2s): {verdict}")

	    if not on_target:
	        print("💡 Optimization suggestions will be provided for performance improvement")

	    return summary

	async def test_multi_agent_communication():
	    """
	    Test 4: multi-agent communication via Redis.

	    Pings a Redis server on localhost:6379, lets a coordinator agent send
	    one message addressed to "test_developer", waits one second and then
	    reports the length of the "saap_agent_test_developer" list.
	    ImportError and any other Exception raised along the way is printed
	    instead of propagated.
	    """
	    print("\n💬 TEST 4: Multi-Agent Communication Test")
	    print("=" * 50)

	    try:
	        import redis

	        store = redis.Redis(host='localhost', port=6379, decode_responses=True)

	        # Make sure the server answers before involving any agents
	        store.ping()
	        print("✅ Redis connection successful")

	        # Create two test agents
	        sender = ColossusSAAPAgent("test_coordinator", "Coordinator", API_KEY)
	        receiver = ColossusSAAPAgent("test_developer", "Developer", API_KEY)

	        # Send a message from the coordinator to the developer
	        payload = "Entwickle ein Python-Script für Agent-Kommunikation"
	        print(f"📤 Coordinator → Developer: {payload}")

	        sender.send_message_to_agent("test_developer", payload)

	        # Simulate the developer working on the message
	        print("🔄 Developer processing message...")
	        await asyncio.sleep(1)

	        # Inspect the developer's Redis queue
	        pending = store.llen("saap_agent_test_developer")
	        print(f"📊 Developer message queue length: {pending}")

	        print(
	            "✅ Multi-Agent communication working"
	            if pending > 0
	            else "⚠️ No messages in queue - check Redis setup"
	        )

	    except ImportError:
	        print("❌ Redis not available - install with: pip install redis")
	    except Exception as exc:
	        print(f"❌ Redis connection failed: {exc}")
	        print("💡 Make sure Redis/Valkey is running: systemctl start valkey")

	def print_next_steps(performance_results):
	    """
	    Print the next steps derived from the benchmark results.

	    Args:
	        performance_results: Mapping that may carry 'success_rate' and
	            'average_response_time' entries, or a falsy value when no
	            benchmark results are available.

	    A success rate above 80 prints the "ready" checklist followed by a
	    performance status line; anything else prints the troubleshooting
	    checklist. Metrics that are missing or None are treated as 0, so an
	    incomplete result mapping cannot raise TypeError in the comparisons.
	    """
	    print("\n🎯 NEXT STEPS - Week 1-2 Infrastructure Foundation")
	    print("=" * 60)

	    results = performance_results or {}
	    # `or 0` also covers keys that are present but hold None
	    success_rate = results.get('success_rate') or 0

	    if success_rate > 80:
	        print("✅ colossus Integration erfolgreich!")
	        print("🚀 Ready für Phase 1 Implementation:")
	        print(" 1. Integriere colossus Agents in bestehendes SAAP-System")
	        print(" 2. Update Vue.js Dashboard für colossus Agent-Monitoring")
	        print(" 3. Teste 3 Basis-Agenten (Coordinator, Developer, Analyst)")
	        print(" 4. OpenRouter GLM 4.5 Air als Fallback konfigurieren")
	        print(" 5. Performance-Optimierung wenn Response-Zeit > 2s")

	        print("\n📊 Performance Status:")
	        avg_time = results.get('average_response_time') or 0
	        if avg_time < 2.0:
	            print(f" ✅ Target erfüllt: {avg_time}s < 2s")
	        else:
	            print(f" ⚠️ Optimierung nötig: {avg_time}s > 2s")
	            print(" 💡 Optimierungen: Model-Caching, Connection-Pooling, Prompt-Optimization")
	    else:
	        print("❌ colossus Integration needs fixes:")
	        print(" 1. Check API Key and URL")
	        print(" 2. Verify network connectivity")
	        print(" 3. Test model availability")
	        print(" 4. Check server status")

	async def main():
	    """
	    Main entry point: run all integration tests sequentially.

	    Stops early when COLOSSUS_API_KEY is not set or when the connection
	    test fails. Otherwise runs the role, benchmark and Redis tests and
	    prints the next steps. The closing "ready" banner is printed only
	    when the benchmark success rate is above 80 (the threshold
	    print_next_steps applies), so a failed run no longer ends with a
	    success message.
	    """
	    print("🚀 SAAP colossus Server Integration Tests")
	    print("=" * 60)
	    print(f"Target: {BASE_URL}")
	    print(f"Model: {MODEL_NAME}")
	    print("Performance Goal: < 2s response time")
	    print("")

	    # Validate API key
	    if not API_KEY:
	        print("❌ Error: COLOSSUS_API_KEY environment variable not set")
	        print("Please add COLOSSUS_API_KEY to backend/.env file")
	        return

	    # Test 1: basic connectivity
	    connection_ok = await test_colossus_connection()

	    if not connection_ok:
	        print("\n❌ Basic connection failed. Check server and API key.")
	        return

	    # Test 2: role-specific responses
	    await test_saap_agent_roles()

	    # Test 3: performance benchmark
	    performance_results = await test_performance_benchmark()

	    # Test 4: multi-agent communication
	    await test_multi_agent_communication()

	    # Summary and next steps
	    print_next_steps(performance_results)

	    # Only claim readiness when the benchmark actually succeeded;
	    # a missing or None success rate counts as 0.
	    success_rate = (performance_results or {}).get('success_rate') or 0
	    if success_rate > 80:
	        print("\n🎓 Integration Status: Ready for SAAP Phase 1 Development")
	        print("📚 Master Thesis Progress: Infrastructure Foundation Complete")

	if __name__ == "__main__":
	    # Script entry point: drive the async test sequence to completion
	    asyncio.run(main())