# Source: saap-plattform/backend/api/colossus_client.py
# Last commit: 4343907 (Hwandji) - feat: initial HuggingFace Space deployment
"""
SAAP colossus Server Integration
OpenAI-Compatible API Client for mistral-small3.2:24b-instruct-2506
"""
import asyncio
import json
import os
import time
from dataclasses import dataclass, field
from typing import Dict, List, Optional

import aiohttp
import requests
@dataclass
class ColossusConfig:
    """
    colossus Server Configuration.

    All fields have defaults, so ``ColossusConfig()`` yields a ready-to-use
    configuration. The API key default is resolved when the instance is
    created: the ``COLOSSUS_API_KEY`` environment variable wins when it is
    set to a non-empty value, otherwise the legacy built-in key is used.
    """

    # Root URL of the OpenAI-compatible server (no trailing slash).
    base_url: str = "https://ai.adrian-schupp.de"
    # Bearer token sent in the Authorization header.
    # SECURITY: the literal fallback below is a credential committed to
    # source control. It is kept only so existing deployments keep working;
    # rotate it and supply the key via COLOSSUS_API_KEY instead.
    api_key: str = field(
        default_factory=lambda: (
            os.environ.get("COLOSSUS_API_KEY") or "sk-dBoxml3krytIRLdjr35Lnw"
        )
    )
    # Model identifier sent in every request payload.
    model: str = "mistral-small3.2:24b-instruct-2506"
    # Request timeout in seconds (increased from 30 to 90 for larger models).
    timeout: int = 90
    # Default completion length limit when the caller gives no override.
    max_tokens: int = 1000
    # Default sampling temperature when the caller gives no override.
    temperature: float = 0.7
class ColossusClient:
    """
    OpenAI-Compatible API Client for colossus Server.

    Handles communication with the mistral-small model for SAAP Agents.
    The async API (``chat_completion``) requires the client to be used as an
    async context manager so that an HTTP session exists; the sync API
    (``sync_chat_completion``) can be called on a plain instance.

    Neither request method raises on transport or HTTP errors: both always
    return a dict with a boolean ``"success"`` key, plus ``"error"`` on
    failure.
    """

    def __init__(self, config: Optional["ColossusConfig"] = None):
        """Store *config*, or a default ``ColossusConfig`` when omitted."""
        self.config = config if config is not None else ColossusConfig()
        # aiohttp session; only set between __aenter__ and __aexit__.
        self.session = None

    def _auth_headers(self) -> Dict[str, str]:
        """Return the HTTP headers shared by the async and sync requests."""
        return {
            "Authorization": f"Bearer {self.config.api_key}",
            "Content-Type": "application/json",
        }

    def _build_payload(
        self,
        messages: List[Dict[str, str]],
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None,
    ) -> Dict:
        """Return the request body, applying overrides only when given."""
        # Compare against None rather than relying on truthiness: an explicit
        # temperature of 0.0 is falsy but must not be replaced by the
        # configured default.
        return {
            "model": self.config.model,
            "messages": messages,
            "temperature": (
                self.config.temperature if temperature is None else temperature
            ),
            "max_tokens": (
                self.config.max_tokens if max_tokens is None else max_tokens
            ),
        }

    async def __aenter__(self):
        """Open the aiohttp session with auth headers and the total timeout."""
        self.session = aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=self.config.timeout),
            headers=self._auth_headers(),
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the session, if any, and forget it."""
        if self.session:
            await self.session.close()
            # Drop the closed session so later calls hit the explicit
            # "not initialized" path instead of failing inside aiohttp.
            self.session = None

    async def chat_completion(
        self,
        messages: List[Dict[str, str]],
        agent_id: str = "default",
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None
    ) -> Dict:
        """
        Send chat completion request to colossus.

        Args:
            messages: List of message objects [{"role": "user", "content": "..."}]
            agent_id: SAAP Agent identifier for logging
            temperature: Model temperature override (0.0 is honored)
            max_tokens: Max tokens override

        Returns:
            On success: ``{"success": True, "response": <decoded JSON>,
            "response_time": <seconds>, "agent_id": ..., "model": ...}``.
            On failure: ``{"success": False, "error": <message>,
            "response_time": <seconds>, "agent_id": ...}``.
        """
        start_time = time.time()
        if self.session is None:
            # Called outside "async with": report it clearly instead of the
            # AttributeError text a None session would otherwise produce.
            return {
                "success": False,
                "error": (
                    "Client session not initialized; "
                    "use 'async with ColossusClient() as client'"
                ),
                "response_time": 0.0,
                "agent_id": agent_id,
            }

        payload = self._build_payload(messages, temperature, max_tokens)
        payload["stream"] = False
        try:
            async with self.session.post(
                f"{self.config.base_url}/v1/chat/completions",
                json=payload
            ) as response:
                # Measured when the response headers arrive, before the body
                # is read.
                response_time = time.time() - start_time
                if response.status == 200:
                    data = await response.json()
                    # SAAP Performance Monitoring
                    print(f"✅ colossus Response [{agent_id}]: {response_time:.2f}s")
                    return {
                        "success": True,
                        "response": data,
                        "response_time": response_time,
                        "agent_id": agent_id,
                        "model": self.config.model
                    }
                error_text = await response.text()
                print(f"❌ colossus Error [{agent_id}]: {response.status} - {error_text}")
                return {
                    "success": False,
                    "error": f"HTTP {response.status}: {error_text}",
                    "response_time": response_time,
                    "agent_id": agent_id
                }
        except asyncio.TimeoutError:
            # Reports the configured timeout rather than the elapsed time.
            return {
                "success": False,
                "error": "Request timeout",
                "response_time": self.config.timeout,
                "agent_id": agent_id
            }
        except Exception as e:
            # Boundary handler: callers rely on a result dict, never a raise.
            return {
                "success": False,
                "error": str(e),
                "response_time": time.time() - start_time,
                "agent_id": agent_id
            }

    def sync_chat_completion(
        self,
        messages: List[Dict[str, str]],
        agent_id: str = "default",
        temperature: Optional[float] = None,
        max_tokens: Optional[int] = None
    ) -> Dict:
        """
        Synchronous version for compatibility.

        Args:
            messages: List of message objects [{"role": "user", "content": "..."}]
            agent_id: SAAP Agent identifier for logging
            temperature: Model temperature override (0.0 is honored)
            max_tokens: Max tokens override

        Returns:
            A result dict with the same keys as ``chat_completion`` returns.
        """
        payload = self._build_payload(messages, temperature, max_tokens)
        start_time = time.time()
        try:
            response = requests.post(
                f"{self.config.base_url}/v1/chat/completions",
                headers=self._auth_headers(),
                json=payload,
                timeout=self.config.timeout
            )
            response_time = time.time() - start_time
            if response.status_code == 200:
                print(f"✅ colossus Response [{agent_id}]: {response_time:.2f}s")
                return {
                    "success": True,
                    "response": response.json(),
                    "response_time": response_time,
                    "agent_id": agent_id,
                    "model": self.config.model
                }
            print(f"❌ colossus Error [{agent_id}]: {response.status_code}")
            return {
                "success": False,
                "error": f"HTTP {response.status_code}: {response.text}",
                "response_time": response_time,
                "agent_id": agent_id
            }
        except Exception as e:
            # Boundary handler: callers rely on a result dict, never a raise.
            return {
                "success": False,
                "error": str(e),
                "response_time": time.time() - start_time,
                "agent_id": agent_id
            }
# Performance Test Function
async def test_colossus_performance():
    """
    Test colossus Server Performance.

    Sends one chat completion through ``ColossusClient`` and prints the
    measured response time against the target.

    Target: < 2s Response Time

    Returns:
        The result dict from ``ColossusClient.chat_completion``, returned
        unchanged on success, on a reported failure, and when a 200 response
        does not contain the expected ``choices[0].message.content`` path.
    """
    print("🚀 SAAP colossus Performance Test Starting...")
    test_messages = [
        {"role": "system", "content": "You are Jane Alesi, lead AI architect for SAAP platform."},
        {"role": "user", "content": "Hello Jane, please introduce yourself and explain your role in coordinating other AI agents."}
    ]
    async with ColossusClient() as client:
        # Single Request Test
        result = await client.chat_completion(test_messages, agent_id="jane_alesi")

    if not result["success"]:
        print(f"❌ Test Failed: {result['error']}")
        return result

    try:
        # A 200 response is not guaranteed to have this shape, and the
        # content field may be null; fall back to an empty preview then.
        response_text = result["response"]["choices"][0]["message"]["content"] or ""
    except (KeyError, IndexError, TypeError):
        print("❌ Test Failed: unexpected response format")
        return result

    response_time = result["response_time"]
    print("\n📊 PERFORMANCE RESULTS:")
    print(f"⏱️ Response Time: {response_time:.2f}s")
    print(f"🎯 Target Met: {'✅ YES' if response_time < 2.0 else '❌ NO'}")
    print(f"🤖 Model: {result['model']}")
    print("\n💬 Response Preview:")
    print(f"{response_text[:200]}...")
    return result
if __name__ == "__main__":
    # Script entry point: drive the async performance test to completion on a
    # fresh event loop and keep its result dict at module level.
    performance_run = test_colossus_performance()
    result = asyncio.run(performance_run)