Speedofmastery committed on
Commit 88f3fce · verified · 1 Parent(s): 02974e8

Upload folder using huggingface_hub

Dockerfile ADDED
@@ -0,0 +1,21 @@
+ FROM python:3.10-slim
+
+ WORKDIR /app
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     wget \
+     gnupg \
+     && rm -rf /var/lib/apt/lists/*
+
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Install Playwright browsers
+ RUN playwright install --with-deps chromium
+
+ COPY . .
+
+ EXPOSE 7860
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
README.md CHANGED
@@ -1,10 +1,64 @@
- ---
- title: Orynxml Agents
- emoji: 🦀
- colorFrom: gray
- colorTo: yellow
- sdk: docker
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ ---
+ title: ORYNXML Complete Backend with Agents
+ emoji: 🤖
+ colorFrom: blue
+ colorTo: purple
+ sdk: docker
+ pinned: false
+ ---
+
+ # ORYNXML Complete Backend with AI Agents
+
+ FastAPI backend with integrated AI agents for the ORYNXML AI Platform.
+
+ ## AI Agents
+
+ ### 1. Manus Agent (Main)
+ - Chat and conversation
+ - Task execution
+ - Tool orchestration
+ - General AI capabilities
+
+ ### 2. Software Engineer Agent (SWE)
+ - Code generation (Python, JavaScript, etc.)
+ - Code debugging and refactoring
+ - Architecture design
+ - Test generation
+
+ ### 3. Browser Agent
+ - Web scraping
+ - Browser automation
+ - Form filling
+ - Navigation and interaction
+
+ ### 4. Data Analysis Agent
+ - Data visualization
+ - Chart generation
+ - Statistical analysis
+ - Data transformation
+
+ ## API Endpoints
+
+ ### Authentication
+ - `POST /auth/signup` - Register user
+ - `POST /auth/login` - Login user
+
+ ### Agent Operations
+ - `POST /agent/run` - Run any agent with prompt
+ - `POST /agent/code` - Generate code (SWE agent)
+ - `POST /agent/browser` - Browser automation
+ - `POST /agent/data` - Data analysis
+ - `GET /agents/list` - List all agents
+
+ ### Status
+ - `GET /health` - Health check
+ - `GET /cloudflare/status` - Cloudflare status
+
+ ## Frontend
+ https://orynxml-ai.pages.dev
+
+ ## Architecture
+ - FastAPI REST API
+ - 4 specialized AI agents
+ - Cloudflare integration
+ - SQLite authentication
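The agent endpoints listed above take plain JSON bodies. As a hypothetical illustration (the helper name is invented; the field names mirror the `AgentRequest` model defined in `app.py`):

```python
import json

# Hypothetical helper showing the JSON body POST /agent/run expects:
# a prompt plus an optional agent name ("manus", "swe", "browser", "data").
def build_agent_request(prompt: str, agent: str = "manus") -> dict:
    valid_agents = {"manus", "swe", "browser", "data"}
    if agent not in valid_agents:
        raise ValueError(f"Unknown agent: {agent}")
    return {"prompt": prompt, "agent": agent}

payload = build_agent_request("Write a haiku about servers")
print(json.dumps(payload))
# {"prompt": "Write a haiku about servers", "agent": "manus"}
```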
app.py ADDED
@@ -0,0 +1,419 @@
+ """
+ ORYNXML Complete Backend with AI Agents
+ FastAPI REST API + Manus Agent + SWE Agent + Browser Agent + HuggingFace Agent
+ """
+
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel
+ from typing import Optional, List, Dict, Any
+ import os
+ import sys
+ import sqlite3
+ import hashlib
+ import asyncio
+ import uvicorn  # needed by the __main__ block below
+ from datetime import datetime
+
+ # Add parent directory to path for imports
+ sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..'))
+
+ # Import AI Agents
+ from app.agent.manus import Manus
+ from app.agent.swe import SWEAgent
+ from app.agent.browser import BrowserAgent
+ from app.agent.data_analysis import DataAnalysis
+ from app.llm import get_llm
+ from app.tool.tool_collection import ToolCollection
+
+ # HuggingFace token
+ HF_TOKEN = os.getenv("HF_TOKEN", "")
+
+ # Cloudflare Configuration
+ CLOUDFLARE_CONFIG = {
+     "api_token": os.getenv("CLOUDFLARE_API_TOKEN", ""),
+     "account_id": os.getenv("CLOUDFLARE_ACCOUNT_ID", "62af59a7ac82b29543577ee6800735ee"),
+     "d1_database_id": os.getenv("CLOUDFLARE_D1_DATABASE_ID", "6d887f74-98ac-4db7-bfed-8061903d1f6c"),
+     "r2_bucket_name": os.getenv("CLOUDFLARE_R2_BUCKET_NAME", "openmanus-storage"),
+     "kv_namespace_id": os.getenv("CLOUDFLARE_KV_NAMESPACE_ID", "87f4aa01410d4fb19821f61006f94441"),
+     "kv_namespace_cache": os.getenv("CLOUDFLARE_KV_CACHE_ID", "7b58c88292c847d1a82c8e0dd5129f37"),
+ }
+
+ # Global agents (initialized on startup)
+ manus_agent = None
+ swe_agent = None
+ browser_agent = None
+ data_agent = None
+
+ # Initialize FastAPI
+ app = FastAPI(
+     title="ORYNXML AI Platform with Agents",
+     description="Complete AI backend with Manus, SWE, Browser, and Data Analysis agents",
+     version="2.0.0",
+ )
+
+ # CORS
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Database
+ def init_database():
+     conn = sqlite3.connect("openmanus.db")
+     cursor = conn.cursor()
+     cursor.execute("""
+         CREATE TABLE IF NOT EXISTS users (
+             id INTEGER PRIMARY KEY AUTOINCREMENT,
+             mobile TEXT UNIQUE NOT NULL,
+             name TEXT NOT NULL,
+             password_hash TEXT NOT NULL,
+             created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
+         )
+     """)
+     conn.commit()
+     conn.close()
+
+ init_database()
+
+ # Pydantic Models
+ class SignupRequest(BaseModel):
+     mobile: str
+     name: str
+     password: str
+
+ class LoginRequest(BaseModel):
+     mobile: str
+     password: str
+
+ class AgentRequest(BaseModel):
+     prompt: str
+     agent: Optional[str] = "manus"  # manus, swe, browser, data
+
+ class CodeRequest(BaseModel):
+     task: str
+     language: Optional[str] = "python"
+
+ class BrowserRequest(BaseModel):
+     task: str
+     url: Optional[str] = None
+
+ class DataRequest(BaseModel):
+     data: Any
+     task: str
+
+ # Helper Functions
+ def hash_password(password: str) -> str:
+     return hashlib.sha256(password.encode()).hexdigest()
+
+ def verify_password(password: str, password_hash: str) -> bool:
+     return hash_password(password) == password_hash
+
+ # Startup event - Initialize agents
+ @app.on_event("startup")
+ async def startup_event():
+     global manus_agent, swe_agent, browser_agent, data_agent
+
+     print("🚀 Initializing AI Agents...")
+
+     try:
+         # Initialize Manus (main agent)
+         manus_agent = await Manus.create()
+         print("✅ Manus Agent initialized")
+
+         # Initialize SWE Agent
+         swe_agent = await SWEAgent.create()
+         print("✅ SWE Agent initialized")
+
+         # Initialize Browser Agent
+         browser_agent = await BrowserAgent.create()
+         print("✅ Browser Agent initialized")
+
+         # Initialize Data Analysis Agent
+         data_agent = await DataAnalysis.create()
+         print("✅ Data Analysis Agent initialized")
+
+         print("🎉 All agents ready!")
+
+     except Exception as e:
+         print(f"⚠️ Warning: Could not initialize all agents: {e}")
+         print("API will still work with limited functionality")
+
+ # API Endpoints
+
+ @app.get("/")
+ async def root():
+     return {
+         "message": "ORYNXML AI Platform with Agents",
+         "version": "2.0.0",
+         "agents": {
+             "manus": "Main agent with all capabilities" if manus_agent else "Not initialized",
+             "swe": "Software Engineer agent" if swe_agent else "Not initialized",
+             "browser": "Browser automation agent" if browser_agent else "Not initialized",
+             "data": "Data analysis agent" if data_agent else "Not initialized",
+         },
+         "endpoints": {
+             "health": "/health",
+             "auth": "/auth/signup, /auth/login",
+             "agents": "/agent/run, /agent/code, /agent/browser, /agent/data",
+         }
+     }
+
+ @app.get("/health")
+ async def health_check():
+     return {
+         "status": "healthy",
+         "timestamp": datetime.now().isoformat(),
+         "agents_initialized": {
+             "manus": manus_agent is not None,
+             "swe": swe_agent is not None,
+             "browser": browser_agent is not None,
+             "data": data_agent is not None,
+         },
+         "cloudflare_configured": bool(CLOUDFLARE_CONFIG["api_token"]),
+     }
+
+ @app.post("/auth/signup")
+ async def signup(request: SignupRequest):
+     try:
+         if len(request.password) < 6:
+             raise HTTPException(status_code=400, detail="Password must be at least 6 characters")
+
+         conn = sqlite3.connect("openmanus.db")
+         cursor = conn.cursor()
+
+         cursor.execute("SELECT mobile FROM users WHERE mobile = ?", (request.mobile,))
+         if cursor.fetchone():
+             conn.close()
+             raise HTTPException(status_code=400, detail="Mobile number already registered")
+
+         password_hash = hash_password(request.password)
+         cursor.execute(
+             "INSERT INTO users (mobile, name, password_hash) VALUES (?, ?, ?)",
+             (request.mobile, request.name, password_hash)
+         )
+         conn.commit()
+         conn.close()
+
+         return {
+             "success": True,
+             "message": "Account created successfully",
+             "mobile": request.mobile,
+             "name": request.name
+         }
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Registration failed: {str(e)}")
+
+ @app.post("/auth/login")
+ async def login(request: LoginRequest):
+     try:
+         conn = sqlite3.connect("openmanus.db")
+         cursor = conn.cursor()
+
+         cursor.execute(
+             "SELECT name, password_hash FROM users WHERE mobile = ?",
+             (request.mobile,)
+         )
+         result = cursor.fetchone()
+         conn.close()
+
+         if not result:
+             raise HTTPException(status_code=401, detail="Invalid mobile number or password")
+
+         name, password_hash = result
+
+         if not verify_password(request.password, password_hash):
+             raise HTTPException(status_code=401, detail="Invalid mobile number or password")
+
+         return {
+             "success": True,
+             "message": "Login successful",
+             "user": {
+                 "mobile": request.mobile,
+                 "name": name
+             },
+             "token": f"session_{hash_password(request.mobile + str(datetime.now()))[:32]}"
+         }
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Login failed: {str(e)}")
+
+ @app.post("/agent/run")
+ async def run_agent(request: AgentRequest):
+     """Run any agent with a prompt"""
+     try:
+         agent_name = request.agent.lower()
+
+         # Select agent
+         if agent_name == "manus":
+             if not manus_agent:
+                 raise HTTPException(status_code=503, detail="Manus agent not initialized")
+             agent = manus_agent
+         elif agent_name == "swe":
+             if not swe_agent:
+                 raise HTTPException(status_code=503, detail="SWE agent not initialized")
+             agent = swe_agent
+         elif agent_name == "browser":
+             if not browser_agent:
+                 raise HTTPException(status_code=503, detail="Browser agent not initialized")
+             agent = browser_agent
+         elif agent_name == "data":
+             if not data_agent:
+                 raise HTTPException(status_code=503, detail="Data agent not initialized")
+             agent = data_agent
+         else:
+             raise HTTPException(status_code=400, detail=f"Unknown agent: {agent_name}")
+
+         # Run agent
+         result = await agent.run(request.prompt)
+
+         return {
+             "success": True,
+             "agent": agent_name,
+             "prompt": request.prompt,
+             "result": str(result),
+             "timestamp": datetime.now().isoformat()
+         }
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Agent execution failed: {str(e)}")
+
+ @app.post("/agent/code")
+ async def generate_code(request: CodeRequest):
+     """Software Engineer Agent - Generate code"""
+     try:
+         if not swe_agent:
+             raise HTTPException(status_code=503, detail="SWE agent not initialized")
+
+         prompt = f"Generate {request.language} code for: {request.task}"
+         result = await swe_agent.run(prompt)
+
+         return {
+             "success": True,
+             "task": request.task,
+             "language": request.language,
+             "code": str(result),
+             "timestamp": datetime.now().isoformat()
+         }
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Code generation failed: {str(e)}")
+
+ @app.post("/agent/browser")
+ async def browser_automation(request: BrowserRequest):
+     """Browser Agent - Automate web tasks"""
+     try:
+         if not browser_agent:
+             raise HTTPException(status_code=503, detail="Browser agent not initialized")
+
+         prompt = f"{request.task}"
+         if request.url:
+             prompt += f" on {request.url}"
+
+         result = await browser_agent.run(prompt)
+
+         return {
+             "success": True,
+             "task": request.task,
+             "url": request.url,
+             "result": str(result),
+             "timestamp": datetime.now().isoformat()
+         }
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Browser automation failed: {str(e)}")
+
+ @app.post("/agent/data")
+ async def analyze_data(request: DataRequest):
+     """Data Analysis Agent - Analyze and visualize data"""
+     try:
+         if not data_agent:
+             raise HTTPException(status_code=503, detail="Data agent not initialized")
+
+         prompt = f"Analyze this data: {request.data}. Task: {request.task}"
+         result = await data_agent.run(prompt)
+
+         return {
+             "success": True,
+             "task": request.task,
+             "result": str(result),
+             "timestamp": datetime.now().isoformat()
+         }
+
+     except HTTPException:
+         raise
+     except Exception as e:
+         raise HTTPException(status_code=500, detail=f"Data analysis failed: {str(e)}")
+
+ @app.get("/agents/list")
+ async def list_agents():
+     """List all available agents and their status"""
+     return {
+         "agents": [
+             {
+                 "name": "manus",
+                 "description": "Main agent with all capabilities (chat, coding, browsing, data analysis)",
+                 "status": "initialized" if manus_agent else "not initialized",
+                 "endpoint": "/agent/run"
+             },
+             {
+                 "name": "swe",
+                 "description": "Software Engineer agent (code generation, debugging, refactoring)",
+                 "status": "initialized" if swe_agent else "not initialized",
+                 "endpoint": "/agent/code"
+             },
+             {
+                 "name": "browser",
+                 "description": "Browser automation agent (web scraping, form filling, navigation)",
+                 "status": "initialized" if browser_agent else "not initialized",
+                 "endpoint": "/agent/browser"
+             },
+             {
+                 "name": "data",
+                 "description": "Data analysis agent (charts, visualization, statistics)",
+                 "status": "initialized" if data_agent else "not initialized",
+                 "endpoint": "/agent/data"
+             }
+         ]
+     }
+
+ @app.get("/cloudflare/status")
+ async def cloudflare_status():
+     services = []
+     if CLOUDFLARE_CONFIG["api_token"]:
+         services.append("✅ API Token Configured")
+     if CLOUDFLARE_CONFIG["d1_database_id"]:
+         services.append("✅ D1 Database Connected")
+     if CLOUDFLARE_CONFIG["r2_bucket_name"]:
+         services.append("✅ R2 Storage Connected")
+     if CLOUDFLARE_CONFIG["kv_namespace_id"]:
+         services.append("✅ KV Sessions Connected")
+     if CLOUDFLARE_CONFIG["kv_namespace_cache"]:
+         services.append("✅ KV Cache Connected")
+
+     return {
+         "configured": len(services) > 0,
+         "services": services,
+         "account_id": CLOUDFLARE_CONFIG["account_id"]
+     }
+
+ if __name__ == "__main__":
+     uvicorn.run(
+         app,
+         host="0.0.0.0",
+         port=7860,
+         log_level="info"
+     )
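The auth helpers in app.py boil down to unsalted SHA-256. A standalone sketch of that behavior (note: a production system should use a salted KDF such as bcrypt or argon2 instead):

```python
import hashlib

def hash_password(password: str) -> str:
    # Same scheme as the app's helper: hex digest of SHA-256 over the raw bytes
    return hashlib.sha256(password.encode()).hexdigest()

def verify_password(password: str, password_hash: str) -> bool:
    # Re-hash and compare; with no salt, identical passwords hash identically
    return hash_password(password) == password_hash

h = hash_password("secret123")
print(len(h))                           # 64: SHA-256 is 32 bytes = 64 hex chars
print(verify_password("secret123", h))  # True
print(verify_password("wrong", h))      # False
```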
app/__init__.py ADDED
@@ -0,0 +1,10 @@
+ # Python version check: 3.11-3.13
+ import sys
+
+
+ if sys.version_info < (3, 11) or sys.version_info >= (3, 14):
+     print(
+         "Warning: Unsupported Python version {ver}, please use 3.11-3.13".format(
+             ver=".".join(map(str, sys.version_info))
+         )
+     )
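Upper-bounding `sys.version_info` is easy to get wrong because tuple comparison is element-wise and a longer tuple with an equal prefix compares greater than the shorter one. A quick check of the relevant cases:

```python
# sys.version_info behaves like a 5-tuple, e.g. (3, 13, 1, "final", 0).
# Because a longer tuple with an equal prefix compares greater, 3.13.1 is
# *greater* than (3, 13), so a check like `sys.version_info > (3, 13)` would
# flag every 3.13.x release; `>= (3, 14)` is the safe upper bound for "3.13 ok".
release = (3, 13, 1, "final", 0)
print(release > (3, 13))                 # True  - equal prefix, longer tuple wins
print(release >= (3, 14))                # False - 13 < 14 decides the comparison
print((3, 10, 0, "final", 0) < (3, 11))  # True  - 3.10 is below the floor
```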
app/agent/__init__.py ADDED
@@ -0,0 +1,16 @@
+ from app.agent.base import BaseAgent
+ from app.agent.browser import BrowserAgent
+ from app.agent.mcp import MCPAgent
+ from app.agent.react import ReActAgent
+ from app.agent.swe import SWEAgent
+ from app.agent.toolcall import ToolCallAgent
+
+
+ __all__ = [
+     "BaseAgent",
+     "BrowserAgent",
+     "ReActAgent",
+     "SWEAgent",
+     "ToolCallAgent",
+     "MCPAgent",
+ ]
app/agent/base.py ADDED
@@ -0,0 +1,196 @@
+ from abc import ABC, abstractmethod
+ from contextlib import asynccontextmanager
+ from typing import List, Optional
+
+ from pydantic import BaseModel, Field, model_validator
+
+ from app.llm import LLM
+ from app.logger import logger
+ from app.sandbox.client import SANDBOX_CLIENT
+ from app.schema import ROLE_TYPE, AgentState, Memory, Message
+
+
+ class BaseAgent(BaseModel, ABC):
+     """Abstract base class for managing agent state and execution.
+
+     Provides foundational functionality for state transitions, memory management,
+     and a step-based execution loop. Subclasses must implement the `step` method.
+     """
+
+     # Core attributes
+     name: str = Field(..., description="Unique name of the agent")
+     description: Optional[str] = Field(None, description="Optional agent description")
+
+     # Prompts
+     system_prompt: Optional[str] = Field(
+         None, description="System-level instruction prompt"
+     )
+     next_step_prompt: Optional[str] = Field(
+         None, description="Prompt for determining next action"
+     )
+
+     # Dependencies
+     llm: LLM = Field(default_factory=LLM, description="Language model instance")
+     memory: Memory = Field(default_factory=Memory, description="Agent's memory store")
+     state: AgentState = Field(
+         default=AgentState.IDLE, description="Current agent state"
+     )
+
+     # Execution control
+     max_steps: int = Field(default=10, description="Maximum steps before termination")
+     current_step: int = Field(default=0, description="Current step in execution")
+
+     duplicate_threshold: int = 2
+
+     class Config:
+         arbitrary_types_allowed = True
+         extra = "allow"  # Allow extra fields for flexibility in subclasses
+
+     @model_validator(mode="after")
+     def initialize_agent(self) -> "BaseAgent":
+         """Initialize agent with default settings if not provided."""
+         if self.llm is None or not isinstance(self.llm, LLM):
+             self.llm = LLM(config_name=self.name.lower())
+         if not isinstance(self.memory, Memory):
+             self.memory = Memory()
+         return self
+
+     @asynccontextmanager
+     async def state_context(self, new_state: AgentState):
+         """Context manager for safe agent state transitions.
+
+         Args:
+             new_state: The state to transition to during the context.
+
+         Yields:
+             None: Allows execution within the new state.
+
+         Raises:
+             ValueError: If the new_state is invalid.
+         """
+         if not isinstance(new_state, AgentState):
+             raise ValueError(f"Invalid state: {new_state}")
+
+         previous_state = self.state
+         self.state = new_state
+         try:
+             yield
+         except Exception as e:
+             self.state = AgentState.ERROR  # Transition to ERROR on failure
+             raise e
+         finally:
+             self.state = previous_state  # Revert to previous state
+
+     def update_memory(
+         self,
+         role: ROLE_TYPE,  # type: ignore
+         content: str,
+         base64_image: Optional[str] = None,
+         **kwargs,
+     ) -> None:
+         """Add a message to the agent's memory.
+
+         Args:
+             role: The role of the message sender (user, system, assistant, tool).
+             content: The message content.
+             base64_image: Optional base64 encoded image.
+             **kwargs: Additional arguments (e.g., tool_call_id for tool messages).
+
+         Raises:
+             ValueError: If the role is unsupported.
+         """
+         message_map = {
+             "user": Message.user_message,
+             "system": Message.system_message,
+             "assistant": Message.assistant_message,
+             "tool": lambda content, **kw: Message.tool_message(content, **kw),
+         }
+
+         if role not in message_map:
+             raise ValueError(f"Unsupported message role: {role}")
+
+         # Create message with appropriate parameters based on role
+         kwargs = {"base64_image": base64_image, **(kwargs if role == "tool" else {})}
+         self.memory.add_message(message_map[role](content, **kwargs))
+
+     async def run(self, request: Optional[str] = None) -> str:
+         """Execute the agent's main loop asynchronously.
+
+         Args:
+             request: Optional initial user request to process.
+
+         Returns:
+             A string summarizing the execution results.
+
+         Raises:
+             RuntimeError: If the agent is not in IDLE state at start.
+         """
+         if self.state != AgentState.IDLE:
+             raise RuntimeError(f"Cannot run agent from state: {self.state}")
+
+         if request:
+             self.update_memory("user", request)
+
+         results: List[str] = []
+         async with self.state_context(AgentState.RUNNING):
+             while (
+                 self.current_step < self.max_steps and self.state != AgentState.FINISHED
+             ):
+                 self.current_step += 1
+                 logger.info(f"Executing step {self.current_step}/{self.max_steps}")
+                 step_result = await self.step()
+
+                 # Check for stuck state
+                 if self.is_stuck():
+                     self.handle_stuck_state()
+
+                 results.append(f"Step {self.current_step}: {step_result}")
+
+             if self.current_step >= self.max_steps:
+                 self.current_step = 0
+                 self.state = AgentState.IDLE
+                 results.append(f"Terminated: Reached max steps ({self.max_steps})")
+         await SANDBOX_CLIENT.cleanup()
+         return "\n".join(results) if results else "No steps executed"
+
+     @abstractmethod
+     async def step(self) -> str:
+         """Execute a single step in the agent's workflow.
+
+         Must be implemented by subclasses to define specific behavior.
+         """
+
+     def handle_stuck_state(self):
+         """Handle stuck state by adding a prompt to change strategy"""
+         stuck_prompt = "Observed duplicate responses. Consider new strategies and avoid repeating ineffective paths already attempted."
+         self.next_step_prompt = f"{stuck_prompt}\n{self.next_step_prompt}"
+         logger.warning(f"Agent detected stuck state. Added prompt: {stuck_prompt}")
+
+     def is_stuck(self) -> bool:
+         """Check if the agent is stuck in a loop by detecting duplicate content"""
+         if len(self.memory.messages) < 2:
+             return False
+
+         last_message = self.memory.messages[-1]
+         if not last_message.content:
+             return False
+
+         # Count identical content occurrences
+         duplicate_count = sum(
+             1
+             for msg in reversed(self.memory.messages[:-1])
+             if msg.role == "assistant" and msg.content == last_message.content
+         )
+
+         return duplicate_count >= self.duplicate_threshold
+
+     @property
+     def messages(self) -> List[Message]:
+         """Retrieve a list of messages from the agent's memory."""
+         return self.memory.messages
+
+     @messages.setter
+     def messages(self, value: List[Message]):
+         """Set the list of messages in the agent's memory."""
+         self.memory.messages = value
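The loop-detection logic in `is_stuck` can be exercised in isolation. A minimal sketch with an assumed message shape (the real `Message` class lives in `app.schema`; `Msg` here is a hypothetical stand-in):

```python
from dataclasses import dataclass
from typing import List

@dataclass
class Msg:  # hypothetical stand-in for app.schema.Message
    role: str
    content: str

def is_stuck(messages: List[Msg], duplicate_threshold: int = 2) -> bool:
    """Return True when earlier assistant messages repeat the last message's
    content at least `duplicate_threshold` times (the same rule as BaseAgent)."""
    if len(messages) < 2:
        return False
    last = messages[-1]
    if not last.content:
        return False
    duplicates = sum(
        1
        for m in reversed(messages[:-1])
        if m.role == "assistant" and m.content == last.content
    )
    return duplicates >= duplicate_threshold

msgs = [Msg("assistant", "same"), Msg("assistant", "same"), Msg("assistant", "same")]
print(is_stuck(msgs))       # True: two earlier duplicates meet the threshold of 2
print(is_stuck(msgs[:2]))   # False: only one earlier duplicate
```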
app/agent/browser.py ADDED
@@ -0,0 +1,129 @@
+ import json
+ from typing import TYPE_CHECKING, Optional
+
+ from pydantic import Field, model_validator
+
+ from app.agent.toolcall import ToolCallAgent
+ from app.logger import logger
+ from app.prompt.browser import NEXT_STEP_PROMPT, SYSTEM_PROMPT
+ from app.schema import Message, ToolChoice
+ from app.tool import BrowserUseTool, Terminate, ToolCollection
+ from app.tool.sandbox.sb_browser_tool import SandboxBrowserTool
+
+
+ # Avoid circular import if BrowserAgent needs BrowserContextHelper
+ if TYPE_CHECKING:
+     from app.agent.base import BaseAgent  # Or wherever memory is defined
+
+
+ class BrowserContextHelper:
+     def __init__(self, agent: "BaseAgent"):
+         self.agent = agent
+         self._current_base64_image: Optional[str] = None
+
+     async def get_browser_state(self) -> Optional[dict]:
+         browser_tool = self.agent.available_tools.get_tool(BrowserUseTool().name)
+         if not browser_tool:
+             browser_tool = self.agent.available_tools.get_tool(
+                 SandboxBrowserTool().name
+             )
+         if not browser_tool or not hasattr(browser_tool, "get_current_state"):
+             logger.warning("BrowserUseTool not found or doesn't have get_current_state")
+             return None
+         try:
+             result = await browser_tool.get_current_state()
+             if result.error:
+                 logger.debug(f"Browser state error: {result.error}")
+                 return None
+             if hasattr(result, "base64_image") and result.base64_image:
+                 self._current_base64_image = result.base64_image
+             else:
+                 self._current_base64_image = None
+             return json.loads(result.output)
+         except Exception as e:
+             logger.debug(f"Failed to get browser state: {str(e)}")
+             return None
+
+     async def format_next_step_prompt(self) -> str:
+         """Gets browser state and formats the browser prompt."""
+         browser_state = await self.get_browser_state()
+         url_info, tabs_info, content_above_info, content_below_info = "", "", "", ""
+         results_info = ""  # Or get from agent if needed elsewhere
+
+         if browser_state and not browser_state.get("error"):
+             url_info = f"\n   URL: {browser_state.get('url', 'N/A')}\n   Title: {browser_state.get('title', 'N/A')}"
+             tabs = browser_state.get("tabs", [])
+             if tabs:
+                 tabs_info = f"\n   {len(tabs)} tab(s) available"
+             pixels_above = browser_state.get("pixels_above", 0)
+             pixels_below = browser_state.get("pixels_below", 0)
+             if pixels_above > 0:
+                 content_above_info = f" ({pixels_above} pixels)"
+             if pixels_below > 0:
+                 content_below_info = f" ({pixels_below} pixels)"
+
+             if self._current_base64_image:
+                 image_message = Message.user_message(
+                     content="Current browser screenshot:",
+                     base64_image=self._current_base64_image,
+                 )
+                 self.agent.memory.add_message(image_message)
+                 self._current_base64_image = None  # Consume the image after adding
+
+         return NEXT_STEP_PROMPT.format(
+             url_placeholder=url_info,
+             tabs_placeholder=tabs_info,
+             content_above_placeholder=content_above_info,
+             content_below_placeholder=content_below_info,
+             results_placeholder=results_info,
+         )
+
+     async def cleanup_browser(self):
+         browser_tool = self.agent.available_tools.get_tool(BrowserUseTool().name)
+         if browser_tool and hasattr(browser_tool, "cleanup"):
+             await browser_tool.cleanup()
+
+
+ class BrowserAgent(ToolCallAgent):
+     """
+     A browser agent that uses the browser_use library to control a browser.
+
+     This agent can navigate web pages, interact with elements, fill forms,
+     extract content, and perform other browser-based actions to accomplish tasks.
+     """
+
+     name: str = "browser"
+     description: str = "A browser agent that can control a browser to accomplish tasks"
+
+     system_prompt: str = SYSTEM_PROMPT
+     next_step_prompt: str = NEXT_STEP_PROMPT
+
+     max_observe: int = 10000
+     max_steps: int = 20
+
+     # Configure the available tools
+     available_tools: ToolCollection = Field(
+         default_factory=lambda: ToolCollection(BrowserUseTool(), Terminate())
+     )
+
+     # Use Auto for tool choice to allow both tool usage and free-form responses
+     tool_choices: ToolChoice = ToolChoice.AUTO
+     special_tool_names: list[str] = Field(default_factory=lambda: [Terminate().name])
+
+     browser_context_helper: Optional[BrowserContextHelper] = None
+
+     @model_validator(mode="after")
+     def initialize_helper(self) -> "BrowserAgent":
+         self.browser_context_helper = BrowserContextHelper(self)
+         return self
+
+     async def think(self) -> bool:
+         """Process current state and decide next actions using tools, with browser state info added"""
+         self.next_step_prompt = (
+             await self.browser_context_helper.format_next_step_prompt()
+         )
+         return await super().think()
+
+     async def cleanup(self):
+         """Clean up browser agent resources via the browser context helper."""
+         await self.browser_context_helper.cleanup_browser()
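The placeholder-filling done by `format_next_step_prompt` can be sketched standalone. The template below is an assumed stand-in for the real `NEXT_STEP_PROMPT` in `app.prompt.browser`, which has more placeholders filled the same way:

```python
from typing import Optional

# Assumed stand-in template; the real NEXT_STEP_PROMPT also has placeholders
# for content above/below the viewport and prior results.
NEXT_STEP_PROMPT = "Browser state:{url_placeholder}{tabs_placeholder}"

def format_next_step_prompt(state: Optional[dict]) -> str:
    """Fold a browser-state dict into the prompt; missing or errored state
    leaves the placeholders empty, mirroring BrowserContextHelper."""
    url_info, tabs_info = "", ""
    if state and not state.get("error"):
        url_info = f"\n URL: {state.get('url', 'N/A')}\n Title: {state.get('title', 'N/A')}"
        tabs = state.get("tabs", [])
        if tabs:
            tabs_info = f"\n {len(tabs)} tab(s) available"
    return NEXT_STEP_PROMPT.format(url_placeholder=url_info, tabs_placeholder=tabs_info)

print(format_next_step_prompt({"url": "https://example.com", "tabs": [1, 2]}))
print(format_next_step_prompt(None))  # "Browser state:" with empty placeholders
```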
app/agent/data_analysis.py ADDED
@@ -0,0 +1,37 @@
+ from pydantic import Field
+
+ from app.agent.toolcall import ToolCallAgent
+ from app.config import config
+ from app.prompt.visualization import NEXT_STEP_PROMPT, SYSTEM_PROMPT
+ from app.tool import Terminate, ToolCollection
+ from app.tool.chart_visualization.chart_prepare import VisualizationPrepare
+ from app.tool.chart_visualization.data_visualization import DataVisualization
+ from app.tool.chart_visualization.python_execute import NormalPythonExecute
+
+
+ class DataAnalysis(ToolCallAgent):
+     """
+     A data analysis agent that uses planning to solve various data analysis tasks.
+
+     This agent extends ToolCallAgent with a comprehensive set of tools and capabilities,
+     including Data Analysis, Chart Visualization, Data Report.
+     """
+
+     name: str = "Data_Analysis"
+     description: str = "An analytical agent that utilizes python and data visualization tools to solve diverse data analysis tasks"
+
+     system_prompt: str = SYSTEM_PROMPT.format(directory=config.workspace_root)
+     next_step_prompt: str = NEXT_STEP_PROMPT
+
+     max_observe: int = 15000
+     max_steps: int = 20
+
+     # Add general-purpose tools to the tool collection
+     available_tools: ToolCollection = Field(
+         default_factory=lambda: ToolCollection(
+             NormalPythonExecute(),
+             VisualizationPrepare(),
+             DataVisualization(),
+             Terminate(),
+         )
+     )
app/agent/huggingface_agent.py ADDED
@@ -0,0 +1,889 @@
+ """
+ Hugging Face Agent Integration for OpenManus
+ Extends the main AI agent with access to thousands of HuggingFace models
+ """
+
+ import os
+ from typing import Any, Dict, List, Optional
+
+ from app.agent.base import BaseAgent
+ from app.huggingface_models import ModelCategory
+ from app.logger import logger
+ from app.tool.huggingface_models_tool import HuggingFaceModelsTool
+
+
+ class HuggingFaceAgent(BaseAgent):
+     """AI Agent with integrated HuggingFace model access"""
+
+     def __init__(self, **config):
+         super().__init__(**config)
+
+         # Initialize HuggingFace integration
+         hf_token = os.getenv("HUGGINGFACE_TOKEN") or config.get("huggingface_token")
+         if not hf_token:
+             logger.warning(
+                 "No Hugging Face token provided. HF models will not be available."
+             )
+             self.hf_tool = None
+         else:
+             self.hf_tool = HuggingFaceModelsTool(hf_token)
+
+         # Default models for different tasks
+         self.default_models = {
+             "text_generation": "MiniMax-M2",  # Latest high-performance model
+             "image_generation": "FLUX.1 Dev",  # Best quality image generation
+             "speech_recognition": "Whisper Large v3",  # Best multilingual ASR
+             "text_to_speech": "Kokoro 82M",  # High quality, lightweight TTS
+             "image_classification": "ViT Base Patch16",  # General image classification
+             "embeddings": "Sentence Transformers All MiniLM",  # Fast embeddings
+             "translation": "M2M100 1.2B",  # Multilingual translation
+             "summarization": "PEGASUS XSum",  # Abstractive summarization
+         }
+
+     async def generate_text_with_hf(
+         self,
+         prompt: str,
+         model_name: Optional[str] = None,
+         max_tokens: int = 200,
+         temperature: float = 0.7,
+         stream: bool = False,
+     ) -> Dict[str, Any]:
+         """Generate text using HuggingFace models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or self.default_models["text_generation"]
+
+         return await self.hf_tool.text_generation(
+             model_name=model_name,
+             prompt=prompt,
+             max_tokens=max_tokens,
+             temperature=temperature,
+             stream=stream,
+         )
+
+     async def generate_image_with_hf(
+         self,
+         prompt: str,
+         model_name: Optional[str] = None,
+         negative_prompt: Optional[str] = None,
+         width: int = 1024,
+         height: int = 1024,
+     ) -> Dict[str, Any]:
+         """Generate images using HuggingFace models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or self.default_models["image_generation"]
+
+         return await self.hf_tool.generate_image(
+             model_name=model_name,
+             prompt=prompt,
+             negative_prompt=negative_prompt,
+             width=width,
+             height=height,
+         )
+
+     async def transcribe_audio_with_hf(
+         self,
+         audio_data: bytes,
+         model_name: Optional[str] = None,
+         language: Optional[str] = None,
+     ) -> Dict[str, Any]:
+         """Transcribe audio using HuggingFace models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or self.default_models["speech_recognition"]
+
+         return await self.hf_tool.transcribe_audio(
+             model_name=model_name, audio_data=audio_data, language=language
+         )
+
+     async def synthesize_speech_with_hf(
+         self,
+         text: str,
+         model_name: Optional[str] = None,
+         voice_id: Optional[str] = None,
+     ) -> Dict[str, Any]:
+         """Generate speech from text using HuggingFace models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or self.default_models["text_to_speech"]
+
+         return await self.hf_tool.text_to_speech(
+             model_name=model_name, text=text, voice_id=voice_id
+         )
+
+     async def classify_image_with_hf(
+         self, image_data: bytes, model_name: Optional[str] = None, task: str = "general"
+     ) -> Dict[str, Any]:
+         """Classify images using HuggingFace models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         # Choose model based on task
+         if task == "nsfw":
+             model_name = "NSFW Image Detection"
+         elif task == "emotions":
+             model_name = "Facial Emotions Detection"
+         elif task == "deepfake":
+             model_name = "Deepfake Detection"
+         else:
+             model_name = model_name or self.default_models["image_classification"]
+
+         return await self.hf_tool.classify_image(
+             model_name=model_name, image_data=image_data
+         )
+
+     async def get_text_embeddings_with_hf(
+         self, texts: List[str], model_name: Optional[str] = None
+     ) -> Dict[str, Any]:
+         """Get text embeddings using HuggingFace models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or self.default_models["embeddings"]
+
+         return await self.hf_tool.get_embeddings(model_name=model_name, texts=texts)
+
+     async def translate_with_hf(
+         self,
+         text: str,
+         target_language: str,
+         source_language: Optional[str] = None,
+         model_name: Optional[str] = None,
+     ) -> Dict[str, Any]:
+         """Translate text using HuggingFace models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or self.default_models["translation"]
+
+         return await self.hf_tool.translate_text(
+             model_name=model_name,
+             text=text,
+             source_language=source_language,
+             target_language=target_language,
+         )
+
+     async def summarize_with_hf(
+         self, text: str, model_name: Optional[str] = None, max_length: int = 150
+     ) -> Dict[str, Any]:
+         """Summarize text using HuggingFace models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or self.default_models["summarization"]
+
+         return await self.hf_tool.summarize_text(
+             model_name=model_name, text=text, max_length=max_length
+         )
+
+     def get_available_hf_models(self, category: Optional[str] = None) -> Dict[str, Any]:
+         """Get list of available HuggingFace models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         return self.hf_tool.list_available_models(category)
+
+     async def smart_model_selection(
+         self, task_description: str, content_type: str = "text"
+     ) -> str:
+         """
+         Intelligently select the best HuggingFace model for a task
+
+         Args:
+             task_description: Description of what the user wants to do
+             content_type: Type of content (text, image, audio, video)
+         """
+         task_lower = task_description.lower()
+
+         # Video generation and processing
+         if any(
+             keyword in task_lower
+             for keyword in [
+                 "video",
+                 "movie",
+                 "animation",
+                 "motion",
+                 "gif",
+                 "sequence",
+                 "frames",
+             ]
+         ):
+             if "generate" in task_lower or "create" in task_lower:
+                 return "Stable Video Diffusion"
+             elif "analyze" in task_lower or "describe" in task_lower:
+                 return "Video ChatGPT"
+             else:
+                 return "AnimateDiff"
+
+         # Code and App Development
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "code",
+                 "programming",
+                 "app",
+                 "application",
+                 "software",
+                 "develop",
+                 "build",
+                 "function",
+                 "class",
+                 "api",
+                 "database",
+                 "website",
+                 "frontend",
+                 "backend",
+             ]
+         ):
+             if "app" in task_lower or "application" in task_lower:
+                 return "CodeLlama 34B Instruct"  # Best for full applications
+             elif "python" in task_lower:
+                 return "WizardCoder 34B"  # Python specialist
+             elif "api" in task_lower:
+                 return "StarCoder2 15B"  # Good for APIs
+             elif "explain" in task_lower or "comment" in task_lower:
+                 return "Phind CodeLlama"  # Best for code explanation
+             else:
+                 return "DeepSeek Coder V2"  # General coding
+
+         # 3D and AR/VR Content
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "3d",
+                 "three dimensional",
+                 "mesh",
+                 "model",
+                 "obj",
+                 "stl",
+                 "ar",
+                 "vr",
+                 "augmented reality",
+                 "virtual reality",
+                 "texture",
+                 "material",
+             ]
+         ):
+             if "text" in task_lower and ("3d" in task_lower or "model" in task_lower):
+                 return "Shap-E"
+             elif "image" in task_lower and "3d" in task_lower:
+                 return "DreamFusion"
+             else:
+                 return "Point-E"
+
+         # Document Processing and OCR
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "ocr",
+                 "document",
+                 "pdf",
+                 "scan",
+                 "extract text",
+                 "handwriting",
+                 "form",
+                 "table",
+                 "layout",
+                 "invoice",
+                 "receipt",
+                 "contract",
+             ]
+         ):
+             if "handwriting" in task_lower or "handwritten" in task_lower:
+                 return "TrOCR Handwritten"
+             elif "table" in task_lower:
+                 return "TableTransformer"
+             elif "form" in task_lower:
+                 return "FormNet"
+             else:
+                 return "TrOCR Large"
+
+         # Multimodal AI
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "visual question",
+                 "image question",
+                 "describe image",
+                 "multimodal",
+                 "vision language",
+                 "image text",
+                 "cross modal",
+             ]
+         ):
+             if "chat" in task_lower or "conversation" in task_lower:
+                 return "GPT-4V"
+             elif "question" in task_lower:
+                 return "LLaVA"
+             else:
+                 return "BLIP-2"
+
+         # Creative Content
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "story",
+                 "creative",
+                 "poem",
+                 "poetry",
+                 "novel",
+                 "screenplay",
+                 "script",
+                 "blog",
+                 "article",
+                 "marketing",
+                 "copy",
+                 "advertising",
+             ]
+         ):
+             if "story" in task_lower or "novel" in task_lower:
+                 return "Novel AI"
+             elif "poem" in task_lower or "poetry" in task_lower:
+                 return "Poet Assistant"
+             elif "marketing" in task_lower or "copy" in task_lower:
+                 return "Marketing Copy AI"
+             else:
+                 return "GPT-3.5 Creative"
+
+         # Game Development
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "game",
+                 "character",
+                 "npc",
+                 "level",
+                 "dialogue",
+                 "asset",
+                 "quest",
+                 "gameplay",
+                 "mechanic",
+                 "unity",
+                 "unreal",
+             ]
+         ):
+             if "character" in task_lower:
+                 return "Character AI"
+             elif "level" in task_lower or "environment" in task_lower:
+                 return "Level Designer"
+             elif "dialogue" in task_lower or "conversation" in task_lower:
+                 return "Dialogue Writer"
+             else:
+                 return "Asset Creator"
+
+         # Science and Research
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "research",
+                 "scientific",
+                 "paper",
+                 "analysis",
+                 "data",
+                 "protein",
+                 "molecule",
+                 "chemistry",
+                 "biology",
+                 "physics",
+                 "experiment",
+             ]
+         ):
+             if "protein" in task_lower or "folding" in task_lower:
+                 return "AlphaFold"
+             elif "molecule" in task_lower or "chemistry" in task_lower:
+                 return "ChemBERTa"
+             elif "data" in task_lower and "analysis" in task_lower:
+                 return "Data Analyst"
+             else:
+                 return "SciBERT"
+
+         # Business and Productivity
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "email",
+                 "business",
+                 "report",
+                 "presentation",
+                 "meeting",
+                 "project",
+                 "plan",
+                 "proposal",
+                 "memo",
+                 "letter",
+                 "professional",
+             ]
+         ):
+             if "email" in task_lower:
+                 return "Email Assistant"
+             elif "presentation" in task_lower:
+                 return "Presentation AI"
+             elif "report" in task_lower:
+                 return "Report Writer"
+             elif "meeting" in task_lower:
+                 return "Meeting Summarizer"
+             else:
+                 return "Project Planner"
+
+         # Specialized AI
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "music",
+                 "audio",
+                 "sound",
+                 "voice clone",
+                 "enhance",
+                 "restore",
+                 "upscale",
+                 "remove background",
+                 "inpaint",
+                 "style transfer",
+             ]
+         ):
+             if "music" in task_lower:
+                 return "MusicGen"
+             elif "voice" in task_lower and "clone" in task_lower:
+                 return "Voice Cloner"
+             elif "upscale" in task_lower or "enhance" in task_lower:
+                 return "Real-ESRGAN"
+             elif "background" in task_lower and "remove" in task_lower:
+                 return "Background Remover"
+             elif "restore" in task_lower or "face" in task_lower:
+                 return "GFPGAN"
+             else:
+                 return "LaMa"
+
+         # Traditional categories
+         elif any(
+             keyword in task_lower
+             for keyword in [
+                 "generate",
+                 "write",
+                 "create",
+                 "compose",
+                 "chat",
+                 "conversation",
+             ]
+         ):
+             if "chat" in task_lower or "conversation" in task_lower:
+                 return "Llama 3.1 8B Instruct"
+             else:
+                 return "MiniMax-M2"
+
+         # Image generation
+         elif any(
+             keyword in task_lower
+             for keyword in ["image", "picture", "draw", "art", "photo", "visual"]
+         ):
+             if "fast" in task_lower or "quick" in task_lower:
+                 return "FLUX.1 Schnell"
+             else:
+                 return "FLUX.1 Dev"
+
+         # Audio processing
+         elif any(
+             keyword in task_lower
+             for keyword in ["transcribe", "speech to text", "recognize", "audio"]
+         ):
+             if content_type == "audio" or "transcribe" in task_lower:
+                 return "Whisper Large v3"
+
+         # Text-to-speech
+         elif any(
+             keyword in task_lower
+             for keyword in ["speak", "voice", "text to speech", "tts"]
+         ):
+             if "fast" in task_lower:
+                 return "Kokoro 82M"  # Lightweight and fast
+             else:
+                 return "VibeVoice 1.5B"  # High quality
+
+         # Image analysis
+         elif (
+             any(
+                 keyword in task_lower
+                 for keyword in ["classify", "analyze image", "detect", "recognize"]
+             )
+             and content_type == "image"
+         ):
+             if "nsfw" in task_lower or "safe" in task_lower:
+                 return "NSFW Image Detection"
+             elif "emotion" in task_lower or "face" in task_lower:
+                 return "Facial Emotions Detection"
+             elif "deepfake" in task_lower or "fake" in task_lower:
+                 return "Deepfake Detection"
+             else:
+                 return "ViT Base Patch16"  # General classification
+
+         # Translation
+         elif any(
+             keyword in task_lower for keyword in ["translate", "language", "convert"]
+         ):
+             return "M2M100 1.2B"  # Multilingual translation
+
+         # Summarization
+         elif any(
+             keyword in task_lower
+             for keyword in ["summarize", "summary", "abstract", "brief"]
+         ):
+             return "PEGASUS XSum"  # Best summarization
+
+         # Embeddings/similarity
+         elif any(
+             keyword in task_lower
+             for keyword in ["similar", "embed", "vector", "search", "match"]
+         ):
+             return "Sentence Transformers All MiniLM"  # Fast embeddings
+
+         # Default fallback
+         else:
+             return "MiniMax-M2"  # Best general-purpose model
+
+     async def execute_hf_task(
+         self, task: str, content: Any, model_name: Optional[str] = None, **kwargs
+     ) -> Dict[str, Any]:
+         """
+         Execute any HuggingFace task with intelligent model selection
+
+         Args:
+             task: Task description (e.g., "generate image", "transcribe audio")
+             content: Input content (text, image bytes, audio bytes)
+             model_name: Specific model to use (optional)
+             **kwargs: Additional parameters
+         """
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         try:
+             task_lower = task.lower()
+
+             # Determine content type
+             content_type = "text"
+             if isinstance(content, bytes):
+                 if (
+                     b"PNG" in content[:20]
+                     or b"JFIF" in content[:20]
+                     or b"GIF" in content[:20]
+                 ):
+                     content_type = "image"
+                 else:
+                     content_type = "audio"
+
+             # Auto-select model if not specified
+             if not model_name:
+                 model_name = await self.smart_model_selection(task, content_type)
+
+             # Route to appropriate method based on task
+             if "generate" in task_lower and (
+                 "image" in task_lower or "picture" in task_lower
+             ):
+                 return await self.generate_image_with_hf(content, model_name, **kwargs)
+
+             elif "transcribe" in task_lower or "speech to text" in task_lower:
+                 return await self.transcribe_audio_with_hf(
+                     content, model_name, **kwargs
+                 )
+
+             elif "text to speech" in task_lower or "tts" in task_lower:
+                 return await self.synthesize_speech_with_hf(
+                     content, model_name, **kwargs
+                 )
+
+             elif "classify" in task_lower and content_type == "image":
+                 return await self.classify_image_with_hf(content, model_name, **kwargs)
+
+             elif "embed" in task_lower or "vector" in task_lower:
+                 texts = [content] if isinstance(content, str) else content
+                 return await self.get_text_embeddings_with_hf(texts, model_name)
+
+             elif "translate" in task_lower:
+                 return await self.translate_with_hf(
+                     content, model_name=model_name, **kwargs
+                 )
+
+             elif "summarize" in task_lower:
+                 return await self.summarize_with_hf(content, model_name, **kwargs)
+
+             else:
+                 # Default to text generation
+                 return await self.generate_text_with_hf(content, model_name, **kwargs)
+
+         except Exception as e:
+             logger.error(f"HuggingFace task execution failed: {e}")
+             return {"error": f"Task execution failed: {str(e)}"}
+
+     async def chat_with_hf_models(
+         self, message: str, conversation_history: List[Dict] = None
+     ) -> Dict[str, Any]:
+         """
+         Enhanced chat with access to HuggingFace models
+
+         This method extends the base agent's capabilities with HF models
+         """
+         # Check if the user is asking for HuggingFace-specific functionality
+         message_lower = message.lower()
+
+         # Handle model listing requests
+         if "list" in message_lower and (
+             "model" in message_lower or "hf" in message_lower
+         ):
+             return self.get_available_hf_models()
+
+         # Handle specific model requests
+         hf_keywords = [
+             "generate image",
+             "create image",
+             "draw",
+             "picture",
+             "transcribe",
+             "speech to text",
+             "audio",
+             "text to speech",
+             "speak",
+             "voice",
+             "translate",
+             "language",
+             "classify image",
+             "embed",
+             "vector",
+             "similarity",
+             "summarize",
+         ]
+
+         if any(keyword in message_lower for keyword in hf_keywords):
+             # This is likely a HuggingFace model request
+             return await self.execute_hf_task(message, message)
+
+         # For regular chat, we can enhance responses with HF models
+         # First get a response from the base agent
+         base_response = await super().chat(message, conversation_history)
+
+         # Optionally enhance with HF capabilities if relevant
+         if "image" in message_lower and "generate" in message_lower:
+             # User might want image generation
+             base_response["hf_suggestion"] = {
+                 "action": "generate_image",
+                 "models": ["FLUX.1 Dev", "FLUX.1 Schnell", "Stable Diffusion XL"],
+                 "message": "I can also generate images for you using HuggingFace models. Just ask!",
+             }
+
+         return base_response
+
+     # New methods for expanded model categories
+
+     async def generate_video_with_hf(
+         self, prompt: str, model_name: Optional[str] = None, **kwargs
+     ) -> Dict[str, Any]:
+         """Generate video from text prompt"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "Stable Video Diffusion"
+         return await self.hf_tool.text_to_video(
+             model_name=model_name, prompt=prompt, **kwargs
+         )
+
+     async def generate_code_with_hf(
+         self,
+         prompt: str,
+         language: str = "python",
+         model_name: Optional[str] = None,
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """Generate code from natural language description"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "CodeLlama 34B Instruct"
+         return await self.hf_tool.code_generation(
+             model_name=model_name, prompt=prompt, language=language, **kwargs
+         )
+
+     async def generate_app_with_hf(
+         self,
+         description: str,
+         app_type: str = "web_app",
+         model_name: Optional[str] = None,
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """Generate complete application from description"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "CodeLlama 34B Instruct"
+         enhanced_prompt = f"Create a {app_type} application: {description}"
+         return await self.hf_tool.code_generation(
+             model_name=model_name, prompt=enhanced_prompt, **kwargs
+         )
+
+     async def generate_3d_model_with_hf(
+         self, prompt: str, model_name: Optional[str] = None, **kwargs
+     ) -> Dict[str, Any]:
+         """Generate 3D model from text description"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "Shap-E"
+         return await self.hf_tool.text_to_3d(
+             model_name=model_name, prompt=prompt, **kwargs
+         )
+
+     async def process_document_with_hf(
+         self,
+         document_data: bytes,
+         task_type: str = "ocr",
+         model_name: Optional[str] = None,
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """Process documents with OCR and analysis"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         if task_type == "ocr":
+             model_name = model_name or "TrOCR Large"
+             return await self.hf_tool.ocr(
+                 model_name=model_name, image_data=document_data, **kwargs
+             )
+         else:
+             model_name = model_name or "LayoutLMv3"
+             return await self.hf_tool.document_analysis(
+                 model_name=model_name, document_data=document_data, **kwargs
+             )
+
+     async def multimodal_chat_with_hf(
+         self, image_data: bytes, text: str, model_name: Optional[str] = None, **kwargs
+     ) -> Dict[str, Any]:
+         """Chat with images using multimodal models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "BLIP-2"
+         return await self.hf_tool.vision_language(
+             model_name=model_name, image_data=image_data, text=text, **kwargs
+         )
+
+     async def generate_music_with_hf(
+         self,
+         prompt: str,
+         duration: int = 30,
+         model_name: Optional[str] = None,
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """Generate music from text description"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "MusicGen"
+         return await self.hf_tool.music_generation(
+             model_name=model_name, prompt=prompt, duration=duration, **kwargs
+         )
+
+     async def enhance_image_with_hf(
+         self,
+         image_data: bytes,
+         task_type: str = "super_resolution",
+         model_name: Optional[str] = None,
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """Enhance images with various AI models"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         if task_type == "super_resolution":
+             model_name = model_name or "Real-ESRGAN"
+             return await self.hf_tool.super_resolution(
+                 model_name=model_name, image_data=image_data, **kwargs
+             )
+         elif task_type == "background_removal":
+             model_name = model_name or "Background Remover"
+             return await self.hf_tool.background_removal(
+                 model_name=model_name, image_data=image_data, **kwargs
+             )
+         elif task_type == "face_restoration":
+             model_name = model_name or "GFPGAN"
+             return await self.hf_tool.super_resolution(
+                 model_name=model_name, image_data=image_data, **kwargs
+             )
+
+     async def generate_creative_content_with_hf(
+         self,
+         prompt: str,
+         content_type: str = "story",
+         model_name: Optional[str] = None,
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """Generate creative content like stories, poems, etc."""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "GPT-3.5 Creative"
+         enhanced_prompt = f"Write a {content_type}: {prompt}"
+         return await self.hf_tool.creative_writing(
+             model_name=model_name, prompt=enhanced_prompt, **kwargs
+         )
+
+     async def generate_game_content_with_hf(
+         self,
+         description: str,
+         content_type: str = "character",
+         model_name: Optional[str] = None,
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """Generate game development content"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "Character AI"
+         enhanced_prompt = f"Create game {content_type}: {description}"
+         return await self.hf_tool.creative_writing(
+             model_name=model_name, prompt=enhanced_prompt, **kwargs
+         )
+
+     async def generate_business_document_with_hf(
+         self,
+         context: str,
+         document_type: str = "email",
+         model_name: Optional[str] = None,
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """Generate business documents and content"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "Email Assistant"
+         return await self.hf_tool.business_document(
+             model_name=model_name,
+             document_type=document_type,
+             context=context,
+             **kwargs,
+         )
+
+     async def research_assistance_with_hf(
+         self,
+         topic: str,
+         research_type: str = "analysis",
+         model_name: Optional[str] = None,
+         **kwargs,
+     ) -> Dict[str, Any]:
+         """Research assistance and scientific content generation"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         model_name = model_name or "SciBERT"
+         enhanced_prompt = f"Research {research_type} on: {topic}"
+         return await self.hf_tool.text_generation(
+             model_name=model_name, prompt=enhanced_prompt, **kwargs
+         )
+
+     def get_available_hf_models(self, category: Optional[str] = None) -> Dict[str, Any]:
+         """Get available models by category"""
+         if not self.hf_tool:
+             return {"error": "HuggingFace integration not available"}
+
+         return self.hf_tool.list_available_models(category=category)
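`smart_model_selection` is a first-match keyword router: the if/elif order defines precedence, and matching is plain substring containment on the lowercased task description. The core of that logic can be condensed into a self-contained sketch (the rule table below is a heavily abbreviated stand-in, using the display names from the file above):

```python
# Ordered (keywords, model) rules; first match wins, mirroring the
# if/elif precedence in smart_model_selection. Abbreviated for illustration.
ROUTING_RULES = [
    ({"video", "movie", "animation"}, "Stable Video Diffusion"),
    ({"code", "api", "app"}, "DeepSeek Coder V2"),
    ({"image", "picture", "draw"}, "FLUX.1 Dev"),
    ({"translate", "language"}, "M2M100 1.2B"),
]

FALLBACK = "MiniMax-M2"  # general-purpose default, as in the file above


def select_model(task_description: str) -> str:
    task_lower = task_description.lower()
    for keywords, model in ROUTING_RULES:
        # Substring containment, exactly like `keyword in task_lower` above.
        if any(keyword in task_lower for keyword in keywords):
            return model
    return FALLBACK


print(select_model("Draw a picture of a cat"))  # FLUX.1 Dev
print(select_model("Summarize this memo"))      # MiniMax-M2
```

Note the design trade-off this inherits from the original: substring matching is cheap but coarse, so a short keyword such as `"api"` also fires inside unrelated words like "rapid". Tokenizing the description before matching would avoid that at the cost of slightly more code.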
app/agent/manus.py ADDED
@@ -0,0 +1,165 @@
+ from typing import Dict, List, Optional
+
+ from pydantic import Field, model_validator
+
+ from app.agent.browser import BrowserContextHelper
+ from app.agent.toolcall import ToolCallAgent
+ from app.config import config
+ from app.logger import logger
+ from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT
+ from app.tool import Terminate, ToolCollection
+ from app.tool.ask_human import AskHuman
+ from app.tool.browser_use_tool import BrowserUseTool
+ from app.tool.mcp import MCPClients, MCPClientTool
+ from app.tool.python_execute import PythonExecute
+ from app.tool.str_replace_editor import StrReplaceEditor
+
+
+ class Manus(ToolCallAgent):
+     """A versatile general-purpose agent with support for both local and MCP tools."""
+
+     name: str = "Manus"
+     description: str = "A versatile agent that can solve various tasks using multiple tools including MCP-based tools"
+
+     system_prompt: str = SYSTEM_PROMPT.format(directory=config.workspace_root)
+     next_step_prompt: str = NEXT_STEP_PROMPT
+
+     max_observe: int = 10000
+     max_steps: int = 20
+
+     # MCP clients for remote tool access
+     mcp_clients: MCPClients = Field(default_factory=MCPClients)
+
+     # Add general-purpose tools to the tool collection
+     available_tools: ToolCollection = Field(
+         default_factory=lambda: ToolCollection(
+             PythonExecute(),
+             BrowserUseTool(),
+             StrReplaceEditor(),
+             AskHuman(),
+             Terminate(),
+         )
+     )
+
+     special_tool_names: list[str] = Field(default_factory=lambda: [Terminate().name])
+     browser_context_helper: Optional[BrowserContextHelper] = None
+
+     # Track connected MCP servers
+     connected_servers: Dict[str, str] = Field(
+         default_factory=dict
+     )  # server_id -> url/command
+     _initialized: bool = False
+
+     @model_validator(mode="after")
+     def initialize_helper(self) -> "Manus":
+         """Initialize basic components synchronously."""
+         self.browser_context_helper = BrowserContextHelper(self)
+         return self
+
+     @classmethod
+     async def create(cls, **kwargs) -> "Manus":
+         """Factory method to create and properly initialize a Manus instance."""
+         instance = cls(**kwargs)
+         await instance.initialize_mcp_servers()
+         instance._initialized = True
+         return instance
+
+     async def initialize_mcp_servers(self) -> None:
+         """Initialize connections to configured MCP servers."""
+         for server_id, server_config in config.mcp_config.servers.items():
+             try:
+                 if server_config.type == "sse":
+                     if server_config.url:
+                         await self.connect_mcp_server(server_config.url, server_id)
+                         logger.info(
+                             f"Connected to MCP server {server_id} at {server_config.url}"
+                         )
+                 elif server_config.type == "stdio":
+                     if server_config.command:
+                         await self.connect_mcp_server(
+                             server_config.command,
+                             server_id,
+                             use_stdio=True,
+                             stdio_args=server_config.args,
+                         )
+                         logger.info(
+                             f"Connected to MCP server {server_id} using command {server_config.command}"
+                         )
+             except Exception as e:
+                 logger.error(f"Failed to connect to MCP server {server_id}: {e}")
+
+     async def connect_mcp_server(
+         self,
+         server_url: str,
+         server_id: str = "",
+         use_stdio: bool = False,
+         stdio_args: List[str] = None,
+     ) -> None:
+         """Connect to an MCP server and add its tools."""
+         if use_stdio:
+             await self.mcp_clients.connect_stdio(
+                 server_url, stdio_args or [], server_id
+             )
+             self.connected_servers[server_id or server_url] = server_url
+         else:
+             await self.mcp_clients.connect_sse(server_url, server_id)
+             self.connected_servers[server_id or server_url] = server_url
+
+         # Update available tools with only the new tools from this server
+         new_tools = [
+             tool for tool in self.mcp_clients.tools if tool.server_id == server_id
+         ]
+         self.available_tools.add_tools(*new_tools)
+
+     async def disconnect_mcp_server(self, server_id: str = "") -> None:
+         """Disconnect from an MCP server and remove its tools."""
+         await self.mcp_clients.disconnect(server_id)
+         if server_id:
+             self.connected_servers.pop(server_id, None)
+         else:
+             self.connected_servers.clear()
+
+         # Rebuild available tools without the disconnected server's tools
+         base_tools = [
+             tool
+             for tool in self.available_tools.tools
+             if not isinstance(tool, MCPClientTool)
+         ]
+         self.available_tools = ToolCollection(*base_tools)
+         self.available_tools.add_tools(*self.mcp_clients.tools)
+
+     async def cleanup(self):
+         """Clean up Manus agent resources."""
+         if self.browser_context_helper:
+             await self.browser_context_helper.cleanup_browser()
135
+ # Disconnect from all MCP servers only if we were initialized
136
+ if self._initialized:
137
+ await self.disconnect_mcp_server()
138
+ self._initialized = False
139
+
140
+ async def think(self) -> bool:
141
+ """Process current state and decide next actions with appropriate context."""
142
+ if not self._initialized:
143
+ await self.initialize_mcp_servers()
144
+ self._initialized = True
145
+
146
+ original_prompt = self.next_step_prompt
147
+ recent_messages = self.memory.messages[-3:] if self.memory.messages else []
148
+ browser_in_use = any(
149
+ tc.function.name == BrowserUseTool().name
150
+ for msg in recent_messages
151
+ if msg.tool_calls
152
+ for tc in msg.tool_calls
153
+ )
154
+
155
+ if browser_in_use:
156
+ self.next_step_prompt = (
157
+ await self.browser_context_helper.format_next_step_prompt()
158
+ )
159
+
160
+ result = await super().think()
161
+
162
+ # Restore original prompt
163
+ self.next_step_prompt = original_prompt
164
+
165
+ return result
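The `disconnect_mcp_server` method above rebuilds the tool collection by keeping non-MCP tools and re-adding whatever the MCP clients still expose. The same pattern in isolation, with stand-in `Tool`/`MCPTool` classes (not the real `app.tool` types):

```python
# Stand-ins for the real BaseTool / MCPClientTool classes.
class Tool:
    def __init__(self, name: str):
        self.name = name

class MCPTool(Tool):
    def __init__(self, name: str, server_id: str):
        super().__init__(name)
        self.server_id = server_id

def rebuild_tools(current: list, still_connected: list) -> list:
    # Keep base (non-MCP) tools, then re-add the remaining MCP tools.
    base = [t for t in current if not isinstance(t, MCPTool)]
    return base + still_connected

tools = [Tool("bash"), MCPTool("search", "srv1"), MCPTool("fetch", "srv2")]
# Simulate disconnecting "srv1": only srv2's tools remain on the clients.
remaining = [t for t in tools if isinstance(t, MCPTool) and t.server_id != "srv1"]
rebuilt = rebuild_tools(tools, remaining)
print([t.name for t in rebuilt])  # ['bash', 'fetch']
```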
app/agent/mcp.py ADDED
@@ -0,0 +1,185 @@
+ from typing import Any, Dict, List, Optional, Tuple
+
+ from pydantic import Field
+
+ from app.agent.toolcall import ToolCallAgent
+ from app.logger import logger
+ from app.prompt.mcp import MULTIMEDIA_RESPONSE_PROMPT, NEXT_STEP_PROMPT, SYSTEM_PROMPT
+ from app.schema import AgentState, Message
+ from app.tool.base import ToolResult
+ from app.tool.mcp import MCPClients
+
+
+ class MCPAgent(ToolCallAgent):
+     """Agent for interacting with MCP (Model Context Protocol) servers.
+
+     This agent connects to an MCP server using either SSE or stdio transport
+     and makes the server's tools available through the agent's tool interface.
+     """
+
+     name: str = "mcp_agent"
+     description: str = "An agent that connects to an MCP server and uses its tools."
+
+     system_prompt: str = SYSTEM_PROMPT
+     next_step_prompt: str = NEXT_STEP_PROMPT
+
+     # Initialize MCP tool collection
+     mcp_clients: MCPClients = Field(default_factory=MCPClients)
+     available_tools: MCPClients = None  # Will be set in initialize()
+
+     max_steps: int = 20
+     connection_type: str = "stdio"  # "stdio" or "sse"
+
+     # Track tool schemas to detect changes
+     tool_schemas: Dict[str, Dict[str, Any]] = Field(default_factory=dict)
+     _refresh_tools_interval: int = 5  # Refresh tools every N steps
+
+     # Special tool names that should trigger termination
+     special_tool_names: List[str] = Field(default_factory=lambda: ["terminate"])
+
+     async def initialize(
+         self,
+         connection_type: Optional[str] = None,
+         server_url: Optional[str] = None,
+         command: Optional[str] = None,
+         args: Optional[List[str]] = None,
+     ) -> None:
+         """Initialize the MCP connection.
+
+         Args:
+             connection_type: Type of connection to use ("stdio" or "sse")
+             server_url: URL of the MCP server (for SSE connection)
+             command: Command to run (for stdio connection)
+             args: Arguments for the command (for stdio connection)
+         """
+         if connection_type:
+             self.connection_type = connection_type
+
+         # Connect to the MCP server based on connection type
+         if self.connection_type == "sse":
+             if not server_url:
+                 raise ValueError("Server URL is required for SSE connection")
+             await self.mcp_clients.connect_sse(server_url=server_url)
+         elif self.connection_type == "stdio":
+             if not command:
+                 raise ValueError("Command is required for stdio connection")
+             await self.mcp_clients.connect_stdio(command=command, args=args or [])
+         else:
+             raise ValueError(f"Unsupported connection type: {self.connection_type}")
+
+         # Set available_tools to our MCP instance
+         self.available_tools = self.mcp_clients
+
+         # Store initial tool schemas
+         await self._refresh_tools()
+
+         # Add system message about available tools
+         tool_names = list(self.mcp_clients.tool_map.keys())
+         tools_info = ", ".join(tool_names)
+
+         # Add system prompt and available tools information
+         self.memory.add_message(
+             Message.system_message(
+                 f"{self.system_prompt}\n\nAvailable MCP tools: {tools_info}"
+             )
+         )
+
+     async def _refresh_tools(self) -> Tuple[List[str], List[str]]:
+         """Refresh the list of available tools from the MCP server.
+
+         Returns:
+             A tuple of (added_tools, removed_tools)
+         """
+         if not self.mcp_clients.sessions:
+             return [], []
+
+         # Get current tool schemas directly from the server
+         response = await self.mcp_clients.list_tools()
+         current_tools = {tool.name: tool.inputSchema for tool in response.tools}
+
+         # Determine added, removed, and changed tools
+         current_names = set(current_tools.keys())
+         previous_names = set(self.tool_schemas.keys())
+
+         added_tools = list(current_names - previous_names)
+         removed_tools = list(previous_names - current_names)
+
+         # Check for schema changes in existing tools
+         changed_tools = []
+         for name in current_names.intersection(previous_names):
+             if current_tools[name] != self.tool_schemas.get(name):
+                 changed_tools.append(name)
+
+         # Update stored schemas
+         self.tool_schemas = current_tools
+
+         # Log and notify about changes
+         if added_tools:
+             logger.info(f"Added MCP tools: {added_tools}")
+             self.memory.add_message(
+                 Message.system_message(f"New tools available: {', '.join(added_tools)}")
+             )
+         if removed_tools:
+             logger.info(f"Removed MCP tools: {removed_tools}")
+             self.memory.add_message(
+                 Message.system_message(
+                     f"Tools no longer available: {', '.join(removed_tools)}"
+                 )
+             )
+         if changed_tools:
+             logger.info(f"Changed MCP tools: {changed_tools}")
+
+         return added_tools, removed_tools
+
+     async def think(self) -> bool:
+         """Process current state and decide next action."""
+         # Check MCP session and tools availability
+         if not self.mcp_clients.sessions or not self.mcp_clients.tool_map:
+             logger.info("MCP service is no longer available, ending interaction")
+             self.state = AgentState.FINISHED
+             return False
+
+         # Refresh tools periodically
+         if self.current_step % self._refresh_tools_interval == 0:
+             await self._refresh_tools()
+             # All tools removed indicates shutdown
+             if not self.mcp_clients.tool_map:
+                 logger.info("MCP service has shut down, ending interaction")
+                 self.state = AgentState.FINISHED
+                 return False
+
+         # Use the parent class's think method
+         return await super().think()
+
+     async def _handle_special_tool(self, name: str, result: Any, **kwargs) -> None:
+         """Handle special tool execution and state changes"""
+         # First process with parent handler
+         await super()._handle_special_tool(name, result, **kwargs)
+
+         # Handle multimedia responses
+         if isinstance(result, ToolResult) and result.base64_image:
+             self.memory.add_message(
+                 Message.system_message(
+                     MULTIMEDIA_RESPONSE_PROMPT.format(tool_name=name)
+                 )
+             )
+
+     def _should_finish_execution(self, name: str, **kwargs) -> bool:
+         """Determine if tool execution should finish the agent"""
+         # Terminate if the tool name is 'terminate'
+         return name.lower() == "terminate"
+
+     async def cleanup(self) -> None:
+         """Clean up MCP connection when done."""
+         if self.mcp_clients.sessions:
+             await self.mcp_clients.disconnect()
+             logger.info("MCP connection closed")
+
+     async def run(self, request: Optional[str] = None) -> str:
+         """Run the agent with cleanup when done."""
+         try:
+             result = await super().run(request)
+             return result
+         finally:
+             # Ensure cleanup happens even if there's an error
+             await self.cleanup()
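The added/removed/changed bookkeeping in `_refresh_tools` above is plain set arithmetic over schema dicts. A minimal sketch of the same diff, using literal dicts in place of live MCP tool schemas:

```python
def diff_schemas(previous: dict, current: dict):
    # New names, vanished names, and same-name tools whose schema changed.
    added = sorted(set(current) - set(previous))
    removed = sorted(set(previous) - set(current))
    changed = sorted(
        name for name in set(current) & set(previous)
        if current[name] != previous[name]
    )
    return added, removed, changed

prev = {"search": {"type": "object"}, "fetch": {"type": "object"}}
curr = {"search": {"type": "object", "required": ["q"]}, "shell": {"type": "object"}}
print(diff_schemas(prev, curr))  # (['shell'], ['fetch'], ['search'])
```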
app/agent/react.py ADDED
@@ -0,0 +1,38 @@
+ from abc import ABC, abstractmethod
+ from typing import Optional
+
+ from pydantic import Field
+
+ from app.agent.base import BaseAgent
+ from app.llm import LLM
+ from app.schema import AgentState, Memory
+
+
+ class ReActAgent(BaseAgent, ABC):
+     name: str
+     description: Optional[str] = None
+
+     system_prompt: Optional[str] = None
+     next_step_prompt: Optional[str] = None
+
+     llm: Optional[LLM] = Field(default_factory=LLM)
+     memory: Memory = Field(default_factory=Memory)
+     state: AgentState = AgentState.IDLE
+
+     max_steps: int = 10
+     current_step: int = 0
+
+     @abstractmethod
+     async def think(self) -> bool:
+         """Process current state and decide next action"""
+
+     @abstractmethod
+     async def act(self) -> str:
+         """Execute decided actions"""
+
+     async def step(self) -> str:
+         """Execute a single step: think and act."""
+         should_act = await self.think()
+         if not should_act:
+             return "Thinking complete - no action needed"
+         return await self.act()
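The `step()` contract above (think decides, act executes) can be exercised with a toy agent that needs none of the app imports; `EchoAgent` is a stand-in, not a real agent class:

```python
import asyncio

class EchoAgent:
    def __init__(self):
        self.current_step = 0
        self.max_steps = 3

    async def think(self) -> bool:
        # Decide whether another action is needed.
        return self.current_step < self.max_steps

    async def act(self) -> str:
        self.current_step += 1
        return f"step {self.current_step}"

    async def step(self) -> str:
        # Same shape as ReActAgent.step() above.
        if not await self.think():
            return "Thinking complete - no action needed"
        return await self.act()

async def main():
    agent = EchoAgent()
    return [await agent.step() for _ in range(4)]

print(asyncio.run(main()))
# ['step 1', 'step 2', 'step 3', 'Thinking complete - no action needed']
```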
app/agent/sandbox_agent.py ADDED
@@ -0,0 +1,223 @@
+ from typing import Dict, List, Optional
+
+ from pydantic import Field, model_validator
+
+ from app.agent.browser import BrowserContextHelper
+ from app.agent.toolcall import ToolCallAgent
+ from app.config import config
+ from app.daytona.sandbox import create_sandbox, delete_sandbox
+ from app.daytona.tool_base import SandboxToolsBase
+ from app.logger import logger
+ from app.prompt.manus import NEXT_STEP_PROMPT, SYSTEM_PROMPT
+ from app.tool import Terminate, ToolCollection
+ from app.tool.ask_human import AskHuman
+ from app.tool.mcp import MCPClients, MCPClientTool
+ from app.tool.sandbox.sb_browser_tool import SandboxBrowserTool
+ from app.tool.sandbox.sb_files_tool import SandboxFilesTool
+ from app.tool.sandbox.sb_shell_tool import SandboxShellTool
+ from app.tool.sandbox.sb_vision_tool import SandboxVisionTool
+
+
+ class SandboxManus(ToolCallAgent):
+     """A versatile general-purpose agent with support for both local and MCP tools."""
+
+     name: str = "SandboxManus"
+     description: str = "A versatile agent that can solve various tasks using multiple sandbox-tools including MCP-based tools"
+
+     system_prompt: str = SYSTEM_PROMPT.format(directory=config.workspace_root)
+     next_step_prompt: str = NEXT_STEP_PROMPT
+
+     max_observe: int = 10000
+     max_steps: int = 20
+
+     # MCP clients for remote tool access
+     mcp_clients: MCPClients = Field(default_factory=MCPClients)
+
+     # Add general-purpose tools to the tool collection
+     available_tools: ToolCollection = Field(
+         default_factory=lambda: ToolCollection(
+             # PythonExecute(),
+             # BrowserUseTool(),
+             # StrReplaceEditor(),
+             AskHuman(),
+             Terminate(),
+         )
+     )
+
+     special_tool_names: list[str] = Field(default_factory=lambda: [Terminate().name])
+     browser_context_helper: Optional[BrowserContextHelper] = None
+
+     # Track connected MCP servers
+     connected_servers: Dict[str, str] = Field(
+         default_factory=dict
+     )  # server_id -> url/command
+     _initialized: bool = False
+     sandbox: Optional[object] = None  # Daytona sandbox handle, set in initialize_sandbox_tools()
+     sandbox_link: Optional[dict[str, dict[str, str]]] = Field(default_factory=dict)
+
+     @model_validator(mode="after")
+     def initialize_helper(self) -> "SandboxManus":
+         """Initialize basic components synchronously."""
+         self.browser_context_helper = BrowserContextHelper(self)
+         return self
+
+     @classmethod
+     async def create(cls, **kwargs) -> "SandboxManus":
+         """Factory method to create and properly initialize a SandboxManus instance."""
+         instance = cls(**kwargs)
+         await instance.initialize_mcp_servers()
+         await instance.initialize_sandbox_tools()
+         instance._initialized = True
+         return instance
+
+     async def initialize_sandbox_tools(
+         self,
+         password: str = config.daytona.VNC_password,
+     ) -> None:
+         try:
+             # Create a new sandbox
+             if password:
+                 sandbox = create_sandbox(password=password)
+                 self.sandbox = sandbox
+             else:
+                 raise ValueError("password must be provided")
+             vnc_link = sandbox.get_preview_link(6080)
+             website_link = sandbox.get_preview_link(8080)
+             vnc_url = vnc_link.url if hasattr(vnc_link, "url") else str(vnc_link)
+             website_url = (
+                 website_link.url if hasattr(website_link, "url") else str(website_link)
+             )
+
+             # Get the actual sandbox_id from the created sandbox
+             actual_sandbox_id = sandbox.id if hasattr(sandbox, "id") else "new_sandbox"
+             if not self.sandbox_link:
+                 self.sandbox_link = {}
+             self.sandbox_link[actual_sandbox_id] = {
+                 "vnc": vnc_url,
+                 "website": website_url,
+             }
+             logger.info(f"VNC URL: {vnc_url}")
+             logger.info(f"Website URL: {website_url}")
+             SandboxToolsBase._urls_printed = True
+             sb_tools = [
+                 SandboxBrowserTool(sandbox),
+                 SandboxFilesTool(sandbox),
+                 SandboxShellTool(sandbox),
+                 SandboxVisionTool(sandbox),
+             ]
+             self.available_tools.add_tools(*sb_tools)
+
+         except Exception as e:
+             logger.error(f"Error initializing sandbox tools: {e}")
+             raise
+
+     async def initialize_mcp_servers(self) -> None:
+         """Initialize connections to configured MCP servers."""
+         for server_id, server_config in config.mcp_config.servers.items():
+             try:
+                 if server_config.type == "sse":
+                     if server_config.url:
+                         await self.connect_mcp_server(server_config.url, server_id)
+                         logger.info(
+                             f"Connected to MCP server {server_id} at {server_config.url}"
+                         )
+                 elif server_config.type == "stdio":
+                     if server_config.command:
+                         await self.connect_mcp_server(
+                             server_config.command,
+                             server_id,
+                             use_stdio=True,
+                             stdio_args=server_config.args,
+                         )
+                         logger.info(
+                             f"Connected to MCP server {server_id} using command {server_config.command}"
+                         )
+             except Exception as e:
+                 logger.error(f"Failed to connect to MCP server {server_id}: {e}")
+
+     async def connect_mcp_server(
+         self,
+         server_url: str,
+         server_id: str = "",
+         use_stdio: bool = False,
+         stdio_args: Optional[List[str]] = None,
+     ) -> None:
+         """Connect to an MCP server and add its tools."""
+         if use_stdio:
+             await self.mcp_clients.connect_stdio(
+                 server_url, stdio_args or [], server_id
+             )
+             self.connected_servers[server_id or server_url] = server_url
+         else:
+             await self.mcp_clients.connect_sse(server_url, server_id)
+             self.connected_servers[server_id or server_url] = server_url
+
+         # Update available tools with only the new tools from this server
+         new_tools = [
+             tool for tool in self.mcp_clients.tools if tool.server_id == server_id
+         ]
+         self.available_tools.add_tools(*new_tools)
+
+     async def disconnect_mcp_server(self, server_id: str = "") -> None:
+         """Disconnect from an MCP server and remove its tools."""
+         await self.mcp_clients.disconnect(server_id)
+         if server_id:
+             self.connected_servers.pop(server_id, None)
+         else:
+             self.connected_servers.clear()
+
+         # Rebuild available tools without the disconnected server's tools
+         base_tools = [
+             tool
+             for tool in self.available_tools.tools
+             if not isinstance(tool, MCPClientTool)
+         ]
+         self.available_tools = ToolCollection(*base_tools)
+         self.available_tools.add_tools(*self.mcp_clients.tools)
+
+     async def delete_sandbox(self, sandbox_id: str) -> None:
+         """Delete a sandbox by ID."""
+         try:
+             await delete_sandbox(sandbox_id)
+             logger.info(f"Sandbox {sandbox_id} deleted successfully")
+             if sandbox_id in self.sandbox_link:
+                 del self.sandbox_link[sandbox_id]
+         except Exception as e:
+             logger.error(f"Error deleting sandbox {sandbox_id}: {e}")
+             raise e
+
+     async def cleanup(self):
+         """Clean up SandboxManus agent resources."""
+         if self.browser_context_helper:
+             await self.browser_context_helper.cleanup_browser()
+         # Disconnect from all MCP servers only if we were initialized
+         if self._initialized:
+             await self.disconnect_mcp_server()
+             await self.delete_sandbox(self.sandbox.id if self.sandbox else "unknown")
+             self._initialized = False
+
+     async def think(self) -> bool:
+         """Process current state and decide next actions with appropriate context."""
+         if not self._initialized:
+             await self.initialize_mcp_servers()
+             self._initialized = True
+
+         original_prompt = self.next_step_prompt
+         recent_messages = self.memory.messages[-3:] if self.memory.messages else []
+         browser_in_use = any(
+             tc.function.name == SandboxBrowserTool().name
+             for msg in recent_messages
+             if msg.tool_calls
+             for tc in msg.tool_calls
+         )
+
+         if browser_in_use:
+             self.next_step_prompt = (
+                 await self.browser_context_helper.format_next_step_prompt()
+             )
+
+         result = await super().think()
+
+         # Restore original prompt
+         self.next_step_prompt = original_prompt
+
+         return result
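The preview-link handling in `initialize_sandbox_tools` above tolerates both object-like and string results from the sandbox SDK. The same defensive pattern in isolation; `DummyLink` is a stand-in for whatever `get_preview_link` actually returns:

```python
class DummyLink:
    def __init__(self, url: str):
        self.url = url

def to_url(link) -> str:
    # Prefer a .url attribute, fall back to the string form.
    return link.url if hasattr(link, "url") else str(link)

print(to_url(DummyLink("https://example.test:6080")))  # https://example.test:6080
print(to_url("https://example.test:8080"))             # https://example.test:8080
```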
app/agent/swe.py ADDED
@@ -0,0 +1,24 @@
+ from typing import List
+
+ from pydantic import Field
+
+ from app.agent.toolcall import ToolCallAgent
+ from app.prompt.swe import SYSTEM_PROMPT
+ from app.tool import Bash, StrReplaceEditor, Terminate, ToolCollection
+
+
+ class SWEAgent(ToolCallAgent):
+     """An agent that implements the SWEAgent paradigm for executing code and natural conversations."""
+
+     name: str = "swe"
+     description: str = "an autonomous AI programmer that interacts directly with the computer to solve tasks."
+
+     system_prompt: str = SYSTEM_PROMPT
+     next_step_prompt: str = ""
+
+     available_tools: ToolCollection = ToolCollection(
+         Bash(), StrReplaceEditor(), Terminate()
+     )
+     special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
+
+     max_steps: int = 20
app/agent/toolcall.py ADDED
@@ -0,0 +1,250 @@
+ import asyncio
+ import json
+ from typing import Any, List, Optional, Union
+
+ from pydantic import Field
+
+ from app.agent.react import ReActAgent
+ from app.exceptions import TokenLimitExceeded
+ from app.logger import logger
+ from app.prompt.toolcall import NEXT_STEP_PROMPT, SYSTEM_PROMPT
+ from app.schema import TOOL_CHOICE_TYPE, AgentState, Message, ToolCall, ToolChoice
+ from app.tool import CreateChatCompletion, Terminate, ToolCollection
+
+
+ TOOL_CALL_REQUIRED = "Tool calls required but none provided"
+
+
+ class ToolCallAgent(ReActAgent):
+     """Base agent class for handling tool/function calls with enhanced abstraction"""
+
+     name: str = "toolcall"
+     description: str = "an agent that can execute tool calls."
+
+     system_prompt: str = SYSTEM_PROMPT
+     next_step_prompt: str = NEXT_STEP_PROMPT
+
+     available_tools: ToolCollection = ToolCollection(
+         CreateChatCompletion(), Terminate()
+     )
+     tool_choices: TOOL_CHOICE_TYPE = ToolChoice.AUTO  # type: ignore
+     special_tool_names: List[str] = Field(default_factory=lambda: [Terminate().name])
+
+     tool_calls: List[ToolCall] = Field(default_factory=list)
+     _current_base64_image: Optional[str] = None
+
+     max_steps: int = 30
+     max_observe: Optional[Union[int, bool]] = None
+
+     async def think(self) -> bool:
+         """Process current state and decide next actions using tools"""
+         if self.next_step_prompt:
+             user_msg = Message.user_message(self.next_step_prompt)
+             self.messages += [user_msg]
+
+         try:
+             # Get response with tool options
+             response = await self.llm.ask_tool(
+                 messages=self.messages,
+                 system_msgs=(
+                     [Message.system_message(self.system_prompt)]
+                     if self.system_prompt
+                     else None
+                 ),
+                 tools=self.available_tools.to_params(),
+                 tool_choice=self.tool_choices,
+             )
+         except ValueError:
+             raise
+         except Exception as e:
+             # Check if this is a RetryError containing TokenLimitExceeded
+             if hasattr(e, "__cause__") and isinstance(e.__cause__, TokenLimitExceeded):
+                 token_limit_error = e.__cause__
+                 logger.error(
+                     f"🚨 Token limit error (from RetryError): {token_limit_error}"
+                 )
+                 self.memory.add_message(
+                     Message.assistant_message(
+                         f"Maximum token limit reached, cannot continue execution: {str(token_limit_error)}"
+                     )
+                 )
+                 self.state = AgentState.FINISHED
+                 return False
+             raise
+
+         self.tool_calls = tool_calls = (
+             response.tool_calls if response and response.tool_calls else []
+         )
+         content = response.content if response and response.content else ""
+
+         # Log response info
+         logger.info(f"✨ {self.name}'s thoughts: {content}")
+         logger.info(
+             f"🛠️ {self.name} selected {len(tool_calls) if tool_calls else 0} tools to use"
+         )
+         if tool_calls:
+             logger.info(
+                 f"🧰 Tools being prepared: {[call.function.name for call in tool_calls]}"
+             )
+             logger.info(f"🔧 Tool arguments: {tool_calls[0].function.arguments}")
+
+         try:
+             if response is None:
+                 raise RuntimeError("No response received from the LLM")
+
+             # Handle different tool_choices modes
+             if self.tool_choices == ToolChoice.NONE:
+                 if tool_calls:
+                     logger.warning(
+                         f"🤔 Hmm, {self.name} tried to use tools when they weren't available!"
+                     )
+                 if content:
+                     self.memory.add_message(Message.assistant_message(content))
+                     return True
+                 return False
+
+             # Create and add assistant message
+             assistant_msg = (
+                 Message.from_tool_calls(content=content, tool_calls=self.tool_calls)
+                 if self.tool_calls
+                 else Message.assistant_message(content)
+             )
+             self.memory.add_message(assistant_msg)
+
+             if self.tool_choices == ToolChoice.REQUIRED and not self.tool_calls:
+                 return True  # Will be handled in act()
+
+             # For 'auto' mode, continue with content if no commands but content exists
+             if self.tool_choices == ToolChoice.AUTO and not self.tool_calls:
+                 return bool(content)
+
+             return bool(self.tool_calls)
+         except Exception as e:
+             logger.error(f"🚨 Oops! The {self.name}'s thinking process hit a snag: {e}")
+             self.memory.add_message(
+                 Message.assistant_message(
+                     f"Error encountered while processing: {str(e)}"
+                 )
+             )
+             return False
+
+     async def act(self) -> str:
+         """Execute tool calls and handle their results"""
+         if not self.tool_calls:
+             if self.tool_choices == ToolChoice.REQUIRED:
+                 raise ValueError(TOOL_CALL_REQUIRED)
+
+             # Return last message content if no tool calls
+             return self.messages[-1].content or "No content or commands to execute"
+
+         results = []
+         for command in self.tool_calls:
+             # Reset base64_image for each tool call
+             self._current_base64_image = None
+
+             result = await self.execute_tool(command)
+
+             if self.max_observe:
+                 result = result[: self.max_observe]
+
+             logger.info(
+                 f"🎯 Tool '{command.function.name}' completed its mission! Result: {result}"
+             )
+
+             # Add tool response to memory
+             tool_msg = Message.tool_message(
+                 content=result,
+                 tool_call_id=command.id,
+                 name=command.function.name,
+                 base64_image=self._current_base64_image,
+             )
+             self.memory.add_message(tool_msg)
+             results.append(result)
+
+         return "\n\n".join(results)
+
+     async def execute_tool(self, command: ToolCall) -> str:
+         """Execute a single tool call with robust error handling"""
+         if not command or not command.function or not command.function.name:
+             return "Error: Invalid command format"
+
+         name = command.function.name
+         if name not in self.available_tools.tool_map:
+             return f"Error: Unknown tool '{name}'"
+
+         try:
+             # Parse arguments
+             args = json.loads(command.function.arguments or "{}")
+
+             # Execute the tool
+             logger.info(f"🔧 Activating tool: '{name}'...")
+             result = await self.available_tools.execute(name=name, tool_input=args)
+
+             # Handle special tools
+             await self._handle_special_tool(name=name, result=result)
+
+             # Check if result is a ToolResult with base64_image
+             if hasattr(result, "base64_image") and result.base64_image:
+                 # Store the base64_image for later use in tool_message
+                 self._current_base64_image = result.base64_image
+
+             # Format result for display (standard case)
+             observation = (
+                 f"Observed output of cmd `{name}` executed:\n{str(result)}"
+                 if result
+                 else f"Cmd `{name}` completed with no output"
+             )
+
+             return observation
+         except json.JSONDecodeError:
+             error_msg = f"Error parsing arguments for {name}: Invalid JSON format"
+             logger.error(
+                 f"📝 Oops! The arguments for '{name}' don't make sense - invalid JSON, arguments:{command.function.arguments}"
+             )
+             return f"Error: {error_msg}"
+         except Exception as e:
+             error_msg = f"⚠️ Tool '{name}' encountered a problem: {str(e)}"
+             logger.exception(error_msg)
+             return f"Error: {error_msg}"
+
+     async def _handle_special_tool(self, name: str, result: Any, **kwargs):
+         """Handle special tool execution and state changes"""
+         if not self._is_special_tool(name):
+             return
+
+         if self._should_finish_execution(name=name, result=result, **kwargs):
+             # Set agent state to finished
+             logger.info(f"🏁 Special tool '{name}' has completed the task!")
+             self.state = AgentState.FINISHED
+
+     @staticmethod
+     def _should_finish_execution(**kwargs) -> bool:
+         """Determine if tool execution should finish the agent"""
+         return True
+
+     def _is_special_tool(self, name: str) -> bool:
+         """Check if tool name is in special tools list"""
+         return name.lower() in [n.lower() for n in self.special_tool_names]
+
+     async def cleanup(self):
+         """Clean up resources used by the agent's tools."""
+         logger.info(f"🧹 Cleaning up resources for agent '{self.name}'...")
+         for tool_name, tool_instance in self.available_tools.tool_map.items():
+             if hasattr(tool_instance, "cleanup") and asyncio.iscoroutinefunction(
+                 tool_instance.cleanup
+             ):
+                 try:
+                     logger.debug(f"🧼 Cleaning up tool: {tool_name}")
+                     await tool_instance.cleanup()
+                 except Exception as e:
+                     logger.error(
+                         f"🚨 Error cleaning up tool '{tool_name}': {e}", exc_info=True
+                     )
+         logger.info(f"✨ Cleanup complete for agent '{self.name}'.")
+
+     async def run(self, request: Optional[str] = None) -> str:
+         """Run the agent with cleanup when done."""
+         try:
+             return await super().run(request)
+         finally:
+             await self.cleanup()
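`execute_tool` above tolerates a missing or empty `arguments` string and turns malformed JSON into an error string instead of propagating the exception. The argument-parsing step in isolation (`parse_args` is a stand-in helper, not part of the agent API):

```python
import json

def parse_args(raw):
    # Treat None/"" as an empty argument object; report malformed JSON
    # as an error string rather than raising, as execute_tool does.
    try:
        return json.loads(raw or "{}"), None
    except json.JSONDecodeError:
        return None, "Invalid JSON format"

print(parse_args('{"path": "/tmp"}'))  # ({'path': '/tmp'}, None)
print(parse_args(None))                # ({}, None)
print(parse_args("{broken"))           # (None, 'Invalid JSON format')
```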
app/auth.py ADDED
@@ -0,0 +1,205 @@
+ """
+ User authentication models and validation for OpenManus
+ Mobile number + password based authentication system
+ """
+
+ import hashlib
+ import re
+ import secrets
+ from datetime import datetime, timedelta
+ from typing import Optional
+ from dataclasses import dataclass
+ from pydantic import BaseModel, validator
+
+
+ class UserSignupRequest(BaseModel):
+     """User signup request model"""
+
+     full_name: str
+     mobile_number: str
+     password: str
+     confirm_password: str
+
+     @validator("full_name")
+     def validate_full_name(cls, v):
+         if not v or len(v.strip()) < 2:
+             raise ValueError("Full name must be at least 2 characters long")
+         if len(v.strip()) > 100:
+             raise ValueError("Full name must be less than 100 characters")
+         return v.strip()
+
+     @validator("mobile_number")
+     def validate_mobile_number(cls, v):
+         # Remove all non-digit characters
+         digits_only = re.sub(r"\D", "", v)
+
+         # Check if it's a valid mobile number (10-15 digits)
+         if len(digits_only) < 10 or len(digits_only) > 15:
+             raise ValueError("Mobile number must be between 10-15 digits")
+
+         # Ensure it starts with country code or local format
+         if not re.match(r"^(\+?[1-9]\d{9,14})$", digits_only):
+             raise ValueError("Invalid mobile number format")
+
+         return digits_only
+
+     @validator("password")
+     def validate_password(cls, v):
+         if len(v) < 8:
+             raise ValueError("Password must be at least 8 characters long")
+         if len(v) > 128:
+             raise ValueError("Password must be less than 128 characters")
+
+         # Check for at least one uppercase, lowercase, and digit
+         if not re.search(r"[A-Z]", v):
+             raise ValueError("Password must contain at least one uppercase letter")
+         if not re.search(r"[a-z]", v):
+             raise ValueError("Password must contain at least one lowercase letter")
+         if not re.search(r"\d", v):
+             raise ValueError("Password must contain at least one digit")
+
+         return v
+
+     @validator("confirm_password")
+     def validate_confirm_password(cls, v, values):
+         if "password" in values and v != values["password"]:
+             raise ValueError("Passwords do not match")
+         return v
+
+
+ class UserLoginRequest(BaseModel):
+     """User login request model"""
+
+     mobile_number: str
+     password: str
+
+     @validator("mobile_number")
+     def validate_mobile_number(cls, v):
+         # Remove all non-digit characters
+         digits_only = re.sub(r"\D", "", v)
+
+         if len(digits_only) < 10 or len(digits_only) > 15:
+             raise ValueError("Invalid mobile number")
+
+         return digits_only
+
+
+ @dataclass
+ class User:
+     """User model"""
+
+     id: str
+     mobile_number: str
+     full_name: str
+     password_hash: str
+     avatar_url: Optional[str] = None
+     preferences: Optional[str] = None
+     is_active: bool = True
+     created_at: Optional[datetime] = None
+     updated_at: Optional[datetime] = None
+
+
+ @dataclass
+ class UserSession:
+     """User session model"""
+
+     session_id: str
+     user_id: str
+     mobile_number: str
+     full_name: str
+     created_at: datetime
+     expires_at: datetime
+ expires_at: datetime
112
+
113
+ @property
114
+ def is_valid(self) -> bool:
115
+ """Check if session is still valid"""
116
+ return datetime.utcnow() < self.expires_at
117
+
118
+
119
+ class UserAuth:
120
+ """User authentication utilities"""
121
+
122
+ @staticmethod
123
+ def hash_password(password: str) -> str:
124
+ """Hash password using SHA-256 with salt"""
125
+ salt = secrets.token_hex(32)
126
+ password_hash = hashlib.sha256((password + salt).encode()).hexdigest()
127
+ return f"{salt}:{password_hash}"
128
+
129
+ @staticmethod
130
+ def verify_password(password: str, password_hash: str) -> bool:
131
+ """Verify password against stored hash"""
132
+ try:
133
+ salt, stored_hash = password_hash.split(":")
134
+ password_hash_check = hashlib.sha256((password + salt).encode()).hexdigest()
135
+ return password_hash_check == stored_hash
136
+ except ValueError:
137
+ return False
138
+
139
+ @staticmethod
140
+ def generate_session_id() -> str:
141
+ """Generate secure session ID"""
142
+ return secrets.token_urlsafe(32)
143
+
144
+ @staticmethod
145
+ def generate_user_id() -> str:
146
+ """Generate unique user ID"""
147
+ return f"user_{secrets.token_hex(16)}"
148
+
149
+ @staticmethod
150
+ def format_mobile_number(mobile_number: str) -> str:
151
+ """Format mobile number for consistent storage"""
152
+ # Remove all non-digit characters
153
+ digits_only = re.sub(r"\D", "", mobile_number)
154
+
155
+ # Add + prefix if not present and format consistently
156
+ if not digits_only.startswith("+"):
157
+ # Assume it's a local number, add default country code if needed
158
+ if len(digits_only) == 10: # US format
159
+ digits_only = f"1{digits_only}"
160
+
161
+ return f"+{digits_only}"
162
+
163
+ @staticmethod
164
+ def create_session(user: User, duration_hours: int = 24) -> UserSession:
165
+ """Create a new user session"""
166
+ session_id = UserAuth.generate_session_id()
167
+ created_at = datetime.utcnow()
168
+ expires_at = created_at + timedelta(hours=duration_hours)
169
+
170
+ return UserSession(
171
+ session_id=session_id,
172
+ user_id=user.id,
173
+ mobile_number=user.mobile_number,
174
+ full_name=user.full_name,
175
+ created_at=created_at,
176
+ expires_at=expires_at,
177
+ )
178
+
179
+
180
+ # Response models
181
+ class AuthResponse(BaseModel):
182
+ """Authentication response model"""
183
+
184
+ success: bool
185
+ message: str
186
+ session_id: Optional[str] = None
187
+ user_id: Optional[str] = None
188
+ full_name: Optional[str] = None
189
+
190
+
191
+ class UserProfile(BaseModel):
192
+ """User profile response model"""
193
+
194
+ user_id: str
195
+ full_name: str
196
+ mobile_number: str # Masked for security
197
+ avatar_url: Optional[str] = None
198
+ created_at: Optional[str] = None
199
+
200
+ @staticmethod
201
+ def mask_mobile_number(mobile_number: str) -> str:
202
+ """Mask mobile number for security (show only last 4 digits)"""
203
+ if len(mobile_number) <= 4:
204
+ return "*" * len(mobile_number)
205
+ return "*" * (len(mobile_number) - 4) + mobile_number[-4:]
app/auth_interface.py ADDED
@@ -0,0 +1,361 @@
+"""
+Authentication Web Interface for OpenManus
+Mobile number + password based authentication forms
+"""
+
+import asyncio
+import sqlite3
+from typing import Tuple
+
+import gradio as gr
+
+from app.auth import UserLoginRequest, UserSignupRequest
+from app.auth_service import AuthService
+from app.logger import logger
+
+
+class AuthInterface:
+    """Authentication interface with Gradio"""
+
+    def __init__(self, db_path: str = "openmanus.db"):
+        self.db_path = db_path
+        self.current_session = None
+        self.init_database()
+
+    def init_database(self):
+        """Initialize database with schema"""
+        try:
+            conn = sqlite3.connect(self.db_path)
+
+            # Create users table with mobile auth
+            conn.execute(
+                """
+                CREATE TABLE IF NOT EXISTS users (
+                    id TEXT PRIMARY KEY,
+                    mobile_number TEXT UNIQUE NOT NULL,
+                    full_name TEXT NOT NULL,
+                    password_hash TEXT NOT NULL,
+                    avatar_url TEXT,
+                    preferences TEXT,
+                    is_active BOOLEAN DEFAULT TRUE,
+                    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
+                )
+                """
+            )
+
+            # Create sessions table
+            conn.execute(
+                """
+                CREATE TABLE IF NOT EXISTS sessions (
+                    id TEXT PRIMARY KEY,
+                    user_id TEXT NOT NULL,
+                    title TEXT,
+                    metadata TEXT,
+                    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+                    expires_at DATETIME,
+                    FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE
+                )
+                """
+            )
+
+            conn.commit()
+            conn.close()
+            logger.info("Database initialized successfully")
+
+        except Exception as e:
+            logger.error(f"Database initialization error: {str(e)}")
+
+    def get_db_connection(self):
+        """Get database connection"""
+        return sqlite3.connect(self.db_path)
+
+    async def handle_signup(
+        self, full_name: str, mobile_number: str, password: str, confirm_password: str
+    ) -> Tuple[str, bool, dict]:
+        """Handle user signup"""
+        try:
+            # Validate input
+            if not all([full_name, mobile_number, password, confirm_password]):
+                return "All fields are required", False, gr.update(visible=True)
+
+            # Create signup request (raises ValueError on invalid input)
+            signup_data = UserSignupRequest(
+                full_name=full_name,
+                mobile_number=mobile_number,
+                password=password,
+                confirm_password=confirm_password,
+            )
+
+            # Process signup
+            db_conn = self.get_db_connection()
+            auth_service = AuthService(db_conn)
+
+            result = await auth_service.register_user(signup_data)
+            db_conn.close()
+
+            if result.success:
+                self.current_session = {
+                    "session_id": result.session_id,
+                    "user_id": result.user_id,
+                    "full_name": result.full_name,
+                }
+                return (
+                    f"Welcome {result.full_name}! Account created successfully.",
+                    True,
+                    gr.update(visible=False),
+                )
+            else:
+                return result.message, False, gr.update(visible=True)
+
+        except ValueError as e:
+            return str(e), False, gr.update(visible=True)
+        except Exception as e:
+            logger.error(f"Signup error: {str(e)}")
+            return "An error occurred during signup", False, gr.update(visible=True)
+
+    async def handle_login(
+        self, mobile_number: str, password: str
+    ) -> Tuple[str, bool, dict]:
+        """Handle user login"""
+        try:
+            # Validate input
+            if not all([mobile_number, password]):
+                return (
+                    "Mobile number and password are required",
+                    False,
+                    gr.update(visible=True),
+                )
+
+            # Create login request
+            login_data = UserLoginRequest(
+                mobile_number=mobile_number, password=password
+            )
+
+            # Process login
+            db_conn = self.get_db_connection()
+            auth_service = AuthService(db_conn)
+
+            result = await auth_service.login_user(login_data)
+            db_conn.close()
+
+            if result.success:
+                self.current_session = {
+                    "session_id": result.session_id,
+                    "user_id": result.user_id,
+                    "full_name": result.full_name,
+                }
+                return (
+                    f"Welcome back, {result.full_name}!",
+                    True,
+                    gr.update(visible=False),
+                )
+            else:
+                return result.message, False, gr.update(visible=True)
+
+        except ValueError as e:
+            return str(e), False, gr.update(visible=True)
+        except Exception as e:
+            logger.error(f"Login error: {str(e)}")
+            return "An error occurred during login", False, gr.update(visible=True)
+
+    def handle_logout(self) -> Tuple[str, bool, dict]:
+        """Handle user logout"""
+        if self.current_session:
+            # In a real app, the session would also be deleted from the database
+            self.current_session = None
+
+        return "Logged out successfully", False, gr.update(visible=True)
+
+    def create_interface(self) -> gr.Blocks:
+        """Create the authentication interface"""
+
+        with gr.Blocks(
+            title="OpenManus Authentication", theme=gr.themes.Soft()
+        ) as auth_interface:
+            gr.Markdown(
+                """
+                # 🔐 OpenManus Authentication
+                ### Secure Mobile Number + Password Login System
+                """
+            )
+
+            # Session status
+            session_status = gr.Textbox(
+                value="Not logged in", label="Status", interactive=False
+            )
+
+            # Auth forms container
+            with gr.Column(visible=True) as auth_forms:
+                with gr.Tabs():
+                    # Login Tab
+                    with gr.TabItem("🔑 Login"):
+                        gr.Markdown("### Login with your mobile number and password")
+
+                        login_mobile = gr.Textbox(
+                            label="📱 Mobile Number",
+                            placeholder="Enter your mobile number (e.g., +1234567890)",
+                            lines=1,
+                        )
+
+                        login_password = gr.Textbox(
+                            label="🔒 Password",
+                            type="password",
+                            placeholder="Enter your password",
+                            lines=1,
+                        )
+
+                        login_btn = gr.Button("🔑 Login", variant="primary", size="lg")
+                        login_result = gr.Textbox(label="Result", interactive=False)
+
+                    # Signup Tab
+                    with gr.TabItem("📝 Sign Up"):
+                        gr.Markdown("### Create your new account")
+
+                        signup_fullname = gr.Textbox(
+                            label="👤 Full Name",
+                            placeholder="Enter your full name",
+                            lines=1,
+                        )
+
+                        signup_mobile = gr.Textbox(
+                            label="📱 Mobile Number",
+                            placeholder="Enter your mobile number (e.g., +1234567890)",
+                            lines=1,
+                        )
+
+                        signup_password = gr.Textbox(
+                            label="🔒 Password",
+                            type="password",
+                            placeholder="Create a strong password (min 8 chars, include uppercase, lowercase, digit)",
+                            lines=1,
+                        )
+
+                        signup_confirm_password = gr.Textbox(
+                            label="🔒 Confirm Password",
+                            type="password",
+                            placeholder="Confirm your password",
+                            lines=1,
+                        )
+
+                        signup_btn = gr.Button(
+                            "📝 Create Account", variant="primary", size="lg"
+                        )
+                        signup_result = gr.Textbox(label="Result", interactive=False)
+
+            # Logged in section
+            with gr.Column(visible=False) as logged_in_section:
+                gr.Markdown("### ✅ You are logged in!")
+
+                user_info = gr.Markdown("Welcome!")
+
+                logout_btn = gr.Button("🚪 Logout", variant="secondary")
+                logout_result = gr.Textbox(label="Result", interactive=False)
+
+            # Password requirements info
+            with gr.Accordion("📋 Password Requirements", open=False):
+                gr.Markdown(
+                    """
+                    **Password must contain:**
+                    - At least 8 characters
+                    - At least 1 uppercase letter (A-Z)
+                    - At least 1 lowercase letter (a-z)
+                    - At least 1 digit (0-9)
+                    - Maximum 128 characters
+
+                    **Mobile Number Format:**
+                    - 10-15 digits
+                    - Can include country code
+                    - Examples: +1234567890, 1234567890, +91987654321
+                    """
+                )
+
+            # Event handlers: each click computes the full set of UI updates in
+            # one call, so no intermediate state has to flow between listeners
+            def sync_signup(full_name, mobile, password, confirm):
+                """Synchronous wrapper for signup; returns all UI updates"""
+                message, success, forms_update = asyncio.run(
+                    self.handle_signup(full_name, mobile, password, confirm)
+                )
+                name = (
+                    self.current_session["full_name"]
+                    if self.current_session
+                    else "User"
+                )
+                return (
+                    message,  # result textbox
+                    message if success else "Not logged in",  # session_status
+                    forms_update,  # auth_forms visibility
+                    gr.update(visible=success),  # logged_in_section visibility
+                    f"### 👋 {name}" if success else "Welcome!",  # user_info
+                )
+
+            def sync_login(mobile, password):
+                """Synchronous wrapper for login; returns all UI updates"""
+                message, success, forms_update = asyncio.run(
+                    self.handle_login(mobile, password)
+                )
+                name = (
+                    self.current_session["full_name"]
+                    if self.current_session
+                    else "User"
+                )
+                return (
+                    message,
+                    message if success else "Not logged in",
+                    forms_update,
+                    gr.update(visible=success),
+                    f"### 👋 {name}" if success else "Welcome!",
+                )
+
+            def sync_logout():
+                """Logout and reset the UI"""
+                message, _, forms_update = self.handle_logout()
+                return (
+                    message,
+                    "Not logged in",
+                    forms_update,
+                    gr.update(visible=False),
+                    "Welcome!",
+                )
+
+            # Login button click
+            login_btn.click(
+                fn=sync_login,
+                inputs=[login_mobile, login_password],
+                outputs=[
+                    login_result,
+                    session_status,
+                    auth_forms,
+                    logged_in_section,
+                    user_info,
+                ],
+            )
+
+            # Signup button click
+            signup_btn.click(
+                fn=sync_signup,
+                inputs=[
+                    signup_fullname,
+                    signup_mobile,
+                    signup_password,
+                    signup_confirm_password,
+                ],
+                outputs=[
+                    signup_result,
+                    session_status,
+                    auth_forms,
+                    logged_in_section,
+                    user_info,
+                ],
+            )
+
+            # Logout button click
+            logout_btn.click(
+                fn=sync_logout,
+                outputs=[
+                    logout_result,
+                    session_status,
+                    auth_forms,
+                    logged_in_section,
+                    user_info,
+                ],
+            )
+
+        return auth_interface
+
+
+# Standalone authentication app
+def create_auth_app(db_path: str = "openmanus.db") -> gr.Blocks:
+    """Create standalone authentication app"""
+    auth_interface = AuthInterface(db_path)
+    return auth_interface.create_interface()
+
+
+if __name__ == "__main__":
+    # Run standalone auth interface for testing
+    auth_app = create_auth_app()
+    auth_app.launch(server_name="0.0.0.0", server_port=7860, share=False, debug=True)
app/auth_service.py ADDED
@@ -0,0 +1,357 @@
+"""
+User authentication service for OpenManus
+Handles user registration, login, and session management with a SQLite database
+"""
+
+import json
+from datetime import datetime
+from typing import Optional
+
+from app.auth import (
+    AuthResponse,
+    User,
+    UserAuth,
+    UserLoginRequest,
+    UserProfile,
+    UserSession,
+    UserSignupRequest,
+)
+from app.logger import logger
+
+
+class AuthService:
+    """Authentication service for user management"""
+
+    def __init__(self, db_connection=None):
+        """Initialize auth service with database connection"""
+        self.db = db_connection
+        self.logger = logger
+
+    async def register_user(self, signup_data: UserSignupRequest) -> AuthResponse:
+        """Register a new user"""
+        try:
+            # Format mobile number consistently
+            formatted_mobile = UserAuth.format_mobile_number(signup_data.mobile_number)
+
+            # Check if user already exists
+            existing_user = await self.get_user_by_mobile(formatted_mobile)
+            if existing_user:
+                return AuthResponse(
+                    success=False, message="User with this mobile number already exists"
+                )
+
+            # Create new user
+            user_id = UserAuth.generate_user_id()
+            password_hash = UserAuth.hash_password(signup_data.password)
+
+            user = User(
+                id=user_id,
+                mobile_number=formatted_mobile,
+                full_name=signup_data.full_name,
+                password_hash=password_hash,
+                created_at=datetime.utcnow(),
+                updated_at=datetime.utcnow(),
+            )
+
+            # Save user to database
+            success = await self.save_user(user)
+            if not success:
+                return AuthResponse(
+                    success=False, message="Failed to create user account"
+                )
+
+            # Create session
+            session = UserAuth.create_session(user)
+            session_saved = await self.save_session(session)
+
+            if not session_saved:
+                return AuthResponse(
+                    success=False, message="User created but failed to create session"
+                )
+
+            self.logger.info(f"New user registered: {formatted_mobile}")
+
+            return AuthResponse(
+                success=True,
+                message="Account created successfully",
+                session_id=session.session_id,
+                user_id=user.id,
+                full_name=user.full_name,
+            )
+
+        except Exception as e:
+            self.logger.error(f"User registration error: {str(e)}")
+            return AuthResponse(
+                success=False, message="An error occurred during registration"
+            )
+
+    async def login_user(self, login_data: UserLoginRequest) -> AuthResponse:
+        """Authenticate user login"""
+        try:
+            # Format mobile number consistently
+            formatted_mobile = UserAuth.format_mobile_number(login_data.mobile_number)
+
+            # Get user from database
+            user = await self.get_user_by_mobile(formatted_mobile)
+            if not user:
+                return AuthResponse(
+                    success=False, message="Invalid mobile number or password"
+                )
+
+            # Verify password
+            if not UserAuth.verify_password(login_data.password, user.password_hash):
+                return AuthResponse(
+                    success=False, message="Invalid mobile number or password"
+                )
+
+            # Check if user is active
+            if not user.is_active:
+                return AuthResponse(
+                    success=False,
+                    message="Account is deactivated. Please contact support.",
+                )
+
+            # Create new session
+            session = UserAuth.create_session(user)
+            session_saved = await self.save_session(session)
+
+            if not session_saved:
+                return AuthResponse(
+                    success=False,
+                    message="Login successful but failed to create session",
+                )
+
+            self.logger.info(f"User logged in: {formatted_mobile}")
+
+            return AuthResponse(
+                success=True,
+                message="Login successful",
+                session_id=session.session_id,
+                user_id=user.id,
+                full_name=user.full_name,
+            )
+
+        except Exception as e:
+            self.logger.error(f"User login error: {str(e)}")
+            return AuthResponse(success=False, message="An error occurred during login")
+
+    async def validate_session(self, session_id: str) -> Optional[UserSession]:
+        """Validate user session"""
+        try:
+            if not self.db:
+                return None
+
+            cursor = self.db.cursor()
+            cursor.execute(
+                """
+                SELECT s.id, s.user_id, u.mobile_number, u.full_name,
+                       s.created_at, s.expires_at
+                FROM sessions s
+                JOIN users u ON s.user_id = u.id
+                WHERE s.id = ? AND u.is_active = 1
+                """,
+                (session_id,),
+            )
+
+            row = cursor.fetchone()
+            if not row:
+                return None
+
+            session = UserSession(
+                session_id=row[0],
+                user_id=row[1],
+                mobile_number=row[2],
+                full_name=row[3],
+                created_at=datetime.fromisoformat(row[4]),
+                expires_at=datetime.fromisoformat(row[5]),
+            )
+
+            # Check if session is still valid
+            if not session.is_valid:
+                # Clean up expired session
+                await self.delete_session(session_id)
+                return None
+
+            return session
+
+        except Exception as e:
+            self.logger.error(f"Session validation error: {str(e)}")
+            return None
+
+    async def logout_user(self, session_id: str) -> bool:
+        """Logout user by deleting session"""
+        return await self.delete_session(session_id)
+
+    async def get_user_profile(self, user_id: str) -> Optional[UserProfile]:
+        """Get user profile by user ID"""
+        try:
+            user = await self.get_user_by_id(user_id)
+            if not user:
+                return None
+
+            return UserProfile(
+                user_id=user.id,
+                full_name=user.full_name,
+                mobile_number=UserProfile.mask_mobile_number(user.mobile_number),
+                avatar_url=user.avatar_url,
+                created_at=user.created_at.isoformat() if user.created_at else None,
+            )
+
+        except Exception as e:
+            self.logger.error(f"Get user profile error: {str(e)}")
+            return None
+
+    # Database operations
+    async def save_user(self, user: User) -> bool:
+        """Save user to database"""
+        try:
+            if not self.db:
+                return False
+
+            cursor = self.db.cursor()
+            cursor.execute(
+                """
+                INSERT INTO users (id, mobile_number, full_name, password_hash,
+                                   avatar_url, preferences, is_active, created_at, updated_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    user.id,
+                    user.mobile_number,
+                    user.full_name,
+                    user.password_hash,
+                    user.avatar_url,
+                    user.preferences,
+                    user.is_active,
+                    user.created_at.isoformat() if user.created_at else None,
+                    user.updated_at.isoformat() if user.updated_at else None,
+                ),
+            )
+
+            self.db.commit()
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Save user error: {str(e)}")
+            return False
+
+    async def get_user_by_mobile(self, mobile_number: str) -> Optional[User]:
+        """Get user by mobile number"""
+        try:
+            if not self.db:
+                return None
+
+            cursor = self.db.cursor()
+            cursor.execute(
+                """
+                SELECT id, mobile_number, full_name, password_hash, avatar_url,
+                       preferences, is_active, created_at, updated_at
+                FROM users
+                WHERE mobile_number = ?
+                """,
+                (mobile_number,),
+            )
+
+            row = cursor.fetchone()
+            if not row:
+                return None
+
+            return User(
+                id=row[0],
+                mobile_number=row[1],
+                full_name=row[2],
+                password_hash=row[3],
+                avatar_url=row[4],
+                preferences=row[5],
+                is_active=bool(row[6]),
+                created_at=datetime.fromisoformat(row[7]) if row[7] else None,
+                updated_at=datetime.fromisoformat(row[8]) if row[8] else None,
+            )
+
+        except Exception as e:
+            self.logger.error(f"Get user by mobile error: {str(e)}")
+            return None
+
+    async def get_user_by_id(self, user_id: str) -> Optional[User]:
+        """Get user by ID"""
+        try:
+            if not self.db:
+                return None
+
+            cursor = self.db.cursor()
+            cursor.execute(
+                """
+                SELECT id, mobile_number, full_name, password_hash, avatar_url,
+                       preferences, is_active, created_at, updated_at
+                FROM users
+                WHERE id = ? AND is_active = 1
+                """,
+                (user_id,),
+            )
+
+            row = cursor.fetchone()
+            if not row:
+                return None
+
+            return User(
+                id=row[0],
+                mobile_number=row[1],
+                full_name=row[2],
+                password_hash=row[3],
+                avatar_url=row[4],
+                preferences=row[5],
+                is_active=bool(row[6]),
+                created_at=datetime.fromisoformat(row[7]) if row[7] else None,
+                updated_at=datetime.fromisoformat(row[8]) if row[8] else None,
+            )
+
+        except Exception as e:
+            self.logger.error(f"Get user by ID error: {str(e)}")
+            return None
+
+    async def save_session(self, session: UserSession) -> bool:
+        """Save session to database"""
+        try:
+            if not self.db:
+                return False
+
+            cursor = self.db.cursor()
+            cursor.execute(
+                """
+                INSERT INTO sessions (id, user_id, title, metadata, created_at,
+                                      updated_at, expires_at)
+                VALUES (?, ?, ?, ?, ?, ?, ?)
+                """,
+                (
+                    session.session_id,
+                    session.user_id,
+                    "User Session",
+                    json.dumps({"login_type": "mobile_password"}),
+                    session.created_at.isoformat(),
+                    session.created_at.isoformat(),
+                    session.expires_at.isoformat(),
+                ),
+            )
+
+            self.db.commit()
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Save session error: {str(e)}")
+            return False
+
+    async def delete_session(self, session_id: str) -> bool:
+        """Delete session from database"""
+        try:
+            if not self.db:
+                return False
+
+            cursor = self.db.cursor()
+            cursor.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
+            self.db.commit()
+            return True
+
+        except Exception as e:
+            self.logger.error(f"Delete session error: {str(e)}")
+            return False
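
Session validity in `validate_session` reduces to a timestamp comparison plus cleanup of expired rows. This standalone sketch reproduces that logic against an in-memory SQLite table (the table is trimmed to the columns the check needs, and the helper name is local to this snippet):

```python
import sqlite3
from datetime import datetime, timedelta

# In-memory stand-in for the sessions table used by AuthService
conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE sessions (id TEXT PRIMARY KEY, user_id TEXT, expires_at DATETIME)"
)

now = datetime.utcnow()
conn.execute(
    "INSERT INTO sessions VALUES (?, ?, ?)",
    ("sess_live", "user_1", (now + timedelta(hours=24)).isoformat()),
)
conn.execute(
    "INSERT INTO sessions VALUES (?, ?, ?)",
    ("sess_dead", "user_1", (now - timedelta(hours=1)).isoformat()),
)


def is_session_valid(session_id: str) -> bool:
    row = conn.execute(
        "SELECT expires_at FROM sessions WHERE id = ?", (session_id,)
    ).fetchone()
    if row is None:
        return False
    expires_at = datetime.fromisoformat(row[0])
    if datetime.utcnow() >= expires_at:
        # Mirror validate_session: purge the expired row before reporting failure
        conn.execute("DELETE FROM sessions WHERE id = ?", (session_id,))
        return False
    return True


print(is_session_valid("sess_live"))  # True
print(is_session_valid("sess_dead"))  # False, and the expired row is deleted
```

Deleting the expired row on first failed lookup keeps the table from accumulating stale sessions without needing a separate cleanup job.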
app/bedrock.py ADDED
@@ -0,0 +1,334 @@
+import json
+import sys
+import time
+import uuid
+from datetime import datetime
+from typing import Dict, List, Literal, Optional
+
+import boto3
+
+
+# Global variable to track the current tool use ID across function calls
+# Temporary solution
+CURRENT_TOOLUSE_ID = None
+
+
+# Class to handle OpenAI-style response formatting
+class OpenAIResponse:
+    def __init__(self, data):
+        # Recursively convert nested dicts and lists to OpenAIResponse objects
+        for key, value in data.items():
+            if isinstance(value, dict):
+                value = OpenAIResponse(value)
+            elif isinstance(value, list):
+                value = [
+                    OpenAIResponse(item) if isinstance(item, dict) else item
+                    for item in value
+                ]
+            setattr(self, key, value)
+
+    def model_dump(self, *args, **kwargs):
+        # Convert object to dict and add a timestamp
+        data = self.__dict__
+        data["created_at"] = datetime.now().isoformat()
+        return data
+
+
+# Main client class for interacting with Amazon Bedrock
+class BedrockClient:
+    def __init__(self):
+        # Initialize Bedrock client; AWS credentials must be configured first
+        try:
+            self.client = boto3.client("bedrock-runtime")
+            self.chat = Chat(self.client)
+        except Exception as e:
+            print(f"Error initializing Bedrock client: {e}")
+            sys.exit(1)
+
+
+# Chat interface class
+class Chat:
+    def __init__(self, client):
+        self.completions = ChatCompletions(client)
+
+
+# Core class handling chat completions functionality
+class ChatCompletions:
+    def __init__(self, client):
+        self.client = client
+
+    def _convert_openai_tools_to_bedrock_format(self, tools):
+        # Convert OpenAI function-calling format to the Bedrock tool format
+        bedrock_tools = []
+        for tool in tools:
+            if tool.get("type") == "function":
+                function = tool.get("function", {})
+                bedrock_tool = {
+                    "toolSpec": {
+                        "name": function.get("name", ""),
+                        "description": function.get("description", ""),
+                        "inputSchema": {
+                            "json": {
+                                "type": "object",
+                                "properties": function.get("parameters", {}).get(
+                                    "properties", {}
+                                ),
+                                "required": function.get("parameters", {}).get(
+                                    "required", []
+                                ),
+                            }
+                        },
+                    }
+                }
+                bedrock_tools.append(bedrock_tool)
+        return bedrock_tools
+
+    def _convert_openai_messages_to_bedrock_format(self, messages):
+        # Convert OpenAI message format to Bedrock message format
+        bedrock_messages = []
+        system_prompt = []
+        for message in messages:
+            if message.get("role") == "system":
+                system_prompt = [{"text": message.get("content")}]
+            elif message.get("role") == "user":
+                bedrock_message = {
+                    "role": message.get("role", "user"),
+                    "content": [{"text": message.get("content")}],
+                }
+                bedrock_messages.append(bedrock_message)
+            elif message.get("role") == "assistant":
+                bedrock_message = {
+                    "role": "assistant",
+                    "content": [{"text": message.get("content")}],
+                }
+                openai_tool_calls = message.get("tool_calls", [])
+                if openai_tool_calls:
+                    bedrock_tool_use = {
+                        "toolUseId": openai_tool_calls[0]["id"],
+                        "name": openai_tool_calls[0]["function"]["name"],
+                        "input": json.loads(
+                            openai_tool_calls[0]["function"]["arguments"]
+                        ),
+                    }
+                    bedrock_message["content"].append({"toolUse": bedrock_tool_use})
+                    global CURRENT_TOOLUSE_ID
+                    CURRENT_TOOLUSE_ID = openai_tool_calls[0]["id"]
+                bedrock_messages.append(bedrock_message)
+            elif message.get("role") == "tool":
+                bedrock_message = {
+                    "role": "user",
+                    "content": [
+                        {
+                            "toolResult": {
+                                "toolUseId": CURRENT_TOOLUSE_ID,
+                                "content": [{"text": message.get("content")}],
+                            }
+                        }
+                    ],
+                }
+                bedrock_messages.append(bedrock_message)
+            else:
+                raise ValueError(f"Invalid role: {message.get('role')}")
+        return system_prompt, bedrock_messages
+
+    def _convert_bedrock_response_to_openai_format(self, bedrock_response):
+        # Convert Bedrock response format to OpenAI format
+        content = ""
+        if bedrock_response.get("output", {}).get("message", {}).get("content"):
+            content_array = bedrock_response["output"]["message"]["content"]
+            content = "".join(item.get("text", "") for item in content_array)
+            if content == "":
+                content = "."
+
+        # Handle tool calls in the response
+        openai_tool_calls = []
+        if bedrock_response.get("output", {}).get("message", {}).get("content"):
+            for content_item in bedrock_response["output"]["message"]["content"]:
+                if content_item.get("toolUse"):
+                    bedrock_tool_use = content_item["toolUse"]
+                    global CURRENT_TOOLUSE_ID
+                    CURRENT_TOOLUSE_ID = bedrock_tool_use["toolUseId"]
+                    openai_tool_call = {
+                        "id": CURRENT_TOOLUSE_ID,
+                        "type": "function",
+                        "function": {
+                            "name": bedrock_tool_use["name"],
+                            "arguments": json.dumps(bedrock_tool_use["input"]),
+                        },
+                    }
+                    openai_tool_calls.append(openai_tool_call)
+
+        # Construct final OpenAI-format response
+        openai_format = {
+            "id": f"chatcmpl-{uuid.uuid4()}",
+            "created": int(time.time()),
+            "object": "chat.completion",
+            "system_fingerprint": None,
+            "choices": [
+                {
+                    "finish_reason": bedrock_response.get("stopReason", "end_turn"),
+                    "index": 0,
+                    "message": {
+                        "content": content,
+                        "role": bedrock_response.get("output", {})
+                        .get("message", {})
+                        .get("role", "assistant"),
+                        "tool_calls": openai_tool_calls
+                        if openai_tool_calls != []
+                        else None,
+                        "function_call": None,
+                    },
+                }
+            ],
+            "usage": {
+                "completion_tokens": bedrock_response.get("usage", {}).get(
+                    "outputTokens", 0
+                ),
+                "prompt_tokens": bedrock_response.get("usage", {}).get(
+                    "inputTokens", 0
+                ),
+                "total_tokens": bedrock_response.get("usage", {}).get("totalTokens", 0),
191
+ },
192
+ }
193
+ return OpenAIResponse(openai_format)
194
+
195
+ async def _invoke_bedrock(
196
+ self,
197
+ model: str,
198
+ messages: List[Dict[str, str]],
199
+ max_tokens: int,
200
+ temperature: float,
201
+ tools: Optional[List[dict]] = None,
202
+ tool_choice: Literal["none", "auto", "required"] = "auto",
203
+ **kwargs,
204
+ ) -> OpenAIResponse:
205
+ # Non-streaming invocation of Bedrock model
206
+ (
207
+ system_prompt,
208
+ bedrock_messages,
209
+ ) = self._convert_openai_messages_to_bedrock_format(messages)
210
+ response = self.client.converse(
211
+ modelId=model,
212
+ system=system_prompt,
213
+ messages=bedrock_messages,
214
+ inferenceConfig={"temperature": temperature, "maxTokens": max_tokens},
215
+ toolConfig={"tools": tools} if tools else None,
216
+ )
217
+ openai_response = self._convert_bedrock_response_to_openai_format(response)
218
+ return openai_response
219
+
220
+ async def _invoke_bedrock_stream(
221
+ self,
222
+ model: str,
223
+ messages: List[Dict[str, str]],
224
+ max_tokens: int,
225
+ temperature: float,
226
+ tools: Optional[List[dict]] = None,
227
+ tool_choice: Literal["none", "auto", "required"] = "auto",
228
+ **kwargs,
229
+ ) -> OpenAIResponse:
230
+ # Streaming invocation of Bedrock model
231
+ (
232
+ system_prompt,
233
+ bedrock_messages,
234
+ ) = self._convert_openai_messages_to_bedrock_format(messages)
235
+ response = self.client.converse_stream(
236
+ modelId=model,
237
+ system=system_prompt,
238
+ messages=bedrock_messages,
239
+ inferenceConfig={"temperature": temperature, "maxTokens": max_tokens},
240
+ toolConfig={"tools": tools} if tools else None,
241
+ )
242
+
243
+ # Initialize response structure
244
+ bedrock_response = {
245
+ "output": {"message": {"role": "", "content": []}},
246
+ "stopReason": "",
247
+ "usage": {},
248
+ "metrics": {},
249
+ }
250
+ bedrock_response_text = ""
251
+ bedrock_response_tool_input = ""
252
+
253
+ # Process streaming response
254
+ stream = response.get("stream")
255
+ if stream:
256
+ for event in stream:
257
+ if event.get("messageStart", {}).get("role"):
258
+ bedrock_response["output"]["message"]["role"] = event[
259
+ "messageStart"
260
+ ]["role"]
261
+ if event.get("contentBlockDelta", {}).get("delta", {}).get("text"):
262
+ bedrock_response_text += event["contentBlockDelta"]["delta"]["text"]
263
+ print(
264
+ event["contentBlockDelta"]["delta"]["text"], end="", flush=True
265
+ )
266
+ if event.get("contentBlockStop", {}).get("contentBlockIndex") == 0:
267
+ bedrock_response["output"]["message"]["content"].append(
268
+ {"text": bedrock_response_text}
269
+ )
270
+ if event.get("contentBlockStart", {}).get("start", {}).get("toolUse"):
271
+ bedrock_tool_use = event["contentBlockStart"]["start"]["toolUse"]
272
+ tool_use = {
273
+ "toolUseId": bedrock_tool_use["toolUseId"],
274
+ "name": bedrock_tool_use["name"],
275
+ }
276
+ bedrock_response["output"]["message"]["content"].append(
277
+ {"toolUse": tool_use}
278
+ )
279
+ global CURRENT_TOOLUSE_ID
280
+ CURRENT_TOOLUSE_ID = bedrock_tool_use["toolUseId"]
281
+ if event.get("contentBlockDelta", {}).get("delta", {}).get("toolUse"):
282
+ bedrock_response_tool_input += event["contentBlockDelta"]["delta"][
283
+ "toolUse"
284
+ ]["input"]
285
+ print(
286
+ event["contentBlockDelta"]["delta"]["toolUse"]["input"],
287
+ end="",
288
+ flush=True,
289
+ )
290
+ if event.get("contentBlockStop", {}).get("contentBlockIndex") == 1:
291
+ bedrock_response["output"]["message"]["content"][1]["toolUse"][
292
+ "input"
293
+ ] = json.loads(bedrock_response_tool_input)
294
+ print()
295
+ openai_response = self._convert_bedrock_response_to_openai_format(
296
+ bedrock_response
297
+ )
298
+ return openai_response
299
+
300
+ def create(
301
+ self,
302
+ model: str,
303
+ messages: List[Dict[str, str]],
304
+ max_tokens: int,
305
+ temperature: float,
306
+ stream: Optional[bool] = True,
307
+ tools: Optional[List[dict]] = None,
308
+ tool_choice: Literal["none", "auto", "required"] = "auto",
309
+ **kwargs,
310
+ ) -> OpenAIResponse:
311
+ # Main entry point for chat completion
312
+ bedrock_tools = []
313
+ if tools is not None:
314
+ bedrock_tools = self._convert_openai_tools_to_bedrock_format(tools)
315
+ if stream:
316
+ return self._invoke_bedrock_stream(
317
+ model,
318
+ messages,
319
+ max_tokens,
320
+ temperature,
321
+ bedrock_tools,
322
+ tool_choice,
323
+ **kwargs,
324
+ )
325
+ else:
326
+ return self._invoke_bedrock(
327
+ model,
328
+ messages,
329
+ max_tokens,
330
+ temperature,
331
+ bedrock_tools,
332
+ tool_choice,
333
+ **kwargs,
334
+ )
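The role-by-role conversion above can be sketched as a standalone function, reduced to the system/user/assistant text cases (no tool calls). `convert_messages` is a hypothetical free-function mirror of the class method, not part of the actual module; it only illustrates that Bedrock's converse API takes the system prompt separately and wraps each message body in a `[{"text": ...}]` content list.

```python
def convert_messages(messages):
    """Mirror of the OpenAI -> Bedrock message conversion (text-only cases)."""
    system_prompt = []
    bedrock_messages = []
    for message in messages:
        role = message.get("role")
        if role == "system":
            # Bedrock takes the system prompt as a separate argument
            system_prompt = [{"text": message.get("content")}]
        elif role in ("user", "assistant"):
            bedrock_messages.append(
                {"role": role, "content": [{"text": message.get("content")}]}
            )
        else:
            raise ValueError(f"Invalid role: {role}")
    return system_prompt, bedrock_messages


system, msgs = convert_messages(
    [
        {"role": "system", "content": "Be brief."},
        {"role": "user", "content": "Hi"},
    ]
)
print(system)  # [{'text': 'Be brief.'}]
print(msgs)    # [{'role': 'user', 'content': [{'text': 'Hi'}]}]
```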
app/cloudflare/__init__.py ADDED
@@ -0,0 +1,11 @@
+ """
+ Cloudflare services integration for OpenManus
+ """
+
+ from .client import CloudflareClient
+ from .d1 import D1Database
+ from .durable_objects import DurableObjects
+ from .kv import KVStorage
+ from .r2 import R2Storage
+
+ __all__ = ["CloudflareClient", "D1Database", "R2Storage", "KVStorage", "DurableObjects"]
app/cloudflare/client.py ADDED
@@ -0,0 +1,228 @@
+ """
+ Cloudflare API Client
+ Handles authentication and base HTTP operations for Cloudflare services
+ """
+
+ import asyncio
+ import json
+ from typing import Any, Dict, Optional, Union
+
+ import aiohttp
+
+ from app.logger import logger
+
+
+ class CloudflareClient:
+     """Base client for Cloudflare API operations"""
+
+     def __init__(
+         self,
+         api_token: str,
+         account_id: str,
+         worker_url: Optional[str] = None,
+         timeout: int = 30,
+     ):
+         self.api_token = api_token
+         self.account_id = account_id
+         self.worker_url = worker_url
+         self.timeout = timeout
+         self.base_url = "https://api.cloudflare.com/client/v4"
+
+         # HTTP headers for API requests
+         self.headers = {
+             "Authorization": f"Bearer {api_token}",
+             "Content-Type": "application/json",
+         }
+
+     async def _make_request(
+         self,
+         method: str,
+         url: str,
+         data: Optional[Dict[str, Any]] = None,
+         headers: Optional[Dict[str, str]] = None,
+         use_worker: bool = False,
+     ) -> Dict[str, Any]:
+         """Make HTTP request to Cloudflare API or Worker"""
+
+         # Use worker URL if specified and use_worker is True
+         if use_worker and self.worker_url:
+             full_url = f"{self.worker_url.rstrip('/')}/{url.lstrip('/')}"
+         else:
+             full_url = f"{self.base_url}/{url.lstrip('/')}"
+
+         request_headers = self.headers.copy()
+         if headers:
+             request_headers.update(headers)
+
+         timeout = aiohttp.ClientTimeout(total=self.timeout)
+
+         try:
+             async with aiohttp.ClientSession(timeout=timeout) as session:
+                 async with session.request(
+                     method=method.upper(),
+                     url=full_url,
+                     headers=request_headers,
+                     json=data if data else None,
+                 ) as response:
+                     response_text = await response.text()
+
+                     try:
+                         response_data = (
+                             json.loads(response_text) if response_text else {}
+                         )
+                     except json.JSONDecodeError:
+                         response_data = {"raw_response": response_text}
+
+                     if not response.ok:
+                         logger.error(
+                             f"Cloudflare API error: {response.status} - {response_text}"
+                         )
+                         raise CloudflareError(
+                             f"HTTP {response.status}: {response_text}",
+                             response.status,
+                             response_data,
+                         )
+
+                     return response_data
+
+         except asyncio.TimeoutError:
+             logger.error(f"Timeout making request to {full_url}")
+             raise CloudflareError(f"Request timeout after {self.timeout}s")
+         except aiohttp.ClientError as e:
+             logger.error(f"HTTP client error: {e}")
+             raise CloudflareError(f"Client error: {e}")
+
+     async def get(
+         self,
+         url: str,
+         headers: Optional[Dict[str, str]] = None,
+         use_worker: bool = False,
+     ) -> Dict[str, Any]:
+         """Make GET request"""
+         return await self._make_request(
+             "GET", url, headers=headers, use_worker=use_worker
+         )
+
+     async def post(
+         self,
+         url: str,
+         data: Optional[Dict[str, Any]] = None,
+         headers: Optional[Dict[str, str]] = None,
+         use_worker: bool = False,
+     ) -> Dict[str, Any]:
+         """Make POST request"""
+         return await self._make_request(
+             "POST", url, data=data, headers=headers, use_worker=use_worker
+         )
+
+     async def put(
+         self,
+         url: str,
+         data: Optional[Dict[str, Any]] = None,
+         headers: Optional[Dict[str, str]] = None,
+         use_worker: bool = False,
+     ) -> Dict[str, Any]:
+         """Make PUT request"""
+         return await self._make_request(
+             "PUT", url, data=data, headers=headers, use_worker=use_worker
+         )
+
+     async def delete(
+         self,
+         url: str,
+         headers: Optional[Dict[str, str]] = None,
+         use_worker: bool = False,
+     ) -> Dict[str, Any]:
+         """Make DELETE request"""
+         return await self._make_request(
+             "DELETE", url, headers=headers, use_worker=use_worker
+         )
+
+     async def upload_file(
+         self,
+         url: str,
+         file_data: bytes,
+         content_type: str = "application/octet-stream",
+         headers: Optional[Dict[str, str]] = None,
+         use_worker: bool = False,
+     ) -> Dict[str, Any]:
+         """Upload file data"""
+
+         # Use worker URL if specified and use_worker is True
+         if use_worker and self.worker_url:
+             full_url = f"{self.worker_url.rstrip('/')}/{url.lstrip('/')}"
+         else:
+             full_url = f"{self.base_url}/{url.lstrip('/')}"
+
+         upload_headers = {
+             "Authorization": f"Bearer {self.api_token}",
+             "Content-Type": content_type,
+         }
+         if headers:
+             upload_headers.update(headers)
+
+         timeout = aiohttp.ClientTimeout(
+             total=self.timeout * 2
+         )  # Longer timeout for uploads
+
+         try:
+             async with aiohttp.ClientSession(timeout=timeout) as session:
+                 async with session.put(
+                     url=full_url, headers=upload_headers, data=file_data
+                 ) as response:
+                     response_text = await response.text()
+
+                     try:
+                         response_data = (
+                             json.loads(response_text) if response_text else {}
+                         )
+                     except json.JSONDecodeError:
+                         response_data = {"raw_response": response_text}
+
+                     if not response.ok:
+                         logger.error(
+                             f"File upload error: {response.status} - {response_text}"
+                         )
+                         raise CloudflareError(
+                             f"Upload failed: HTTP {response.status}",
+                             response.status,
+                             response_data,
+                         )
+
+                     return response_data
+
+         except asyncio.TimeoutError:
+             logger.error(f"Timeout uploading file to {full_url}")
+             raise CloudflareError(f"Upload timeout after {self.timeout * 2}s")
+         except aiohttp.ClientError as e:
+             logger.error(f"Upload client error: {e}")
+             raise CloudflareError(f"Upload error: {e}")
+
+     def get_account_url(self, endpoint: str) -> str:
+         """Get URL for account-scoped endpoint"""
+         return f"accounts/{self.account_id}/{endpoint}"
+
+     def get_worker_url(self, endpoint: str) -> str:
+         """Get URL for worker endpoint"""
+         if not self.worker_url:
+             raise CloudflareError("Worker URL not configured")
+         return endpoint
+
+
+ class CloudflareError(Exception):
+     """Cloudflare API error"""
+
+     def __init__(
+         self,
+         message: str,
+         status_code: Optional[int] = None,
+         response_data: Optional[Dict[str, Any]] = None,
+     ):
+         super().__init__(message)
+         self.status_code = status_code
+         self.response_data = response_data or {}
+
+     def __str__(self) -> str:
+         if self.status_code:
+             return f"CloudflareError({self.status_code}): {super().__str__()}"
+         return f"CloudflareError: {super().__str__()}"
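The URL handling in `_make_request` and `upload_file` above relies on one small rule: strip the trailing slash from the base and the leading slash from the endpoint, so either spelling joins to the same URL. A standalone sketch of that rule (`join_url` is a hypothetical helper, not part of the class):

```python
def join_url(base: str, endpoint: str) -> str:
    """Join a base URL and an endpoint, tolerating slashes on either side."""
    return f"{base.rstrip('/')}/{endpoint.lstrip('/')}"


print(join_url("https://worker.example.com/", "/api/database/query"))
# https://worker.example.com/api/database/query
print(join_url("https://api.cloudflare.com/client/v4", "accounts/abc/d1"))
# https://api.cloudflare.com/client/v4/accounts/abc/d1
```

This makes callers' endpoint strings insensitive to whether they start with `/`, which is why the worker-route methods later in the diff can pass paths like `do/agent/...` without a leading slash.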
app/cloudflare/d1.py ADDED
@@ -0,0 +1,510 @@
+ """
+ D1 Database integration for OpenManus
+ Provides interface to Cloudflare D1 database operations
+ """
+
+ import json
+ from typing import Any, Dict, List, Optional, Union
+
+ from app.logger import logger
+
+ from .client import CloudflareClient, CloudflareError
+
+
+ class D1Database:
+     """Cloudflare D1 Database client"""
+
+     def __init__(self, client: CloudflareClient, database_id: str):
+         self.client = client
+         self.database_id = database_id
+         self.base_endpoint = f"accounts/{client.account_id}/d1/database/{database_id}"
+
+     async def execute_query(
+         self, sql: str, params: Optional[List[Any]] = None, use_worker: bool = True
+     ) -> Dict[str, Any]:
+         """Execute a SQL query"""
+
+         query_data = {"sql": sql}
+         if params:
+             query_data["params"] = params
+
+         try:
+             if use_worker:
+                 # Use worker endpoint for better performance
+                 response = await self.client.post(
+                     "api/database/query", data=query_data, use_worker=True
+                 )
+             else:
+                 # Use Cloudflare API directly
+                 response = await self.client.post(
+                     f"{self.base_endpoint}/query", data=query_data
+                 )
+             return response
+         except CloudflareError as e:
+             logger.error(f"D1 query execution failed: {e}")
+             raise
+
+     async def batch_execute(
+         self, queries: List[Dict[str, Any]], use_worker: bool = True
+     ) -> Dict[str, Any]:
+         """Execute multiple queries in a batch"""
+
+         batch_data = {"queries": queries}
+
+         try:
+             if use_worker:
+                 response = await self.client.post(
+                     "api/database/batch", data=batch_data, use_worker=True
+                 )
+             else:
+                 response = await self.client.post(
+                     f"{self.base_endpoint}/query", data=batch_data
+                 )
+             return response
+         except CloudflareError as e:
+             logger.error(f"D1 batch execution failed: {e}")
+             raise
+
+     # User management methods
+     async def create_user(
+         self,
+         user_id: str,
+         username: str,
+         email: Optional[str] = None,
+         metadata: Optional[Dict[str, Any]] = None,
+     ) -> Dict[str, Any]:
+         """Create a new user"""
+
+         sql = """
+             INSERT INTO users (id, username, email, metadata)
+             VALUES (?, ?, ?, ?)
+             ON CONFLICT(id) DO UPDATE SET
+                 username = excluded.username,
+                 email = excluded.email,
+                 metadata = excluded.metadata,
+                 updated_at = strftime('%s', 'now')
+         """
+         params = [user_id, username, email, json.dumps(metadata or {})]
+
+         return await self.execute_query(sql, params)
+
+     async def get_user(self, user_id: str) -> Optional[Dict[str, Any]]:
+         """Get user by ID"""
+
+         sql = "SELECT * FROM users WHERE id = ?"
+         params = [user_id]
+
+         result = await self.execute_query(sql, params)
+
+         # Parse response based on Cloudflare D1 format
+         if result.get("success") and result.get("result"):
+             rows = result["result"][0].get("results", [])
+             if rows:
+                 user = rows[0]
+                 if user.get("metadata"):
+                     user["metadata"] = json.loads(user["metadata"])
+                 return user
+
+         return None
+
+     async def get_user_by_username(self, username: str) -> Optional[Dict[str, Any]]:
+         """Get user by username"""
+
+         sql = "SELECT * FROM users WHERE username = ?"
+         params = [username]
+
+         result = await self.execute_query(sql, params)
+
+         if result.get("success") and result.get("result"):
+             rows = result["result"][0].get("results", [])
+             if rows:
+                 user = rows[0]
+                 if user.get("metadata"):
+                     user["metadata"] = json.loads(user["metadata"])
+                 return user
+
+         return None
+
+     # Session management methods
+     async def create_session(
+         self,
+         session_id: str,
+         user_id: str,
+         session_data: Dict[str, Any],
+         expires_at: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """Create a new session"""
+
+         sql = """
+             INSERT INTO sessions (id, user_id, session_data, expires_at)
+             VALUES (?, ?, ?, ?)
+         """
+         params = [session_id, user_id, json.dumps(session_data), expires_at]
+
+         return await self.execute_query(sql, params)
+
+     async def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
+         """Get session by ID"""
+
+         sql = """
+             SELECT * FROM sessions
+             WHERE id = ? AND (expires_at IS NULL OR expires_at > strftime('%s', 'now'))
+         """
+         params = [session_id]
+
+         result = await self.execute_query(sql, params)
+
+         if result.get("success") and result.get("result"):
+             rows = result["result"][0].get("results", [])
+             if rows:
+                 session = rows[0]
+                 if session.get("session_data"):
+                     session["session_data"] = json.loads(session["session_data"])
+                 return session
+
+         return None
+
+     async def delete_session(self, session_id: str) -> Dict[str, Any]:
+         """Delete a session"""
+
+         sql = "DELETE FROM sessions WHERE id = ?"
+         params = [session_id]
+
+         return await self.execute_query(sql, params)
+
+     # Conversation methods
+     async def create_conversation(
+         self,
+         conversation_id: str,
+         user_id: str,
+         title: Optional[str] = None,
+         messages: Optional[List[Dict[str, Any]]] = None,
+     ) -> Dict[str, Any]:
+         """Create a new conversation"""
+
+         sql = """
+             INSERT INTO conversations (id, user_id, title, messages)
+             VALUES (?, ?, ?, ?)
+         """
+         params = [conversation_id, user_id, title, json.dumps(messages or [])]
+
+         return await self.execute_query(sql, params)
+
+     async def get_conversation(self, conversation_id: str) -> Optional[Dict[str, Any]]:
+         """Get conversation by ID"""
+
+         sql = "SELECT * FROM conversations WHERE id = ?"
+         params = [conversation_id]
+
+         result = await self.execute_query(sql, params)
+
+         if result.get("success") and result.get("result"):
+             rows = result["result"][0].get("results", [])
+             if rows:
+                 conversation = rows[0]
+                 if conversation.get("messages"):
+                     conversation["messages"] = json.loads(conversation["messages"])
+                 return conversation
+
+         return None
+
+     async def update_conversation_messages(
+         self, conversation_id: str, messages: List[Dict[str, Any]]
+     ) -> Dict[str, Any]:
+         """Update conversation messages"""
+
+         sql = """
+             UPDATE conversations
+             SET messages = ?, updated_at = strftime('%s', 'now')
+             WHERE id = ?
+         """
+         params = [json.dumps(messages), conversation_id]
+
+         return await self.execute_query(sql, params)
+
+     async def get_user_conversations(
+         self, user_id: str, limit: int = 50
+     ) -> List[Dict[str, Any]]:
+         """Get user's conversations"""
+
+         sql = """
+             SELECT id, user_id, title, created_at, updated_at
+             FROM conversations
+             WHERE user_id = ?
+             ORDER BY updated_at DESC
+             LIMIT ?
+         """
+         params = [user_id, limit]
+
+         result = await self.execute_query(sql, params)
+
+         if result.get("success") and result.get("result"):
+             return result["result"][0].get("results", [])
+
+         return []
+
+     # Agent execution methods
+     async def create_agent_execution(
+         self,
+         execution_id: str,
+         user_id: str,
+         session_id: Optional[str] = None,
+         task_description: Optional[str] = None,
+         status: str = "pending",
+     ) -> Dict[str, Any]:
+         """Create a new agent execution record"""
+
+         sql = """
+             INSERT INTO agent_executions (id, user_id, session_id, task_description, status)
+             VALUES (?, ?, ?, ?, ?)
+         """
+         params = [execution_id, user_id, session_id, task_description, status]
+
+         return await self.execute_query(sql, params)
+
+     async def update_agent_execution(
+         self,
+         execution_id: str,
+         status: Optional[str] = None,
+         result: Optional[str] = None,
+         execution_time: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """Update agent execution record"""
+
+         updates = []
+         params = []
+
+         if status:
+             updates.append("status = ?")
+             params.append(status)
+
+         if result:
+             updates.append("result = ?")
+             params.append(result)
+
+         if execution_time is not None:
+             updates.append("execution_time = ?")
+             params.append(execution_time)
+
+         if status in ["completed", "failed"]:
+             updates.append("completed_at = strftime('%s', 'now')")
+
+         if not updates:
+             return {"success": True, "message": "No updates provided"}
+
+         sql = f"""
+             UPDATE agent_executions
+             SET {', '.join(updates)}
+             WHERE id = ?
+         """
+         params.append(execution_id)
+
+         return await self.execute_query(sql, params)
+
+     async def get_agent_execution(self, execution_id: str) -> Optional[Dict[str, Any]]:
+         """Get agent execution by ID"""
+
+         sql = "SELECT * FROM agent_executions WHERE id = ?"
+         params = [execution_id]
+
+         result = await self.execute_query(sql, params)
+
+         if result.get("success") and result.get("result"):
+             rows = result["result"][0].get("results", [])
+             if rows:
+                 return rows[0]
+
+         return None
+
+     async def get_user_executions(
+         self, user_id: str, limit: int = 50
+     ) -> List[Dict[str, Any]]:
+         """Get user's agent executions"""
+
+         sql = """
+             SELECT * FROM agent_executions
+             WHERE user_id = ?
+             ORDER BY created_at DESC
+             LIMIT ?
+         """
+         params = [user_id, limit]
+
+         result = await self.execute_query(sql, params)
+
+         if result.get("success") and result.get("result"):
+             return result["result"][0].get("results", [])
+
+         return []
+
+     # File record methods
+     async def create_file_record(
+         self,
+         file_id: str,
+         user_id: str,
+         filename: str,
+         file_key: str,
+         file_size: int,
+         content_type: str,
+         bucket: str = "storage",
+     ) -> Dict[str, Any]:
+         """Create a file record"""
+
+         sql = """
+             INSERT INTO files (id, user_id, filename, file_key, file_size, content_type, bucket)
+             VALUES (?, ?, ?, ?, ?, ?, ?)
+         """
+         params = [file_id, user_id, filename, file_key, file_size, content_type, bucket]
+
+         return await self.execute_query(sql, params)
+
+     async def get_file_record(self, file_id: str) -> Optional[Dict[str, Any]]:
+         """Get file record by ID"""
+
+         sql = "SELECT * FROM files WHERE id = ?"
+         params = [file_id]
+
+         result = await self.execute_query(sql, params)
+
+         if result.get("success") and result.get("result"):
+             rows = result["result"][0].get("results", [])
+             if rows:
+                 return rows[0]
+
+         return None
+
+     async def get_user_files(
+         self, user_id: str, limit: int = 100
+     ) -> List[Dict[str, Any]]:
+         """Get user's files"""
+
+         sql = """
+             SELECT * FROM files
+             WHERE user_id = ?
+             ORDER BY created_at DESC
+             LIMIT ?
+         """
+         params = [user_id, limit]
+
+         result = await self.execute_query(sql, params)
+
+         if result.get("success") and result.get("result"):
+             return result["result"][0].get("results", [])
+
+         return []
+
+     async def delete_file_record(self, file_id: str) -> Dict[str, Any]:
+         """Delete a file record"""
+
+         sql = "DELETE FROM files WHERE id = ?"
+         params = [file_id]
+
+         return await self.execute_query(sql, params)
+
+     # Schema initialization
+     async def initialize_schema(self) -> Dict[str, Any]:
+         """Initialize database schema"""
+
+         schema_queries = [
+             {
+                 "sql": """CREATE TABLE IF NOT EXISTS users (
+                     id TEXT PRIMARY KEY,
+                     username TEXT UNIQUE NOT NULL,
+                     email TEXT UNIQUE,
+                     created_at INTEGER DEFAULT (strftime('%s', 'now')),
+                     updated_at INTEGER DEFAULT (strftime('%s', 'now')),
+                     metadata TEXT
+                 )"""
+             },
+             {
+                 "sql": """CREATE TABLE IF NOT EXISTS sessions (
+                     id TEXT PRIMARY KEY,
+                     user_id TEXT NOT NULL,
+                     session_data TEXT,
+                     created_at INTEGER DEFAULT (strftime('%s', 'now')),
+                     expires_at INTEGER,
+                     FOREIGN KEY (user_id) REFERENCES users(id)
+                 )"""
+             },
+             {
+                 "sql": """CREATE TABLE IF NOT EXISTS conversations (
+                     id TEXT PRIMARY KEY,
+                     user_id TEXT NOT NULL,
+                     title TEXT,
+                     messages TEXT,
+                     created_at INTEGER DEFAULT (strftime('%s', 'now')),
+                     updated_at INTEGER DEFAULT (strftime('%s', 'now')),
+                     FOREIGN KEY (user_id) REFERENCES users(id)
+                 )"""
+             },
+             {
+                 "sql": """CREATE TABLE IF NOT EXISTS files (
+                     id TEXT PRIMARY KEY,
+                     user_id TEXT NOT NULL,
+                     filename TEXT NOT NULL,
+                     file_key TEXT NOT NULL,
+                     file_size INTEGER,
+                     content_type TEXT,
+                     bucket TEXT DEFAULT 'storage',
+                     created_at INTEGER DEFAULT (strftime('%s', 'now')),
+                     FOREIGN KEY (user_id) REFERENCES users(id)
+                 )"""
+             },
+             {
+                 "sql": """CREATE TABLE IF NOT EXISTS agent_executions (
+                     id TEXT PRIMARY KEY,
+                     user_id TEXT NOT NULL,
+                     session_id TEXT,
+                     task_description TEXT,
+                     status TEXT DEFAULT 'pending',
+                     result TEXT,
+                     execution_time INTEGER,
+                     created_at INTEGER DEFAULT (strftime('%s', 'now')),
+                     completed_at INTEGER,
+                     FOREIGN KEY (user_id) REFERENCES users(id)
+                 )"""
+             },
+         ]
+
+         # Add indexes
+         index_queries = [
+             {
+                 "sql": "CREATE INDEX IF NOT EXISTS idx_sessions_user_id ON sessions(user_id)"
+             },
+             {
+                 "sql": "CREATE INDEX IF NOT EXISTS idx_conversations_user_id ON conversations(user_id)"
+             },
+             {"sql": "CREATE INDEX IF NOT EXISTS idx_files_user_id ON files(user_id)"},
+             {
+                 "sql": "CREATE INDEX IF NOT EXISTS idx_agent_executions_user_id ON agent_executions(user_id)"
+             },
+         ]
+
+         all_queries = schema_queries + index_queries
+
+         return await self.batch_execute(all_queries)
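The dynamic UPDATE assembly in `update_agent_execution` above can be sketched as a pure function: one `col = ?` fragment per provided field, with `completed_at` stamped for terminal states. `build_update` is a hypothetical standalone mirror of the method's SQL-building logic, not part of the class, and flattens the statement onto one line for readability.

```python
def build_update(execution_id, status=None, result=None, execution_time=None):
    """Assemble a parameterized UPDATE for the agent_executions table."""
    updates, params = [], []
    if status:
        updates.append("status = ?")
        params.append(status)
    if result:
        updates.append("result = ?")
        params.append(result)
    if execution_time is not None:
        updates.append("execution_time = ?")
        params.append(execution_time)
    if status in ("completed", "failed"):
        # Terminal states also get a completion timestamp
        updates.append("completed_at = strftime('%s', 'now')")
    sql = f"UPDATE agent_executions SET {', '.join(updates)} WHERE id = ?"
    params.append(execution_id)
    return sql, params


sql, params = build_update("exec-1", status="completed", execution_time=1200)
print(sql)
# UPDATE agent_executions SET status = ?, execution_time = ?, completed_at = strftime('%s', 'now') WHERE id = ?
print(params)  # ['completed', 1200, 'exec-1']
```

Keeping the values in `params` rather than interpolating them into `sql` is what makes the statement safe to hand to D1 as a bound query.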
app/cloudflare/durable_objects.py ADDED
@@ -0,0 +1,365 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""
Durable Objects integration for OpenManus
Provides interface to Cloudflare Durable Objects operations
"""

import asyncio
import json
import time
from typing import Any, Dict, Optional

from app.logger import logger

from .client import CloudflareClient, CloudflareError


class DurableObjects:
    """Cloudflare Durable Objects client"""

    def __init__(self, client: CloudflareClient):
        self.client = client

    async def create_agent_session(
        self, session_id: str, user_id: str, metadata: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """Create a new agent session"""

        session_data = {
            "sessionId": session_id,
            "userId": user_id,
            "metadata": metadata or {},
        }

        try:
            response = await self.client.post(
                f"do/agent/{session_id}/start", data=session_data, use_worker=True
            )

            return {
                "success": True,
                "session_id": session_id,
                "user_id": user_id,
                **response,
            }

        except CloudflareError as e:
            logger.error(f"Failed to create agent session: {e}")
            raise

    async def get_agent_session_status(self, session_id: str) -> Dict[str, Any]:
        """Get agent session status"""

        try:
            response = await self.client.get(
                f"do/agent/{session_id}/status?sessionId={session_id}", use_worker=True
            )

            return response

        except CloudflareError as e:
            logger.error(f"Failed to get agent session status: {e}")
            raise

    async def update_agent_session(
        self, session_id: str, updates: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Update agent session"""

        update_data = {"sessionId": session_id, "updates": updates}

        try:
            response = await self.client.post(
                f"do/agent/{session_id}/update", data=update_data, use_worker=True
            )

            return {"success": True, "session_id": session_id, **response}

        except CloudflareError as e:
            logger.error(f"Failed to update agent session: {e}")
            raise

    async def stop_agent_session(self, session_id: str) -> Dict[str, Any]:
        """Stop agent session"""

        try:
            response = await self.client.post(
                f"do/agent/{session_id}/stop",
                data={"sessionId": session_id},
                use_worker=True,
            )

            return {"success": True, "session_id": session_id, **response}

        except CloudflareError as e:
            logger.error(f"Failed to stop agent session: {e}")
            raise

    async def add_agent_message(
        self, session_id: str, message: Dict[str, Any]
    ) -> Dict[str, Any]:
        """Add a message to agent session"""

        message_data = {
            "sessionId": session_id,
            "message": {"timestamp": int(time.time()), **message},
        }

        try:
            response = await self.client.post(
                f"do/agent/{session_id}/messages", data=message_data, use_worker=True
            )

            return {"success": True, "session_id": session_id, **response}

        except CloudflareError as e:
            logger.error(f"Failed to add agent message: {e}")
            raise

    async def get_agent_messages(
        self, session_id: str, limit: int = 50, offset: int = 0
    ) -> Dict[str, Any]:
        """Get agent session messages"""

        try:
            response = await self.client.get(
                f"do/agent/{session_id}/messages?sessionId={session_id}&limit={limit}&offset={offset}",
                use_worker=True,
            )

            return response

        except CloudflareError as e:
            logger.error(f"Failed to get agent messages: {e}")
            raise

    # Chat Room methods
    async def join_chat_room(
        self,
        room_id: str,
        user_id: str,
        username: str,
        room_config: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Join a chat room"""

        join_data = {
            "userId": user_id,
            "username": username,
            "roomConfig": room_config or {},
        }

        try:
            response = await self.client.post(
                f"do/chat/{room_id}/join", data=join_data, use_worker=True
            )

            return {"success": True, "room_id": room_id, "user_id": user_id, **response}

        except CloudflareError as e:
            logger.error(f"Failed to join chat room: {e}")
            raise

    async def leave_chat_room(self, room_id: str, user_id: str) -> Dict[str, Any]:
        """Leave a chat room"""

        leave_data = {"userId": user_id}

        try:
            response = await self.client.post(
                f"do/chat/{room_id}/leave", data=leave_data, use_worker=True
            )

            return {"success": True, "room_id": room_id, "user_id": user_id, **response}

        except CloudflareError as e:
            logger.error(f"Failed to leave chat room: {e}")
            raise

    async def get_chat_room_info(self, room_id: str) -> Dict[str, Any]:
        """Get chat room information"""

        try:
            response = await self.client.get(f"do/chat/{room_id}/info", use_worker=True)

            return response

        except CloudflareError as e:
            logger.error(f"Failed to get chat room info: {e}")
            raise

    async def send_chat_message(
        self,
        room_id: str,
        user_id: str,
        username: str,
        content: str,
        message_type: str = "text",
    ) -> Dict[str, Any]:
        """Send a message to chat room"""

        message_data = {
            "userId": user_id,
            "username": username,
            "content": content,
            "messageType": message_type,
        }

        try:
            response = await self.client.post(
                f"do/chat/{room_id}/messages", data=message_data, use_worker=True
            )

            return {"success": True, "room_id": room_id, **response}

        except CloudflareError as e:
            logger.error(f"Failed to send chat message: {e}")
            raise

    async def get_chat_messages(
        self, room_id: str, limit: int = 50, offset: int = 0
    ) -> Dict[str, Any]:
        """Get chat room messages"""

        try:
            response = await self.client.get(
                f"do/chat/{room_id}/messages?limit={limit}&offset={offset}",
                use_worker=True,
            )

            return response

        except CloudflareError as e:
            logger.error(f"Failed to get chat messages: {e}")
            raise

    async def get_chat_participants(self, room_id: str) -> Dict[str, Any]:
        """Get chat room participants"""

        try:
            response = await self.client.get(
                f"do/chat/{room_id}/participants", use_worker=True
            )

            return response

        except CloudflareError as e:
            logger.error(f"Failed to get chat participants: {e}")
            raise

    # WebSocket connection helpers
    def get_agent_websocket_url(self, session_id: str, user_id: str) -> str:
        """Get WebSocket URL for agent session"""

        if not self.client.worker_url:
            raise CloudflareError("Worker URL not configured")

        base_url = self.client.worker_url.replace("https://", "wss://").replace(
            "http://", "ws://"
        )
        return (
            f"{base_url}/do/agent/{session_id}?sessionId={session_id}&userId={user_id}"
        )

    def get_chat_websocket_url(self, room_id: str, user_id: str, username: str) -> str:
        """Get WebSocket URL for chat room"""

        if not self.client.worker_url:
            raise CloudflareError("Worker URL not configured")

        base_url = self.client.worker_url.replace("https://", "wss://").replace(
            "http://", "ws://"
        )
        return f"{base_url}/do/chat/{room_id}?userId={user_id}&username={username}"


class DurableObjectsWebSocket:
    """Helper class for WebSocket connections to Durable Objects"""

    def __init__(self, url: str):
        self.url = url
        self.websocket = None
        self.connected = False
        self.message_handlers = {}

    async def connect(self):
        """Connect to WebSocket"""
        try:
            # websockets is an optional dependency, imported lazily
            import websockets

            self.websocket = await websockets.connect(self.url)
            self.connected = True
            logger.info(f"Connected to Durable Object WebSocket: {self.url}")

            # Start message handling loop
            asyncio.create_task(self._message_loop())

        except Exception as e:
            logger.error(f"Failed to connect to WebSocket: {e}")
            raise CloudflareError(f"WebSocket connection failed: {e}")

    async def disconnect(self):
        """Disconnect from WebSocket"""
        if self.websocket and self.connected:
            await self.websocket.close()
            self.connected = False
            logger.info("Disconnected from Durable Object WebSocket")

    async def send_message(self, message_type: str, payload: Dict[str, Any]):
        """Send message via WebSocket"""
        if not self.connected or not self.websocket:
            raise CloudflareError("WebSocket not connected")

        message = {
            "type": message_type,
            "payload": payload,
            "timestamp": int(time.time()),
        }

        try:
            await self.websocket.send(json.dumps(message))
        except Exception as e:
            logger.error(f"Failed to send WebSocket message: {e}")
            raise CloudflareError(f"Failed to send message: {e}")

    def add_message_handler(self, message_type: str, handler):
        """Add a message handler for specific message types"""
        if message_type not in self.message_handlers:
            self.message_handlers[message_type] = []
        self.message_handlers[message_type].append(handler)

    async def _message_loop(self):
        """Handle incoming WebSocket messages"""
        try:
            async for message in self.websocket:
                try:
                    data = json.loads(message)
                    message_type = data.get("type")

                    if message_type in self.message_handlers:
                        for handler in self.message_handlers[message_type]:
                            try:
                                if callable(handler):
                                    if asyncio.iscoroutinefunction(handler):
                                        await handler(data)
                                    else:
                                        handler(data)
                            except Exception as e:
                                logger.error(f"Message handler error: {e}")

                except json.JSONDecodeError as e:
                    logger.error(f"Failed to parse WebSocket message: {e}")
                except Exception as e:
                    logger.error(f"WebSocket message processing error: {e}")

        except Exception as e:
            logger.error(f"WebSocket message loop error: {e}")
            self.connected = False

    # Context manager support
    async def __aenter__(self):
        await self.connect()
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        await self.disconnect()
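The WebSocket URL helpers above derive a `wss://` endpoint from the worker's HTTP URL by plain scheme substitution before appending the Durable Object route and query string. A minimal standalone sketch of that derivation (the worker URL and path below are hypothetical examples, not values from this repository):

```python
def to_websocket_url(worker_url: str, path: str) -> str:
    # Same scheme substitution used by get_agent_websocket_url /
    # get_chat_websocket_url: https -> wss, http -> ws.
    base = worker_url.replace("https://", "wss://").replace("http://", "ws://")
    return f"{base}{path}"

agent_ws = to_websocket_url(
    "https://example.workers.dev", "/do/agent/s1?sessionId=s1&userId=u1"
)
```

Because the substitution is string-based, a worker URL must start with `http://` or `https://` for the result to be a valid WebSocket URL; the client guards for a missing `worker_url` but not for an unexpected scheme.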
app/cloudflare/kv.py ADDED
@@ -0,0 +1,457 @@
"""
KV Storage integration for OpenManus
Provides interface to Cloudflare KV operations
"""

import hashlib
import json
import time
from typing import Any, Dict, List, Optional

from app.logger import logger

from .client import CloudflareClient, CloudflareError


class KVStorage:
    """Cloudflare KV Storage client"""

    def __init__(
        self,
        client: CloudflareClient,
        sessions_namespace_id: str,
        cache_namespace_id: str,
    ):
        self.client = client
        self.sessions_namespace_id = sessions_namespace_id
        self.cache_namespace_id = cache_namespace_id
        self.base_endpoint = f"accounts/{client.account_id}/storage/kv/namespaces"

    def _get_namespace_id(self, namespace_type: str) -> str:
        """Get namespace ID based on type"""
        if namespace_type == "cache":
            return self.cache_namespace_id
        return self.sessions_namespace_id

    async def set_value(
        self,
        key: str,
        value: Any,
        namespace_type: str = "sessions",
        ttl: Optional[int] = None,
        use_worker: bool = True,
    ) -> Dict[str, Any]:
        """Set a value in KV store"""

        namespace_id = self._get_namespace_id(namespace_type)

        # Serialize value to JSON (strings are stored as-is)
        if isinstance(value, str):
            serialized_value = value
        else:
            serialized_value = json.dumps(value)

        try:
            if use_worker:
                set_data = {
                    "key": key,
                    "value": serialized_value,
                    "namespace": namespace_type,
                }

                if ttl:
                    set_data["ttl"] = ttl

                response = await self.client.post(
                    "api/kv/set", data=set_data, use_worker=True
                )
            else:
                # Use KV API directly
                params = {}
                if ttl:
                    params["expiration_ttl"] = ttl

                query_string = "&".join([f"{k}={v}" for k, v in params.items()])
                endpoint = f"{self.base_endpoint}/{namespace_id}/values/{key}"
                if query_string:
                    endpoint += f"?{query_string}"

                response = await self.client.put(
                    endpoint, data={"value": serialized_value}
                )

            return {
                "success": True,
                "key": key,
                "namespace": namespace_type,
                "ttl": ttl,
                **response,
            }

        except CloudflareError as e:
            logger.error(f"KV set value failed: {e}")
            raise

    async def get_value(
        self,
        key: str,
        namespace_type: str = "sessions",
        parse_json: bool = True,
        use_worker: bool = True,
    ) -> Optional[Any]:
        """Get a value from KV store"""

        namespace_id = self._get_namespace_id(namespace_type)

        try:
            if use_worker:
                response = await self.client.get(
                    f"api/kv/get/{key}?namespace={namespace_type}", use_worker=True
                )

                if response and "value" in response:
                    value = response["value"]

                    if parse_json and isinstance(value, str):
                        try:
                            return json.loads(value)
                        except json.JSONDecodeError:
                            return value

                    return value
            else:
                response = await self.client.get(
                    f"{self.base_endpoint}/{namespace_id}/values/{key}"
                )

                # KV API returns the value directly as text
                value = (
                    response.get("result", {}).get("value")
                    if "result" in response
                    else response
                )

                if value and parse_json and isinstance(value, str):
                    try:
                        return json.loads(value)
                    except json.JSONDecodeError:
                        return value

                return value

        except CloudflareError as e:
            if e.status_code == 404:
                return None
            logger.error(f"KV get value failed: {e}")
            raise

        return None

    async def delete_value(
        self, key: str, namespace_type: str = "sessions", use_worker: bool = True
    ) -> Dict[str, Any]:
        """Delete a value from KV store"""

        namespace_id = self._get_namespace_id(namespace_type)

        try:
            if use_worker:
                response = await self.client.delete(
                    f"api/kv/delete/{key}?namespace={namespace_type}", use_worker=True
                )
            else:
                response = await self.client.delete(
                    f"{self.base_endpoint}/{namespace_id}/values/{key}"
                )

            return {
                "success": True,
                "key": key,
                "namespace": namespace_type,
                **response,
            }

        except CloudflareError as e:
            logger.error(f"KV delete value failed: {e}")
            raise

    async def list_keys(
        self,
        namespace_type: str = "sessions",
        prefix: str = "",
        limit: int = 1000,
        use_worker: bool = True,
    ) -> Dict[str, Any]:
        """List keys in KV namespace"""

        namespace_id = self._get_namespace_id(namespace_type)

        try:
            if use_worker:
                params = {"namespace": namespace_type, "prefix": prefix, "limit": limit}

                query_string = "&".join([f"{k}={v}" for k, v in params.items() if v])
                response = await self.client.get(
                    f"api/kv/list?{query_string}", use_worker=True
                )
            else:
                params = {"prefix": prefix, "limit": limit}

                query_string = "&".join([f"{k}={v}" for k, v in params.items() if v])
                response = await self.client.get(
                    f"{self.base_endpoint}/{namespace_id}/keys?{query_string}"
                )

            return {
                "namespace": namespace_type,
                "prefix": prefix,
                "keys": (
                    response.get("result", [])
                    if "result" in response
                    else response.get("keys", [])
                ),
                **response,
            }

        except CloudflareError as e:
            logger.error(f"KV list keys failed: {e}")
            raise

    # Session-specific methods
    async def set_session(
        self,
        session_id: str,
        session_data: Dict[str, Any],
        ttl: int = 86400,  # 24 hours default
    ) -> Dict[str, Any]:
        """Set session data"""

        data = {
            **session_data,
            "created_at": session_data.get("created_at", int(time.time())),
            "expires_at": int(time.time()) + ttl,
        }

        return await self.set_value(f"session:{session_id}", data, "sessions", ttl)

    async def get_session(self, session_id: str) -> Optional[Dict[str, Any]]:
        """Get session data"""

        session = await self.get_value(f"session:{session_id}", "sessions")

        if session and isinstance(session, dict):
            # Check if session is expired
            expires_at = session.get("expires_at")
            if expires_at and int(time.time()) > expires_at:
                await self.delete_session(session_id)
                return None

        return session

    async def delete_session(self, session_id: str) -> Dict[str, Any]:
        """Delete session data"""

        return await self.delete_value(f"session:{session_id}", "sessions")

    async def update_session(
        self, session_id: str, updates: Dict[str, Any], extend_ttl: Optional[int] = None
    ) -> Dict[str, Any]:
        """Update session data"""

        existing_session = await self.get_session(session_id)

        if not existing_session:
            raise CloudflareError("Session not found")

        updated_data = {**existing_session, **updates, "updated_at": int(time.time())}

        # Calculate TTL
        ttl = None
        if extend_ttl:
            ttl = extend_ttl
        elif existing_session.get("expires_at"):
            ttl = max(0, existing_session["expires_at"] - int(time.time()))

        return await self.set_session(session_id, updated_data, ttl or 86400)

    # Cache-specific methods
    async def set_cache(
        self, key: str, data: Any, ttl: int = 3600  # 1 hour default
    ) -> Dict[str, Any]:
        """Set cache data"""

        cache_data = {
            "data": data,
            "cached_at": int(time.time()),
            "expires_at": int(time.time()) + ttl,
        }

        return await self.set_value(f"cache:{key}", cache_data, "cache", ttl)

    async def get_cache(self, key: str) -> Optional[Any]:
        """Get cache data"""

        cached = await self.get_value(f"cache:{key}", "cache")

        if cached and isinstance(cached, dict):
            # Check if cache is expired
            expires_at = cached.get("expires_at")
            if expires_at and int(time.time()) > expires_at:
                await self.delete_cache(key)
                return None

            return cached.get("data")

        return cached

    async def delete_cache(self, key: str) -> Dict[str, Any]:
        """Delete cache data"""

        return await self.delete_value(f"cache:{key}", "cache")

    # User-specific methods
    async def set_user_cache(
        self, user_id: str, key: str, data: Any, ttl: int = 3600
    ) -> Dict[str, Any]:
        """Set user-specific cache"""

        user_key = f"user:{user_id}:{key}"
        return await self.set_cache(user_key, data, ttl)

    async def get_user_cache(self, user_id: str, key: str) -> Optional[Any]:
        """Get user-specific cache"""

        user_key = f"user:{user_id}:{key}"
        return await self.get_cache(user_key)

    async def delete_user_cache(self, user_id: str, key: str) -> Dict[str, Any]:
        """Delete user-specific cache"""

        user_key = f"user:{user_id}:{key}"
        return await self.delete_cache(user_key)

    async def get_user_cache_keys(self, user_id: str, limit: int = 100) -> List[str]:
        """Get all cache keys for a user"""

        result = await self.list_keys("cache", f"cache:user:{user_id}:", limit)

        keys = []
        for key_info in result.get("keys", []):
            if isinstance(key_info, dict):
                key = key_info.get("name", "")
            else:
                key = str(key_info)

            # Remove prefix to get the actual key
            if key.startswith(f"cache:user:{user_id}:"):
                clean_key = key.replace(f"cache:user:{user_id}:", "")
                keys.append(clean_key)

        return keys

    # Conversation caching
    async def cache_conversation(
        self,
        conversation_id: str,
        messages: List[Dict[str, Any]],
        ttl: int = 7200,  # 2 hours default
    ) -> Dict[str, Any]:
        """Cache conversation messages"""

        return await self.set_cache(
            f"conversation:{conversation_id}",
            {"messages": messages, "last_updated": int(time.time())},
            ttl,
        )

    async def get_cached_conversation(
        self, conversation_id: str
    ) -> Optional[Dict[str, Any]]:
        """Get cached conversation"""

        return await self.get_cache(f"conversation:{conversation_id}")

    # Agent execution caching
    async def cache_agent_execution(
        self, execution_id: str, execution_data: Dict[str, Any], ttl: int = 3600
    ) -> Dict[str, Any]:
        """Cache agent execution data"""

        return await self.set_cache(f"execution:{execution_id}", execution_data, ttl)

    async def get_cached_agent_execution(
        self, execution_id: str
    ) -> Optional[Dict[str, Any]]:
        """Get cached agent execution"""

        return await self.get_cache(f"execution:{execution_id}")

    # Batch operations
    async def set_batch(
        self,
        items: List[Dict[str, Any]],
        namespace_type: str = "cache",
        ttl: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Set multiple values (simulated batch operation)"""

        results = []
        successful = 0
        failed = 0

        for item in items:
            try:
                key = item["key"]
                value = item["value"]
                item_ttl = item.get("ttl", ttl)

                result = await self.set_value(key, value, namespace_type, item_ttl)
                results.append({"key": key, "success": True, "result": result})
                successful += 1

            except Exception as e:
                results.append(
                    {"key": item.get("key"), "success": False, "error": str(e)}
                )
                failed += 1

        return {
            "success": failed == 0,
            "successful": successful,
            "failed": failed,
            "total": len(items),
            "results": results,
        }

    async def get_batch(
        self, keys: List[str], namespace_type: str = "cache"
    ) -> Dict[str, Any]:
        """Get multiple values (simulated batch operation)"""

        results = {}

        for key in keys:
            try:
                value = await self.get_value(key, namespace_type)
                results[key] = value
            except Exception as e:
                logger.error(f"Failed to get key {key}: {e}")
                results[key] = None

        return results

    def _hash_params(self, params: Dict[str, Any]) -> str:
        """Create a hash for cache keys from parameters"""

        if not params:
            return "no-params"

        # Deterministic hash for cache keys: sorted-key JSON, truncated MD5
        params_str = json.dumps(params, sort_keys=True)
        return hashlib.md5(params_str.encode()).hexdigest()[:16]
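The `_hash_params` helper builds deterministic cache keys: serialising the parameter dict as JSON with sorted keys makes the hash independent of insertion order, and truncating the MD5 digest keeps keys short. A standalone sketch of the same logic:

```python
import hashlib
import json


def hash_params(params: dict) -> str:
    # Mirrors KVStorage._hash_params: sorted-key JSON serialisation keeps the
    # hash stable across dict orderings; 16 hex chars keep cache keys short.
    if not params:
        return "no-params"
    params_str = json.dumps(params, sort_keys=True)
    return hashlib.md5(params_str.encode()).hexdigest()[:16]
```

Two dicts with the same entries in different order therefore map to the same cache key, so repeated lookups with reordered query parameters hit the same cached entry.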
app/cloudflare/r2.py ADDED
@@ -0,0 +1,434 @@
"""
R2 Storage integration for OpenManus
Provides interface to Cloudflare R2 storage operations
"""

from typing import Any, BinaryIO, Dict, Optional

from app.logger import logger

from .client import CloudflareClient, CloudflareError


class R2Storage:
    """Cloudflare R2 Storage client"""

    def __init__(
        self,
        client: CloudflareClient,
        storage_bucket: str,
        assets_bucket: Optional[str] = None,
    ):
        self.client = client
        self.storage_bucket = storage_bucket
        self.assets_bucket = assets_bucket or storage_bucket
        self.base_endpoint = f"accounts/{client.account_id}/r2/buckets"

    def _get_bucket_name(self, bucket_type: str = "storage") -> str:
        """Get bucket name based on type"""
        if bucket_type == "assets":
            return self.assets_bucket
        return self.storage_bucket

    async def upload_file(
        self,
        key: str,
        file_data: bytes,
        content_type: str = "application/octet-stream",
        bucket_type: str = "storage",
        metadata: Optional[Dict[str, str]] = None,
        use_worker: bool = True,
    ) -> Dict[str, Any]:
        """Upload a file to R2"""

        bucket_name = self._get_bucket_name(bucket_type)

        try:
            if use_worker:
                # Use worker endpoint for better performance
                form_data = {
                    "file": file_data,
                    "bucket": bucket_type,
                    "key": key,
                    "contentType": content_type,
                }

                if metadata:
                    form_data["metadata"] = metadata

                response = await self.client.post(
                    "api/files", data=form_data, use_worker=True
                )
            else:
                # Use R2 API directly
                headers = {"Content-Type": content_type}

                if metadata:
                    for k, v in metadata.items():
                        headers[f"x-amz-meta-{k}"] = v

                response = await self.client.upload_file(
                    f"{self.base_endpoint}/{bucket_name}/objects/{key}",
                    file_data,
                    content_type,
                    headers,
                )

            return {
                "success": True,
                "key": key,
                "bucket": bucket_type,
                "bucket_name": bucket_name,
                "size": len(file_data),
                "content_type": content_type,
                "url": f"/{bucket_type}/{key}",
                **response,
            }

        except CloudflareError as e:
            logger.error(f"R2 upload failed: {e}")
            raise

    async def upload_file_stream(
        self,
        key: str,
        file_stream: BinaryIO,
        content_type: str = "application/octet-stream",
        bucket_type: str = "storage",
        metadata: Optional[Dict[str, str]] = None,
    ) -> Dict[str, Any]:
        """Upload a file from stream"""

        file_data = file_stream.read()
        return await self.upload_file(
            key, file_data, content_type, bucket_type, metadata
        )

    async def get_file(
        self, key: str, bucket_type: str = "storage", use_worker: bool = True
    ) -> Optional[Dict[str, Any]]:
        """Get a file from R2"""

        bucket_name = self._get_bucket_name(bucket_type)

        try:
            if use_worker:
                response = await self.client.get(
                    f"api/files/{key}?bucket={bucket_type}", use_worker=True
                )

                if response:
                    return {
                        "key": key,
                        "bucket": bucket_type,
                        "bucket_name": bucket_name,
                        "data": response,  # Binary data would be handled by worker
                        "exists": True,
                    }
            else:
                response = await self.client.get(
                    f"{self.base_endpoint}/{bucket_name}/objects/{key}"
                )

                return {
                    "key": key,
                    "bucket": bucket_type,
                    "bucket_name": bucket_name,
                    "data": response,
                    "exists": True,
                }

        except CloudflareError as e:
            if e.status_code == 404:
                return None
            logger.error(f"R2 get file failed: {e}")
            raise

        return None

    async def delete_file(
        self, key: str, bucket_type: str = "storage", use_worker: bool = True
    ) -> Dict[str, Any]:
        """Delete a file from R2"""

        bucket_name = self._get_bucket_name(bucket_type)

        try:
            if use_worker:
                response = await self.client.delete(
                    f"api/files/{key}?bucket={bucket_type}", use_worker=True
                )
            else:
                response = await self.client.delete(
                    f"{self.base_endpoint}/{bucket_name}/objects/{key}"
                )

            return {
                "success": True,
                "key": key,
                "bucket": bucket_type,
                "bucket_name": bucket_name,
                **response,
            }

        except CloudflareError as e:
            logger.error(f"R2 delete failed: {e}")
            raise

    async def list_files(
        self,
        bucket_type: str = "storage",
        prefix: str = "",
        limit: int = 1000,
        use_worker: bool = True,
    ) -> Dict[str, Any]:
        """List files in R2 bucket"""

        bucket_name = self._get_bucket_name(bucket_type)

        try:
            if use_worker:
                params = {"bucket": bucket_type, "prefix": prefix, "limit": limit}

                query_string = "&".join([f"{k}={v}" for k, v in params.items() if v])
                response = await self.client.get(
                    f"api/files/list?{query_string}", use_worker=True
                )
            else:
                params = {"prefix": prefix, "max-keys": limit}

                query_string = "&".join([f"{k}={v}" for k, v in params.items() if v])
                response = await self.client.get(
                    f"{self.base_endpoint}/{bucket_name}/objects?{query_string}"
                )

            return {
                "bucket": bucket_type,
                "bucket_name": bucket_name,
                "prefix": prefix,
                "files": response.get("objects", []),
                "truncated": response.get("truncated", False),
                **response,
            }

        except CloudflareError as e:
            logger.error(f"R2 list files failed: {e}")
            raise

    async def get_file_metadata(
        self, key: str, bucket_type: str = "storage", use_worker: bool = True
    ) -> Optional[Dict[str, Any]]:
        """Get file metadata without downloading content"""

        bucket_name = self._get_bucket_name(bucket_type)

        try:
            if use_worker:
                response = await self.client.get(
                    f"api/files/{key}/metadata?bucket={bucket_type}", use_worker=True
                )
            else:
                # Use a minimal range request to retrieve headers only
                response = await self.client.get(
                    f"{self.base_endpoint}/{bucket_name}/objects/{key}",
                    headers={"Range": "bytes=0-0"},
                )

            if response:
                return {
                    "key": key,
                    "bucket": bucket_type,
                    "bucket_name": bucket_name,
                    **response,
                }

        except CloudflareError as e:
            if e.status_code == 404:
                return None
            logger.error(f"R2 get metadata failed: {e}")
            raise

        return None

    async def copy_file(
        self,
        source_key: str,
        destination_key: str,
        source_bucket: str = "storage",
        destination_bucket: str = "storage",
        use_worker: bool = True,
    ) -> Dict[str, Any]:
        """Copy a file within R2 or between buckets"""

        try:
            if use_worker:
                copy_data = {
                    "sourceKey": source_key,
                    "destinationKey": destination_key,
                    "sourceBucket": source_bucket,
                    "destinationBucket": destination_bucket,
                }

                response = await self.client.post(
                    "api/files/copy", data=copy_data, use_worker=True
                )
            else:
                # Get source file first
                source_file = await self.get_file(source_key, source_bucket, False)

                if not source_file:
                    raise CloudflareError(f"Source file {source_key} not found")

                # Upload to destination
                response = await self.upload_file(
                    destination_key,
                    source_file["data"],
                    bucket_type=destination_bucket,
                    use_worker=False,
                )

            return {
                "success": True,
                "source_key": source_key,
                "destination_key": destination_key,
                "source_bucket": source_bucket,
                "destination_bucket": destination_bucket,
                **response,
            }

        except CloudflareError as e:
            logger.error(f"R2 copy failed: {e}")
            raise

    async def move_file(
        self,
        source_key: str,
        destination_key: str,
        source_bucket: str = "storage",
        destination_bucket: str = "storage",
        use_worker: bool = True,
    ) -> Dict[str, Any]:
        """Move a file (copy then delete)"""

        try:
            # Copy file first
            copy_result = await self.copy_file(
                source_key,
                destination_key,
                source_bucket,
                destination_bucket,
                use_worker,
            )

            # Delete source file
            delete_result = await self.delete_file(
                source_key, source_bucket, use_worker
            )

            return {
                "success": True,
                "source_key": source_key,
                "destination_key": destination_key,
                "source_bucket": source_bucket,
                "destination_bucket": destination_bucket,
                "copy_result": copy_result,
                "delete_result": delete_result,
            }

        except CloudflareError as e:
            logger.error(f"R2 move failed: {e}")
            raise

    async def generate_presigned_url(
        self,
        key: str,
        bucket_type: str = "storage",
        expires_in: int = 3600,
        method: str = "GET",
    ) -> Dict[str, Any]:
        """Generate a presigned URL for direct access"""

        # Note: This would typically require additional R2 configuration.
        # For now, request a URL from the worker endpoint.

        try:
            url_data = {
                "key": key,
                "bucket": bucket_type,
                "expiresIn": expires_in,
                "method": method,
            }

            response = await self.client.post(
                "api/files/presigned-url", data=url_data, use_worker=True
            )

            return {
                "success": True,
                "key": key,
                "bucket": bucket_type,
                "method": method,
                "expires_in": expires_in,
                **response,
            }

        except CloudflareError as e:
            logger.error(f"R2 presigned URL generation failed: {e}")
            raise

    async def get_storage_stats(self, use_worker: bool = True) -> Dict[str, Any]:
381
+ """Get storage statistics"""
382
+
383
+ try:
384
+ if use_worker:
385
+ response = await self.client.get("api/files/stats", use_worker=True)
386
+ else:
387
+ # Get stats for both buckets
388
+ storage_list = await self.list_files("storage", use_worker=False)
389
+ assets_list = await self.list_files("assets", use_worker=False)
390
+
391
+ storage_size = sum(
392
+ file.get("size", 0) for file in storage_list.get("files", [])
393
+ )
394
+ assets_size = sum(
395
+ file.get("size", 0) for file in assets_list.get("files", [])
396
+ )
397
+
398
+ response = {
399
+ "storage": {
400
+ "file_count": len(storage_list.get("files", [])),
401
+ "total_size": storage_size,
402
+ },
403
+ "assets": {
404
+ "file_count": len(assets_list.get("files", [])),
405
+ "total_size": assets_size,
406
+ },
407
+ "total": {
408
+ "file_count": len(storage_list.get("files", []))
409
+ + len(assets_list.get("files", [])),
410
+ "total_size": storage_size + assets_size,
411
+ },
412
+ }
413
+
414
+ return response
415
+
416
+ except CloudflareError as e:
417
+ logger.error(f"R2 storage stats failed: {e}")
418
+ raise
419
+
420
+ def create_file_stream(self, data: bytes) -> io.BytesIO:
421
+ """Create a file stream from bytes"""
422
+ return io.BytesIO(data)
423
+
424
+ def get_public_url(self, key: str, bucket_type: str = "storage") -> str:
425
+ """Get public URL for a file (if bucket is configured for public access)"""
426
+ bucket_name = self._get_bucket_name(bucket_type)
427
+
428
+ # This would depend on your R2 custom domain configuration
429
+ # For now, return the worker endpoint
430
+ if self.client.worker_url:
431
+ return f"{self.client.worker_url}/api/files/{key}?bucket={bucket_type}"
432
+
433
+ # Default R2 URL format (requires public access configuration)
434
+ return f"https://pub-{bucket_name}.r2.dev/{key}"
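The Worker-or-direct fallback in `get_public_url` above can be sketched as a standalone function (a minimal sketch; the URL values below are hypothetical, and the `pub-….r2.dev` form still requires the bucket to be configured for public access):

```python
from typing import Optional


def get_public_url(key: str, bucket_name: str,
                   worker_url: Optional[str] = None,
                   bucket_type: str = "storage") -> str:
    # Prefer the Worker endpoint when one is configured...
    if worker_url:
        return f"{worker_url}/api/files/{key}?bucket={bucket_type}"
    # ...otherwise fall back to the default public r2.dev URL form.
    return f"https://pub-{bucket_name}.r2.dev/{key}"


print(get_public_url("report.pdf", "openmanus-storage",
                     worker_url="https://files.example.workers.dev"))
# → https://files.example.workers.dev/api/files/report.pdf?bucket=storage
print(get_public_url("report.pdf", "openmanus-storage"))
# → https://pub-openmanus-storage.r2.dev/report.pdf
```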
app/config.py ADDED
@@ -0,0 +1,372 @@
1
+ import json
2
+ import threading
3
+ import tomllib
4
+ from pathlib import Path
5
+ from typing import Dict, List, Optional
6
+
7
+ from pydantic import BaseModel, Field
8
+
9
+
10
+ def get_project_root() -> Path:
11
+ """Get the project root directory"""
12
+ return Path(__file__).resolve().parent.parent
13
+
14
+
15
+ PROJECT_ROOT = get_project_root()
16
+ WORKSPACE_ROOT = PROJECT_ROOT / "workspace"
17
+
18
+
19
+ class LLMSettings(BaseModel):
20
+ model: str = Field(..., description="Model name")
21
+ base_url: str = Field(..., description="API base URL")
22
+ api_key: str = Field(..., description="API key")
23
+ max_tokens: int = Field(4096, description="Maximum number of tokens per request")
24
+ max_input_tokens: Optional[int] = Field(
25
+ None,
26
+ description="Maximum input tokens to use across all requests (None for unlimited)",
27
+ )
28
+ temperature: float = Field(1.0, description="Sampling temperature")
29
+ api_type: str = Field(..., description="Azure, Openai, or Ollama")
30
+ api_version: str = Field(..., description="Azure OpenAI API version, required when api_type is Azure")
31
+
32
+
33
+ class ProxySettings(BaseModel):
34
+ server: Optional[str] = Field(None, description="Proxy server address")
35
+ username: Optional[str] = Field(None, description="Proxy username")
36
+ password: Optional[str] = Field(None, description="Proxy password")
37
+
38
+
39
+ class SearchSettings(BaseModel):
40
+ engine: str = Field(default="Google", description="Search engine for the LLM to use")
41
+ fallback_engines: List[str] = Field(
42
+ default_factory=lambda: ["DuckDuckGo", "Baidu", "Bing"],
43
+ description="Fallback search engines to try if the primary engine fails",
44
+ )
45
+ retry_delay: int = Field(
46
+ default=60,
47
+ description="Seconds to wait before retrying all engines again after they all fail",
48
+ )
49
+ max_retries: int = Field(
50
+ default=3,
51
+ description="Maximum number of times to retry all engines when all fail",
52
+ )
53
+ lang: str = Field(
54
+ default="en",
55
+ description="Language code for search results (e.g., en, zh, fr)",
56
+ )
57
+ country: str = Field(
58
+ default="us",
59
+ description="Country code for search results (e.g., us, cn, uk)",
60
+ )
61
+
62
+
63
+ class RunflowSettings(BaseModel):
64
+ use_data_analysis_agent: bool = Field(
65
+ default=False, description="Enable data analysis agent in run flow"
66
+ )
67
+
68
+
69
+ class BrowserSettings(BaseModel):
70
+ headless: bool = Field(False, description="Whether to run browser in headless mode")
71
+ disable_security: bool = Field(
72
+ True, description="Disable browser security features"
73
+ )
74
+ extra_chromium_args: List[str] = Field(
75
+ default_factory=list, description="Extra arguments to pass to the browser"
76
+ )
77
+ chrome_instance_path: Optional[str] = Field(
78
+ None, description="Path to a Chrome instance to use"
79
+ )
80
+ wss_url: Optional[str] = Field(
81
+ None, description="Connect to a browser instance via WebSocket"
82
+ )
83
+ cdp_url: Optional[str] = Field(
84
+ None, description="Connect to a browser instance via CDP"
85
+ )
86
+ proxy: Optional[ProxySettings] = Field(
87
+ None, description="Proxy settings for the browser"
88
+ )
89
+ max_content_length: int = Field(
90
+ 2000, description="Maximum length for content retrieval operations"
91
+ )
92
+
93
+
94
+ class SandboxSettings(BaseModel):
95
+ """Configuration for the execution sandbox"""
96
+
97
+ use_sandbox: bool = Field(False, description="Whether to use the sandbox")
98
+ image: str = Field("python:3.12-slim", description="Base image")
99
+ work_dir: str = Field("/workspace", description="Container working directory")
100
+ memory_limit: str = Field("512m", description="Memory limit")
101
+ cpu_limit: float = Field(1.0, description="CPU limit")
102
+ timeout: int = Field(300, description="Default command timeout (seconds)")
103
+ network_enabled: bool = Field(
104
+ False, description="Whether network access is allowed"
105
+ )
106
+
107
+
108
+ class DaytonaSettings(BaseModel):
109
+ daytona_api_key: str
110
+ daytona_server_url: Optional[str] = Field(
111
+ "https://app.daytona.io/api", description="Daytona API server URL"
112
+ )
113
+ daytona_target: Optional[str] = Field("us", description="enum ['eu', 'us']")
114
+ sandbox_image_name: Optional[str] = Field("whitezxj/sandbox:0.1.0", description="Sandbox image name")
115
+ sandbox_entrypoint: Optional[str] = Field(
116
+ "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf",
117
+ description="Sandbox entrypoint command",
118
+ )
119
+ # sandbox_id: Optional[str] = Field(
120
+ # None, description="ID of the daytona sandbox to use, if any"
121
+ # )
122
+ VNC_password: Optional[str] = Field(
123
+ "123456", description="VNC password for the vnc service in sandbox"
124
+ )
125
+
126
+
127
+ class MCPServerConfig(BaseModel):
128
+ """Configuration for a single MCP server"""
129
+
130
+ type: str = Field(..., description="Server connection type (sse or stdio)")
131
+ url: Optional[str] = Field(None, description="Server URL for SSE connections")
132
+ command: Optional[str] = Field(None, description="Command for stdio connections")
133
+ args: List[str] = Field(
134
+ default_factory=list, description="Arguments for stdio command"
135
+ )
136
+
137
+
138
+ class MCPSettings(BaseModel):
139
+ """Configuration for MCP (Model Context Protocol)"""
140
+
141
+ server_reference: str = Field(
142
+ "app.mcp.server", description="Module reference for the MCP server"
143
+ )
144
+ servers: Dict[str, MCPServerConfig] = Field(
145
+ default_factory=dict, description="MCP server configurations"
146
+ )
147
+
148
+ @classmethod
149
+ def load_server_config(cls) -> Dict[str, MCPServerConfig]:
150
+ """Load MCP server configuration from JSON file"""
151
+ config_path = PROJECT_ROOT / "config" / "mcp.json"
152
+
153
+ try:
154
+ config_file = config_path if config_path.exists() else None
155
+ if not config_file:
156
+ return {}
157
+
158
+ with config_file.open() as f:
159
+ data = json.load(f)
160
+ servers = {}
161
+
162
+ for server_id, server_config in data.get("mcpServers", {}).items():
163
+ servers[server_id] = MCPServerConfig(
164
+ type=server_config["type"],
165
+ url=server_config.get("url"),
166
+ command=server_config.get("command"),
167
+ args=server_config.get("args", []),
168
+ )
169
+ return servers
170
+ except Exception as e:
171
+ raise ValueError(f"Failed to load MCP server config: {e}")
172
+
173
+
174
+ class AppConfig(BaseModel):
175
+ llm: Dict[str, LLMSettings]
176
+ sandbox: Optional[SandboxSettings] = Field(
177
+ None, description="Sandbox configuration"
178
+ )
179
+ browser_config: Optional[BrowserSettings] = Field(
180
+ None, description="Browser configuration"
181
+ )
182
+ search_config: Optional[SearchSettings] = Field(
183
+ None, description="Search configuration"
184
+ )
185
+ mcp_config: Optional[MCPSettings] = Field(None, description="MCP configuration")
186
+ run_flow_config: Optional[RunflowSettings] = Field(
187
+ None, description="Run flow configuration"
188
+ )
189
+ daytona_config: Optional[DaytonaSettings] = Field(
190
+ None, description="Daytona configuration"
191
+ )
192
+
193
+ class Config:
194
+ arbitrary_types_allowed = True
195
+
196
+
197
+ class Config:
198
+ _instance = None
199
+ _lock = threading.Lock()
200
+ _initialized = False
201
+
202
+ def __new__(cls):
203
+ if cls._instance is None:
204
+ with cls._lock:
205
+ if cls._instance is None:
206
+ cls._instance = super().__new__(cls)
207
+ return cls._instance
208
+
209
+ def __init__(self):
210
+ if not self._initialized:
211
+ with self._lock:
212
+ if not self._initialized:
213
+ self._config = None
214
+ self._load_initial_config()
215
+ self._initialized = True
216
+
217
+ @staticmethod
218
+ def _get_config_path() -> Path:
219
+ root = PROJECT_ROOT
220
+ config_path = root / "config" / "config.toml"
221
+ if config_path.exists():
222
+ return config_path
223
+ example_path = root / "config" / "config.example.toml"
224
+ if example_path.exists():
225
+ return example_path
226
+ raise FileNotFoundError("No configuration file found in config directory")
227
+
228
+ def _load_config(self) -> dict:
229
+ config_path = self._get_config_path()
230
+ with config_path.open("rb") as f:
231
+ return tomllib.load(f)
232
+
233
+ def _load_initial_config(self):
234
+ raw_config = self._load_config()
235
+ base_llm = raw_config.get("llm", {})
236
+ llm_overrides = {
237
+ k: v for k, v in raw_config.get("llm", {}).items() if isinstance(v, dict)
238
+ }
239
+
240
+ default_settings = {
241
+ "model": base_llm.get("model"),
242
+ "base_url": base_llm.get("base_url"),
243
+ "api_key": base_llm.get("api_key"),
244
+ "max_tokens": base_llm.get("max_tokens", 4096),
245
+ "max_input_tokens": base_llm.get("max_input_tokens"),
246
+ "temperature": base_llm.get("temperature", 1.0),
247
+ "api_type": base_llm.get("api_type", ""),
248
+ "api_version": base_llm.get("api_version", ""),
249
+ }
250
+
251
+ # handle browser config.
252
+ browser_config = raw_config.get("browser", {})
253
+ browser_settings = None
254
+
255
+ if browser_config:
256
+ # handle proxy settings.
257
+ proxy_config = browser_config.get("proxy", {})
258
+ proxy_settings = None
259
+
260
+ if proxy_config and proxy_config.get("server"):
261
+ proxy_settings = ProxySettings(
262
+ **{
263
+ k: v
264
+ for k, v in proxy_config.items()
265
+ if k in ["server", "username", "password"] and v
266
+ }
267
+ )
268
+
269
+ # filter valid browser config parameters.
270
+ valid_browser_params = {
271
+ k: v
272
+ for k, v in browser_config.items()
273
+ if k in BrowserSettings.__annotations__ and v is not None
274
+ }
275
+
276
+ # if there is proxy settings, add it to the parameters.
277
+ if proxy_settings:
278
+ valid_browser_params["proxy"] = proxy_settings
279
+
280
+ # only create BrowserSettings when there are valid parameters.
281
+ if valid_browser_params:
282
+ browser_settings = BrowserSettings(**valid_browser_params)
283
+
284
+ search_config = raw_config.get("search", {})
285
+ search_settings = None
286
+ if search_config:
287
+ search_settings = SearchSettings(**search_config)
288
+ sandbox_config = raw_config.get("sandbox", {})
289
+ if sandbox_config:
290
+ sandbox_settings = SandboxSettings(**sandbox_config)
291
+ else:
292
+ sandbox_settings = SandboxSettings()
293
+ daytona_config = raw_config.get("daytona", {})
294
+ if daytona_config:
295
+ daytona_settings = DaytonaSettings(**daytona_config)
296
+ else:
297
+ daytona_settings = DaytonaSettings(daytona_api_key="")  # daytona_api_key has no default, so pass an empty key
298
+
299
+ mcp_config = raw_config.get("mcp", {})
300
+ mcp_settings = None
301
+ if mcp_config:
302
+ # Load server configurations from JSON
303
+ mcp_config["servers"] = MCPSettings.load_server_config()
304
+ mcp_settings = MCPSettings(**mcp_config)
305
+ else:
306
+ mcp_settings = MCPSettings(servers=MCPSettings.load_server_config())
307
+
308
+ run_flow_config = raw_config.get("runflow")
309
+ if run_flow_config:
310
+ run_flow_settings = RunflowSettings(**run_flow_config)
311
+ else:
312
+ run_flow_settings = RunflowSettings()
313
+ config_dict = {
314
+ "llm": {
315
+ "default": default_settings,
316
+ **{
317
+ name: {**default_settings, **override_config}
318
+ for name, override_config in llm_overrides.items()
319
+ },
320
+ },
321
+ "sandbox": sandbox_settings,
322
+ "browser_config": browser_settings,
323
+ "search_config": search_settings,
324
+ "mcp_config": mcp_settings,
325
+ "run_flow_config": run_flow_settings,
326
+ "daytona_config": daytona_settings,
327
+ }
328
+
329
+ self._config = AppConfig(**config_dict)
330
+
331
+ @property
332
+ def llm(self) -> Dict[str, LLMSettings]:
333
+ return self._config.llm
334
+
335
+ @property
336
+ def sandbox(self) -> SandboxSettings:
337
+ return self._config.sandbox
338
+
339
+ @property
340
+ def daytona(self) -> DaytonaSettings:
341
+ return self._config.daytona_config
342
+
343
+ @property
344
+ def browser_config(self) -> Optional[BrowserSettings]:
345
+ return self._config.browser_config
346
+
347
+ @property
348
+ def search_config(self) -> Optional[SearchSettings]:
349
+ return self._config.search_config
350
+
351
+ @property
352
+ def mcp_config(self) -> MCPSettings:
353
+ """Get the MCP configuration"""
354
+ return self._config.mcp_config
355
+
356
+ @property
357
+ def run_flow_config(self) -> RunflowSettings:
358
+ """Get the Run Flow configuration"""
359
+ return self._config.run_flow_config
360
+
361
+ @property
362
+ def workspace_root(self) -> Path:
363
+ """Get the workspace root directory"""
364
+ return WORKSPACE_ROOT
365
+
366
+ @property
367
+ def root_path(self) -> Path:
368
+ """Get the root path of the application"""
369
+ return PROJECT_ROOT
370
+
371
+
372
+ config = Config()
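The per-model override merge in `_load_initial_config` boils down to a dict spread: every named LLM config starts from the `default` settings and overlays only the keys it sets. A standalone sketch (the model names and values are hypothetical):

```python
default_settings = {"model": "gpt-4o", "max_tokens": 4096, "temperature": 1.0}
llm_overrides = {"vision": {"model": "gpt-4o-vision"}}

# Same shape as the "llm" entry built in config_dict: "default" plus one
# merged entry per override section, later keys winning.
llm = {
    "default": default_settings,
    **{
        name: {**default_settings, **override}
        for name, override in llm_overrides.items()
    },
}

print(llm["vision"]["model"])       # → gpt-4o-vision (overridden)
print(llm["vision"]["max_tokens"])  # → 4096 (inherited from default)
```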
app/config_cloudflare.py ADDED
@@ -0,0 +1,145 @@
1
+ """
2
+ Configuration extensions for Cloudflare integration
3
+ """
4
+
5
+ import os
6
+ from typing import Optional
7
+
8
+ from pydantic import BaseModel, Field
9
+
10
+
11
+ class CloudflareSettings(BaseModel):
12
+ """Cloudflare configuration settings"""
13
+
14
+ api_token: Optional[str] = Field(
15
+ default_factory=lambda: os.getenv("CLOUDFLARE_API_TOKEN"),
16
+ description="Cloudflare API token",
17
+ )
18
+
19
+ account_id: Optional[str] = Field(
20
+ default_factory=lambda: os.getenv("CLOUDFLARE_ACCOUNT_ID"),
21
+ description="Cloudflare account ID",
22
+ )
23
+
24
+ worker_url: Optional[str] = Field(
25
+ default_factory=lambda: os.getenv("CLOUDFLARE_WORKER_URL"),
26
+ description="Cloudflare Worker URL",
27
+ )
28
+
29
+ # D1 Database settings
30
+ d1_database_id: Optional[str] = Field(
31
+ default_factory=lambda: os.getenv("CLOUDFLARE_D1_DATABASE_ID"),
32
+ description="D1 database ID",
33
+ )
34
+
35
+ # KV Namespace settings
36
+ kv_sessions_id: Optional[str] = Field(
37
+ default_factory=lambda: os.getenv("CLOUDFLARE_KV_SESSIONS_ID"),
38
+ description="KV namespace ID for sessions",
39
+ )
40
+
41
+ kv_cache_id: Optional[str] = Field(
42
+ default_factory=lambda: os.getenv("CLOUDFLARE_KV_CACHE_ID"),
43
+ description="KV namespace ID for cache",
44
+ )
45
+
46
+ # R2 Bucket settings
47
+ r2_storage_bucket: str = Field(
48
+ default_factory=lambda: os.getenv(
49
+ "CLOUDFLARE_R2_STORAGE_BUCKET", "openmanus-storage"
50
+ ),
51
+ description="R2 storage bucket name",
52
+ )
53
+
54
+ r2_assets_bucket: str = Field(
55
+ default_factory=lambda: os.getenv(
56
+ "CLOUDFLARE_R2_ASSETS_BUCKET", "openmanus-assets"
57
+ ),
58
+ description="R2 assets bucket name",
59
+ )
60
+
61
+ # Connection settings
62
+ timeout: int = Field(default=30, description="Request timeout in seconds")
63
+
64
+ def is_configured(self) -> bool:
65
+ """Check if minimum Cloudflare configuration is available"""
66
+ return bool(self.api_token and self.account_id)
67
+
68
+ def has_worker(self) -> bool:
69
+ """Check if worker URL is configured"""
70
+ return bool(self.worker_url)
71
+
72
+ def has_d1(self) -> bool:
73
+ """Check if D1 database is configured"""
74
+ return bool(self.d1_database_id)
75
+
76
+ def has_kv(self) -> bool:
77
+ """Check if KV namespaces are configured"""
78
+ return bool(self.kv_sessions_id and self.kv_cache_id)
79
+
80
+
81
+ class HuggingFaceSettings(BaseModel):
82
+ """Hugging Face configuration settings"""
83
+
84
+ token: Optional[str] = Field(
85
+ default_factory=lambda: os.getenv("HUGGINGFACE_TOKEN"),
86
+ description="Hugging Face API token",
87
+ )
88
+
89
+ cache_dir: str = Field(
90
+ default_factory=lambda: os.getenv(
91
+ "HF_HOME", "/app/OpenManus/.cache/huggingface"
92
+ ),
93
+ description="Hugging Face cache directory",
94
+ )
95
+
96
+ model_cache_size: int = Field(
97
+ default=5, description="Maximum number of models to cache"
98
+ )
99
+
100
+
101
+ class DeploymentSettings(BaseModel):
102
+ """Deployment-specific settings"""
103
+
104
+ environment: str = Field(
105
+ default_factory=lambda: os.getenv("ENVIRONMENT", "development"),
106
+ description="Deployment environment",
107
+ )
108
+
109
+ debug: bool = Field(
110
+ default_factory=lambda: os.getenv("DEBUG", "false").lower() == "true",
111
+ description="Enable debug mode",
112
+ )
113
+
114
+ log_level: str = Field(
115
+ default_factory=lambda: os.getenv("LOG_LEVEL", "INFO"),
116
+ description="Logging level",
117
+ )
118
+
119
+ # Gradio settings
120
+ server_name: str = Field(
121
+ default_factory=lambda: os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
122
+ description="Gradio server name",
123
+ )
124
+
125
+ server_port: int = Field(
126
+ default_factory=lambda: int(os.getenv("GRADIO_SERVER_PORT", "7860")),
127
+ description="Gradio server port",
128
+ )
129
+
130
+ # Security settings
131
+ secret_key: Optional[str] = Field(
132
+ default_factory=lambda: os.getenv("SECRET_KEY"),
133
+ description="Secret key for sessions",
134
+ )
135
+
136
+ jwt_secret: Optional[str] = Field(
137
+ default_factory=lambda: os.getenv("JWT_SECRET"),
138
+ description="JWT signing secret",
139
+ )
140
+
141
+
142
+ # Create global instances
143
+ cloudflare_config = CloudflareSettings()
144
+ huggingface_config = HuggingFaceSettings()
145
+ deployment_config = DeploymentSettings()
app/daytona/README.md ADDED
@@ -0,0 +1,57 @@
 
+ # Agent with Daytona sandbox
+
+ ## Prerequisites
+ - conda activate 'Your OpenManus python env'
+ - pip install daytona==0.21.8 structlog==25.4.0
+
+ ## Setup & Running
+
+ 1. Daytona config:
+ ```bash
+ cd OpenManus
+ cp config/config.example-daytona.toml config/config.toml
+ ```
+ 2. Get a Daytona API key:
+ Go to https://app.daytona.io/dashboard/keys and create your API key.
+ 3. Set your API key in config.toml:
+ ```toml
+ # daytona config
+ [daytona]
+ daytona_api_key = ""
+ #daytona_server_url = "https://app.daytona.io/api"
+ #daytona_target = "us" # Daytona is currently available in the following regions: United States (us), Europe (eu)
+ #sandbox_image_name = "whitezxj/sandbox:0.1.0" # If you don't use this default image, sandbox tools may not work
+ #sandbox_entrypoint = "/usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf" # If you change this entrypoint, the server in the sandbox may not work
+ #VNC_password = # The password used to log in to the sandbox via VNC; defaults to 123456 if unset
+ ```
+ 4. Run:
+ ```bash
+ cd OpenManus
+ python sandbox_main.py
+ ```
+ 5. Send tasks to the agent:
+ You can send tasks to the agent from the terminal; the agent will use sandbox tools to handle them.
+ 6. See the results:
+ If the agent uses the sb_browser_use tool, you can watch its operations via the VNC link printed in the terminal, e.g. https://6080-sandbox-123456.h7890.daytona.work.
+ If the agent uses the sb_shell tool, you can see the results in the sandbox terminal at https://app.daytona.io/dashboard/sandboxes.
+ The agent can use the sb_files tool to operate on files in the sandbox.
+
+ ## Example
+
+ You can send a task such as: "Using the information at https://hk.trip.com/travel-guide/guidebook/nanjing-9696/?ishideheader=true&isHideNavBar=YES&disableFontScaling=1&catalogId=514634&locale=zh-HK, put together a Nanjing travel guide and save it in the workspace as index.html"
+
+ Then you can watch the agent's browser actions via the VNC link (https://6080-sandbox-123456.h7890.proxy.daytona.work) and view the HTML produced by the agent at the website URL (https://8080-sandbox-123456.h7890.proxy.daytona.work).
+
+ ## Learn More
+
+ - [Daytona Documentation](https://www.daytona.io/docs/)
app/daytona/sandbox.py ADDED
@@ -0,0 +1,165 @@
1
+ import time
2
+
3
+ from daytona import (
4
+ CreateSandboxFromImageParams,
5
+ Daytona,
6
+ DaytonaConfig,
7
+ Resources,
8
+ Sandbox,
9
+ SandboxState,
10
+ SessionExecuteRequest,
11
+ )
12
+
13
+ from app.config import config
14
+ from app.utils.logger import logger
15
+
16
+
17
+ # load_dotenv()
18
+ daytona_settings = config.daytona
19
+ logger.info("Initializing Daytona sandbox configuration")
20
+ daytona_config = DaytonaConfig(
21
+ api_key=daytona_settings.daytona_api_key,
22
+ server_url=daytona_settings.daytona_server_url,
23
+ target=daytona_settings.daytona_target,
24
+ )
25
+
26
+ if daytona_config.api_key:
27
+ logger.info("Daytona API key configured successfully")
28
+ else:
29
+ logger.warning("No Daytona API key found in environment variables")
30
+
31
+ if daytona_config.server_url:
32
+ logger.info(f"Daytona server URL set to: {daytona_config.server_url}")
33
+ else:
34
+ logger.warning("No Daytona server URL found in environment variables")
35
+
36
+ if daytona_config.target:
37
+ logger.info(f"Daytona target set to: {daytona_config.target}")
38
+ else:
39
+ logger.warning("No Daytona target found in environment variables")
40
+
41
+ daytona = Daytona(daytona_config)
42
+ logger.info("Daytona client initialized")
43
+
44
+
45
+ async def get_or_start_sandbox(sandbox_id: str):
46
+ """Retrieve a sandbox by ID, check its state, and start it if needed."""
47
+
48
+ logger.info(f"Getting or starting sandbox with ID: {sandbox_id}")
49
+
50
+ try:
51
+ sandbox = daytona.get(sandbox_id)
52
+
53
+ # Check if sandbox needs to be started
54
+ if (
55
+ sandbox.state == SandboxState.ARCHIVED
56
+ or sandbox.state == SandboxState.STOPPED
57
+ ):
58
+ logger.info(f"Sandbox is in {sandbox.state} state. Starting...")
59
+ try:
60
+ daytona.start(sandbox)
61
+ # Wait a moment for the sandbox to initialize
62
+ # sleep(5)
63
+ # Refresh sandbox state after starting
64
+ sandbox = daytona.get(sandbox_id)
65
+
66
+ # Start supervisord in a session when restarting
67
+ start_supervisord_session(sandbox)
68
+ except Exception as e:
69
+ logger.error(f"Error starting sandbox: {e}")
70
+ raise e
71
+
72
+ logger.info(f"Sandbox {sandbox_id} is ready")
73
+ return sandbox
74
+
75
+ except Exception as e:
76
+ logger.error(f"Error retrieving or starting sandbox: {str(e)}")
77
+ raise e
78
+
79
+
80
+ def start_supervisord_session(sandbox: Sandbox):
81
+ """Start supervisord in a session."""
82
+ session_id = "supervisord-session"
83
+ try:
84
+ logger.info(f"Creating session {session_id} for supervisord")
85
+ sandbox.process.create_session(session_id)
86
+
87
+ # Execute supervisord command
88
+ sandbox.process.execute_session_command(
89
+ session_id,
90
+ SessionExecuteRequest(
91
+ command="exec /usr/bin/supervisord -n -c /etc/supervisor/conf.d/supervisord.conf",
92
+ var_async=True,
93
+ ),
94
+ )
95
+ time.sleep(25) # Wait a bit to ensure supervisord starts properly
96
+ logger.info(f"Supervisord started in session {session_id}")
97
+ except Exception as e:
98
+ logger.error(f"Error starting supervisord session: {str(e)}")
99
+ raise e
100
+
101
+
102
+ def create_sandbox(password: str, project_id: str = None):
103
+ """Create a new sandbox with all required services configured and running."""
104
+
105
+ logger.info("Creating new Daytona sandbox environment")
106
+ logger.info("Configuring sandbox with browser-use image and environment variables")
107
+
108
+ labels = None
109
+ if project_id:
110
+ logger.info(f"Using sandbox_id as label: {project_id}")
111
+ labels = {"id": project_id}
112
+
113
+ params = CreateSandboxFromImageParams(
114
+ image=daytona_settings.sandbox_image_name,
115
+ public=True,
116
+ labels=labels,
117
+ env_vars={
118
+ "CHROME_PERSISTENT_SESSION": "true",
119
+ "RESOLUTION": "1024x768x24",
120
+ "RESOLUTION_WIDTH": "1024",
121
+ "RESOLUTION_HEIGHT": "768",
122
+ "VNC_PASSWORD": password,
123
+ "ANONYMIZED_TELEMETRY": "false",
124
+ "CHROME_PATH": "",
125
+ "CHROME_USER_DATA": "",
126
+ "CHROME_DEBUGGING_PORT": "9222",
127
+ "CHROME_DEBUGGING_HOST": "localhost",
128
+ "CHROME_CDP": "",
129
+ },
130
+ resources=Resources(
131
+ cpu=2,
132
+ memory=4,
133
+ disk=5,
134
+ ),
135
+ auto_stop_interval=15,
136
+ auto_archive_interval=24 * 60,
137
+ )
138
+
139
+ # Create the sandbox
140
+ sandbox = daytona.create(params)
141
+ logger.info(f"Sandbox created with ID: {sandbox.id}")
142
+
143
+ # Start supervisord in a session for new sandbox
144
+ start_supervisord_session(sandbox)
145
+
146
+ logger.info("Sandbox environment successfully initialized")
147
+ return sandbox
148
+
149
+
150
+ async def delete_sandbox(sandbox_id: str):
151
+ """Delete a sandbox by its ID."""
152
+ logger.info(f"Deleting sandbox with ID: {sandbox_id}")
153
+
154
+ try:
155
+ # Get the sandbox
156
+ sandbox = daytona.get(sandbox_id)
157
+
158
+ # Delete the sandbox
159
+ daytona.delete(sandbox)
160
+
161
+ logger.info(f"Successfully deleted sandbox {sandbox_id}")
162
+ return True
163
+ except Exception as e:
164
+ logger.error(f"Error deleting sandbox {sandbox_id}: {str(e)}")
165
+ raise e
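The restart gate used by `get_or_start_sandbox` (and again in `SandboxToolsBase._ensure_sandbox` below) only starts sandboxes that are archived or stopped. A minimal sketch with a local stand-in enum, not the actual `daytona.SandboxState`:

```python
from enum import Enum


class SandboxState(str, Enum):
    # Local stand-in for daytona.SandboxState, limited to the states checked above.
    STARTED = "started"
    STOPPED = "stopped"
    ARCHIVED = "archived"


def needs_start(state: SandboxState) -> bool:
    # Same condition used before calling daytona.start() and
    # start_supervisord_session() on a retrieved sandbox.
    return state in (SandboxState.ARCHIVED, SandboxState.STOPPED)


print(needs_start(SandboxState.STOPPED))  # → True
print(needs_start(SandboxState.STARTED))  # → False
```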
app/daytona/tool_base.py ADDED
@@ -0,0 +1,138 @@
1
+ from dataclasses import dataclass, field
2
+ from datetime import datetime
3
+ from typing import Any, ClassVar, Dict, Optional
4
+
5
+ from daytona import Daytona, DaytonaConfig, Sandbox, SandboxState
6
+ from pydantic import Field
7
+
8
+ from app.config import config
9
+ from app.daytona.sandbox import create_sandbox, start_supervisord_session
10
+ from app.tool.base import BaseTool
11
+ from app.utils.files_utils import clean_path
12
+ from app.utils.logger import logger
13
+
14
+
15
+ # load_dotenv()
16
+ daytona_settings = config.daytona
17
+ daytona_config = DaytonaConfig(
18
+ api_key=daytona_settings.daytona_api_key,
19
+ server_url=daytona_settings.daytona_server_url,
20
+ target=daytona_settings.daytona_target,
21
+ )
22
+ daytona = Daytona(daytona_config)
23
+
24
+
25
+ @dataclass
26
+ class ThreadMessage:
27
+ """
28
+ Represents a message to be added to a thread.
29
+ """
30
+
31
+ type: str
32
+ content: Dict[str, Any]
33
+ is_llm_message: bool = False
34
+ metadata: Optional[Dict[str, Any]] = None
35
+ timestamp: Optional[float] = field(
36
+ default_factory=lambda: datetime.now().timestamp()
37
+ )
38
+
39
+ def to_dict(self) -> Dict[str, Any]:
40
+ """Convert the message to a dictionary for API calls"""
41
+ return {
42
+ "type": self.type,
43
+ "content": self.content,
44
+ "is_llm_message": self.is_llm_message,
45
+ "metadata": self.metadata or {},
46
+ "timestamp": self.timestamp,
47
+ }
48
+
49
+
50
+ class SandboxToolsBase(BaseTool):
51
+ """Base class for all sandbox tools that provides project-based sandbox access."""
52
+
53
+ # Class variable to track if sandbox URLs have been printed
54
+ _urls_printed: ClassVar[bool] = False
55
+
56
+ # Required fields
57
+ project_id: Optional[str] = None
58
+ # thread_manager: Optional[ThreadManager] = None
59
+
60
+ # Private fields (not part of the model schema)
61
+ _sandbox: Optional[Sandbox] = None
62
+ _sandbox_id: Optional[str] = None
63
+ _sandbox_pass: Optional[str] = None
64
+ workspace_path: str = Field(default="/workspace", exclude=True)
65
+ _sessions: dict[str, str] = {}
66
+
67
+ class Config:
68
+ arbitrary_types_allowed = True # Allow non-pydantic types like ThreadManager
69
+ underscore_attrs_are_private = True
70
+
71
+ async def _ensure_sandbox(self) -> Sandbox:
72
+ """Ensure we have a valid sandbox instance, retrieving it from the project if needed."""
73
+ if self._sandbox is None:
74
+ # Get or start the sandbox
75
+ try:
76
+ self._sandbox = create_sandbox(password=config.daytona.VNC_password)
77
+ # Log URLs if not already printed
78
+ if not SandboxToolsBase._urls_printed:
79
+ vnc_link = self._sandbox.get_preview_link(6080)
80
+ website_link = self._sandbox.get_preview_link(8080)
81
+
82
+ vnc_url = (
83
+ vnc_link.url if hasattr(vnc_link, "url") else str(vnc_link)
84
+ )
85
+ website_url = (
86
+ website_link.url
87
+ if hasattr(website_link, "url")
88
+ else str(website_link)
89
+ )
90
+
91
+ print("\033[95m***")
92
+ print(f"VNC URL: {vnc_url}")
93
+ print(f"Website URL: {website_url}")
94
+ print("***\033[0m")
95
+ SandboxToolsBase._urls_printed = True
96
+ except Exception as e:
97
+ logger.error(f"Error retrieving or starting sandbox: {str(e)}")
98
+ raise e
99
+ else:
100
+ if (
101
+ self._sandbox.state == SandboxState.ARCHIVED
102
+ or self._sandbox.state == SandboxState.STOPPED
103
+ ):
104
+ logger.info(f"Sandbox is in {self._sandbox.state} state. Starting...")
105
+ try:
106
+ daytona.start(self._sandbox)
107
+ # Wait a moment for the sandbox to initialize
108
+ # sleep(5)
109
+ # Refresh sandbox state after starting
110
+
111
+ # Start supervisord in a session when restarting
112
+ start_supervisord_session(self._sandbox)
113
+ except Exception as e:
114
+ logger.error(f"Error starting sandbox: {e}")
115
+ raise e
116
+ return self._sandbox
117
+
118
+ @property
119
+ def sandbox(self) -> Sandbox:
120
+ """Get the sandbox instance, ensuring it exists."""
121
+ if self._sandbox is None:
122
+ raise RuntimeError("Sandbox not initialized. Call _ensure_sandbox() first.")
123
+ return self._sandbox
124
+
125
+ @property
126
+ def sandbox_id(self) -> str:
127
+ """Get the sandbox ID, ensuring it exists."""
128
+ if self._sandbox_id is None:
129
+ raise RuntimeError(
130
+ "Sandbox ID not initialized. Call _ensure_sandbox() first."
131
+ )
132
+ return self._sandbox_id
133
+
134
+ def clean_path(self, path: str) -> str:
135
+ """Clean and normalize a path to be relative to /workspace."""
136
+ cleaned_path = clean_path(path, self.workspace_path)
137
+ logger.debug(f"Cleaned path: {path} -> {cleaned_path}")
138
+ return cleaned_path
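The `ThreadMessage` dataclass above can be exercised on its own. The sketch below is a trimmed, standalone copy for illustration (the real class lives in this module alongside `SandboxToolsBase`), showing how `to_dict` normalizes a missing `metadata` field and auto-fills the timestamp:

```python
from dataclasses import dataclass, field
from datetime import datetime
from typing import Any, Dict, Optional


@dataclass
class ThreadMessage:
    # Trimmed copy of the class defined above, for illustration only
    type: str
    content: Dict[str, Any]
    is_llm_message: bool = False
    metadata: Optional[Dict[str, Any]] = None
    timestamp: Optional[float] = field(
        default_factory=lambda: datetime.now().timestamp()
    )

    def to_dict(self) -> Dict[str, Any]:
        """Convert the message to a dictionary for API calls."""
        return {
            "type": self.type,
            "content": self.content,
            "is_llm_message": self.is_llm_message,
            "metadata": self.metadata or {},  # None becomes {}
            "timestamp": self.timestamp,
        }


msg = ThreadMessage(type="user", content={"text": "hello"})
payload = msg.to_dict()
print(payload["metadata"])
```

Note that `metadata or {}` means callers never have to null-check the serialized dict, and the `default_factory` gives every message a creation timestamp without the caller passing one.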
app/exceptions.py ADDED
@@ -0,0 +1,13 @@
1
+ class ToolError(Exception):
2
+ """Raised when a tool encounters an error."""
3
+
4
+ def __init__(self, message):
5
+ self.message = message
6
+
7
+
8
+ class OpenManusError(Exception):
9
+ """Base exception for all OpenManus errors"""
10
+
11
+
12
+ class TokenLimitExceeded(OpenManusError):
13
+ """Exception raised when the token limit is exceeded"""
app/flow/__init__.py ADDED
File without changes
app/flow/base.py ADDED
@@ -0,0 +1,57 @@
1
+ from abc import ABC, abstractmethod
2
+ from typing import Dict, List, Optional, Union
3
+
4
+ from pydantic import BaseModel
5
+
6
+ from app.agent.base import BaseAgent
7
+
8
+
9
+ class BaseFlow(BaseModel, ABC):
10
+ """Base class for execution flows supporting multiple agents"""
11
+
12
+ agents: Dict[str, BaseAgent]
13
+ tools: Optional[List] = None
14
+ primary_agent_key: Optional[str] = None
15
+
16
+ class Config:
17
+ arbitrary_types_allowed = True
18
+
19
+ def __init__(
20
+ self, agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]], **data
21
+ ):
22
+ # Handle different ways of providing agents
23
+ if isinstance(agents, BaseAgent):
24
+ agents_dict = {"default": agents}
25
+ elif isinstance(agents, list):
26
+ agents_dict = {f"agent_{i}": agent for i, agent in enumerate(agents)}
27
+ else:
28
+ agents_dict = agents
29
+
30
+ # If primary agent not specified, use first agent
31
+ primary_key = data.get("primary_agent_key")
32
+ if not primary_key and agents_dict:
33
+ primary_key = next(iter(agents_dict))
34
+ data["primary_agent_key"] = primary_key
35
+
36
+ # Set the agents dictionary
37
+ data["agents"] = agents_dict
38
+
39
+ # Initialize using BaseModel's init
40
+ super().__init__(**data)
41
+
42
+ @property
43
+ def primary_agent(self) -> Optional[BaseAgent]:
44
+ """Get the primary agent for the flow"""
45
+ return self.agents.get(self.primary_agent_key)
46
+
47
+ def get_agent(self, key: str) -> Optional[BaseAgent]:
48
+ """Get a specific agent by key"""
49
+ return self.agents.get(key)
50
+
51
+ def add_agent(self, key: str, agent: BaseAgent) -> None:
52
+ """Add a new agent to the flow"""
53
+ self.agents[key] = agent
54
+
55
+ @abstractmethod
56
+ async def execute(self, input_text: str) -> str:
57
+ """Execute the flow with given input"""
app/flow/flow_factory.py ADDED
@@ -0,0 +1,30 @@
1
+ from enum import Enum
2
+ from typing import Dict, List, Union
3
+
4
+ from app.agent.base import BaseAgent
5
+ from app.flow.base import BaseFlow
6
+ from app.flow.planning import PlanningFlow
7
+
8
+
9
+ class FlowType(str, Enum):
10
+ PLANNING = "planning"
11
+
12
+
13
+ class FlowFactory:
14
+ """Factory for creating different types of flows with support for multiple agents"""
15
+
16
+ @staticmethod
17
+ def create_flow(
18
+ flow_type: FlowType,
19
+ agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]],
20
+ **kwargs,
21
+ ) -> BaseFlow:
22
+ flows = {
23
+ FlowType.PLANNING: PlanningFlow,
24
+ }
25
+
26
+ flow_class = flows.get(flow_type)
27
+ if not flow_class:
28
+ raise ValueError(f"Unknown flow type: {flow_type}")
29
+
30
+ return flow_class(agents, **kwargs)
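`FlowFactory` dispatches on the `FlowType` enum through a lookup dict, so registering a new flow type means adding one enum member and one dict entry. The same pattern in isolation (`StubPlanningFlow` is a stand-in for the real `PlanningFlow`):

```python
from enum import Enum


class FlowType(str, Enum):
    PLANNING = "planning"


class StubPlanningFlow:
    # Stand-in for app.flow.planning.PlanningFlow, for illustration only
    def __init__(self, agents, **kwargs):
        self.agents = agents


def create_flow(flow_type: FlowType, agents, **kwargs):
    flows = {FlowType.PLANNING: StubPlanningFlow}
    flow_class = flows.get(flow_type)
    if not flow_class:
        raise ValueError(f"Unknown flow type: {flow_type}")
    return flow_class(agents, **kwargs)


flow = create_flow(FlowType.PLANNING, {"default": "agent"})
print(type(flow).__name__)  # StubPlanningFlow
```

Because `FlowType` subclasses `str`, `FlowType("planning")` round-trips from the raw string, which is convenient when the type arrives from a JSON payload.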
app/flow/planning.py ADDED
@@ -0,0 +1,442 @@
1
+ import json
2
+ import time
3
+ from enum import Enum
4
+ from typing import Dict, List, Optional, Union
5
+
6
+ from pydantic import Field
7
+
8
+ from app.agent.base import BaseAgent
9
+ from app.flow.base import BaseFlow
10
+ from app.llm import LLM
11
+ from app.logger import logger
12
+ from app.schema import AgentState, Message, ToolChoice
13
+ from app.tool import PlanningTool
14
+
15
+
16
+ class PlanStepStatus(str, Enum):
17
+ """Enum class defining possible statuses of a plan step"""
18
+
19
+ NOT_STARTED = "not_started"
20
+ IN_PROGRESS = "in_progress"
21
+ COMPLETED = "completed"
22
+ BLOCKED = "blocked"
23
+
24
+ @classmethod
25
+ def get_all_statuses(cls) -> list[str]:
26
+ """Return a list of all possible step status values"""
27
+ return [status.value for status in cls]
28
+
29
+ @classmethod
30
+ def get_active_statuses(cls) -> list[str]:
31
+ """Return a list of values representing active statuses (not started or in progress)"""
32
+ return [cls.NOT_STARTED.value, cls.IN_PROGRESS.value]
33
+
34
+ @classmethod
35
+ def get_status_marks(cls) -> Dict[str, str]:
36
+ """Return a mapping of statuses to their marker symbols"""
37
+ return {
38
+ cls.COMPLETED.value: "[✓]",
39
+ cls.IN_PROGRESS.value: "[→]",
40
+ cls.BLOCKED.value: "[!]",
41
+ cls.NOT_STARTED.value: "[ ]",
42
+ }
43
+
44
+
45
+ class PlanningFlow(BaseFlow):
46
+ """A flow that manages planning and execution of tasks using agents."""
47
+
48
+ llm: LLM = Field(default_factory=lambda: LLM())
49
+ planning_tool: PlanningTool = Field(default_factory=PlanningTool)
50
+ executor_keys: List[str] = Field(default_factory=list)
51
+ active_plan_id: str = Field(default_factory=lambda: f"plan_{int(time.time())}")
52
+ current_step_index: Optional[int] = None
53
+
54
+ def __init__(
55
+ self, agents: Union[BaseAgent, List[BaseAgent], Dict[str, BaseAgent]], **data
56
+ ):
57
+ # Set executor keys before super().__init__
58
+ if "executors" in data:
59
+ data["executor_keys"] = data.pop("executors")
60
+
61
+ # Set plan ID if provided
62
+ if "plan_id" in data:
63
+ data["active_plan_id"] = data.pop("plan_id")
64
+
65
+ # Initialize the planning tool if not provided
66
+ if "planning_tool" not in data:
67
+ planning_tool = PlanningTool()
68
+ data["planning_tool"] = planning_tool
69
+
70
+ # Call parent's init with the processed data
71
+ super().__init__(agents, **data)
72
+
73
+ # Set executor_keys to all agent keys if not specified
74
+ if not self.executor_keys:
75
+ self.executor_keys = list(self.agents.keys())
76
+
77
+ def get_executor(self, step_type: Optional[str] = None) -> BaseAgent:
78
+ """
79
+ Get an appropriate executor agent for the current step.
80
+ Can be extended to select agents based on step type/requirements.
81
+ """
82
+ # If step type is provided and matches an agent key, use that agent
83
+ if step_type and step_type in self.agents:
84
+ return self.agents[step_type]
85
+
86
+ # Otherwise use the first available executor or fall back to primary agent
87
+ for key in self.executor_keys:
88
+ if key in self.agents:
89
+ return self.agents[key]
90
+
91
+ # Fallback to primary agent
92
+ return self.primary_agent
93
+
94
+ async def execute(self, input_text: str) -> str:
95
+ """Execute the planning flow with agents."""
96
+ try:
97
+ if not self.primary_agent:
98
+ raise ValueError("No primary agent available")
99
+
100
+ # Create initial plan if input provided
101
+ if input_text:
102
+ await self._create_initial_plan(input_text)
103
+
104
+ # Verify plan was created successfully
105
+ if self.active_plan_id not in self.planning_tool.plans:
106
+ logger.error(
107
+ f"Plan creation failed. Plan ID {self.active_plan_id} not found in planning tool."
108
+ )
109
+ return f"Failed to create plan for: {input_text}"
110
+
111
+ result = ""
112
+ while True:
113
+ # Get current step to execute
114
+ self.current_step_index, step_info = await self._get_current_step_info()
115
+
116
+ # Exit if no more steps or plan completed
117
+ if self.current_step_index is None:
118
+ result += await self._finalize_plan()
119
+ break
120
+
121
+ # Execute current step with appropriate agent
122
+ step_type = step_info.get("type") if step_info else None
123
+ executor = self.get_executor(step_type)
124
+ step_result = await self._execute_step(executor, step_info)
125
+ result += step_result + "\n"
126
+
127
+ # Check if agent wants to terminate
128
+ if hasattr(executor, "state") and executor.state == AgentState.FINISHED:
129
+ break
130
+
131
+ return result
132
+ except Exception as e:
133
+ logger.error(f"Error in PlanningFlow: {str(e)}")
134
+ return f"Execution failed: {str(e)}"
135
+
136
+ async def _create_initial_plan(self, request: str) -> None:
137
+ """Create an initial plan based on the request using the flow's LLM and PlanningTool."""
138
+ logger.info(f"Creating initial plan with ID: {self.active_plan_id}")
139
+
140
+ system_message_content = (
141
+ "You are a planning assistant. Create a concise, actionable plan with clear steps. "
142
+ "Focus on key milestones rather than detailed sub-steps. "
143
+ "Optimize for clarity and efficiency."
144
+ )
145
+ agents_description = []
146
+ for key in self.executor_keys:
147
+ if key in self.agents:
148
+ agents_description.append(
149
+ {
150
+ "name": key.upper(),
151
+ "description": self.agents[key].description,
152
+ }
153
+ )
154
+ if len(agents_description) > 1:
155
+ # Add description of agents to select
156
+ system_message_content += (
157
+ f"\nNow we have {agents_description} agents. "
158
+ f"The infomation of them are below: {json.dumps(agents_description)}\n"
159
+ "When creating steps in the planning tool, please specify the agent names using the format '[agent_name]'."
160
+ )
161
+
162
+ # Create a system message for plan creation
163
+ system_message = Message.system_message(system_message_content)
164
+
165
+ # Create a user message with the request
166
+ user_message = Message.user_message(
167
+ f"Create a reasonable plan with clear steps to accomplish the task: {request}"
168
+ )
169
+
170
+ # Call LLM with PlanningTool
171
+ response = await self.llm.ask_tool(
172
+ messages=[user_message],
173
+ system_msgs=[system_message],
174
+ tools=[self.planning_tool.to_param()],
175
+ tool_choice=ToolChoice.AUTO,
176
+ )
177
+
178
+ # Process tool calls if present
179
+ if response.tool_calls:
180
+ for tool_call in response.tool_calls:
181
+ if tool_call.function.name == "planning":
182
+ # Parse the arguments
183
+ args = tool_call.function.arguments
184
+ if isinstance(args, str):
185
+ try:
186
+ args = json.loads(args)
187
+ except json.JSONDecodeError:
188
+ logger.error(f"Failed to parse tool arguments: {args}")
189
+ continue
190
+
191
+ # Ensure plan_id is set correctly and execute the tool
192
+ args["plan_id"] = self.active_plan_id
193
+
194
+ # Execute the tool via ToolCollection instead of directly
195
+ result = await self.planning_tool.execute(**args)
196
+
197
+ logger.info(f"Plan creation result: {str(result)}")
198
+ return
199
+
200
+ # If execution reached here, create a default plan
201
+ logger.warning("Creating default plan")
202
+
203
+ # Create default plan using the ToolCollection
204
+ await self.planning_tool.execute(
205
+ **{
206
+ "command": "create",
207
+ "plan_id": self.active_plan_id,
208
+ "title": f"Plan for: {request[:50]}{'...' if len(request) > 50 else ''}",
209
+ "steps": ["Analyze request", "Execute task", "Verify results"],
210
+ }
211
+ )
212
+
213
+ async def _get_current_step_info(self) -> tuple[Optional[int], Optional[dict]]:
214
+ """
215
+ Parse the current plan to identify the first non-completed step's index and info.
216
+ Returns (None, None) if no active step is found.
217
+ """
218
+ if (
219
+ not self.active_plan_id
220
+ or self.active_plan_id not in self.planning_tool.plans
221
+ ):
222
+ logger.error(f"Plan with ID {self.active_plan_id} not found")
223
+ return None, None
224
+
225
+ try:
226
+ # Direct access to plan data from planning tool storage
227
+ plan_data = self.planning_tool.plans[self.active_plan_id]
228
+ steps = plan_data.get("steps", [])
229
+ step_statuses = plan_data.get("step_statuses", [])
230
+
231
+ # Find first non-completed step
232
+ for i, step in enumerate(steps):
233
+ if i >= len(step_statuses):
234
+ status = PlanStepStatus.NOT_STARTED.value
235
+ else:
236
+ status = step_statuses[i]
237
+
238
+ if status in PlanStepStatus.get_active_statuses():
239
+ # Extract step type/category if available
240
+ step_info = {"text": step}
241
+
242
+ # Try to extract step type from the text (e.g., [SEARCH] or [CODE])
243
+ import re
244
+
245
+ type_match = re.search(r"\[([A-Z_]+)\]", step)
246
+ if type_match:
247
+ step_info["type"] = type_match.group(1).lower()
248
+
249
+ # Mark current step as in_progress
250
+ try:
251
+ await self.planning_tool.execute(
252
+ command="mark_step",
253
+ plan_id=self.active_plan_id,
254
+ step_index=i,
255
+ step_status=PlanStepStatus.IN_PROGRESS.value,
256
+ )
257
+ except Exception as e:
258
+ logger.warning(f"Error marking step as in_progress: {e}")
259
+ # Update step status directly if needed
260
+ if i < len(step_statuses):
261
+ step_statuses[i] = PlanStepStatus.IN_PROGRESS.value
262
+ else:
263
+ while len(step_statuses) < i:
264
+ step_statuses.append(PlanStepStatus.NOT_STARTED.value)
265
+ step_statuses.append(PlanStepStatus.IN_PROGRESS.value)
266
+
267
+ plan_data["step_statuses"] = step_statuses
268
+
269
+ return i, step_info
270
+
271
+ return None, None # No active step found
272
+
273
+ except Exception as e:
274
+ logger.warning(f"Error finding current step index: {e}")
275
+ return None, None
276
+
277
+ async def _execute_step(self, executor: BaseAgent, step_info: dict) -> str:
278
+ """Execute the current step with the specified agent using agent.run()."""
279
+ # Prepare context for the agent with current plan status
280
+ plan_status = await self._get_plan_text()
281
+ step_text = step_info.get("text", f"Step {self.current_step_index}")
282
+
283
+ # Create a prompt for the agent to execute the current step
284
+ step_prompt = f"""
285
+ CURRENT PLAN STATUS:
286
+ {plan_status}
287
+
288
+ YOUR CURRENT TASK:
289
+ You are now working on step {self.current_step_index}: "{step_text}"
290
+
291
+ Please only execute this current step using the appropriate tools. When you're done, provide a summary of what you accomplished.
292
+ """
293
+
294
+ # Use agent.run() to execute the step
295
+ try:
296
+ step_result = await executor.run(step_prompt)
297
+
298
+ # Mark the step as completed after successful execution
299
+ await self._mark_step_completed()
300
+
301
+ return step_result
302
+ except Exception as e:
303
+ logger.error(f"Error executing step {self.current_step_index}: {e}")
304
+ return f"Error executing step {self.current_step_index}: {str(e)}"
305
+
306
+ async def _mark_step_completed(self) -> None:
307
+ """Mark the current step as completed."""
308
+ if self.current_step_index is None:
309
+ return
310
+
311
+ try:
312
+ # Mark the step as completed
313
+ await self.planning_tool.execute(
314
+ command="mark_step",
315
+ plan_id=self.active_plan_id,
316
+ step_index=self.current_step_index,
317
+ step_status=PlanStepStatus.COMPLETED.value,
318
+ )
319
+ logger.info(
320
+ f"Marked step {self.current_step_index} as completed in plan {self.active_plan_id}"
321
+ )
322
+ except Exception as e:
323
+ logger.warning(f"Failed to update plan status: {e}")
324
+ # Update step status directly in planning tool storage
325
+ if self.active_plan_id in self.planning_tool.plans:
326
+ plan_data = self.planning_tool.plans[self.active_plan_id]
327
+ step_statuses = plan_data.get("step_statuses", [])
328
+
329
+ # Ensure the step_statuses list is long enough
330
+ while len(step_statuses) <= self.current_step_index:
331
+ step_statuses.append(PlanStepStatus.NOT_STARTED.value)
332
+
333
+ # Update the status
334
+ step_statuses[self.current_step_index] = PlanStepStatus.COMPLETED.value
335
+ plan_data["step_statuses"] = step_statuses
336
+
337
+ async def _get_plan_text(self) -> str:
338
+ """Get the current plan as formatted text."""
339
+ try:
340
+ result = await self.planning_tool.execute(
341
+ command="get", plan_id=self.active_plan_id
342
+ )
343
+ return result.output if hasattr(result, "output") else str(result)
344
+ except Exception as e:
345
+ logger.error(f"Error getting plan: {e}")
346
+ return self._generate_plan_text_from_storage()
347
+
348
+ def _generate_plan_text_from_storage(self) -> str:
349
+ """Generate plan text directly from storage if the planning tool fails."""
350
+ try:
351
+ if self.active_plan_id not in self.planning_tool.plans:
352
+ return f"Error: Plan with ID {self.active_plan_id} not found"
353
+
354
+ plan_data = self.planning_tool.plans[self.active_plan_id]
355
+ title = plan_data.get("title", "Untitled Plan")
356
+ steps = plan_data.get("steps", [])
357
+ step_statuses = plan_data.get("step_statuses", [])
358
+ step_notes = plan_data.get("step_notes", [])
359
+
360
+ # Ensure step_statuses and step_notes match the number of steps
361
+ while len(step_statuses) < len(steps):
362
+ step_statuses.append(PlanStepStatus.NOT_STARTED.value)
363
+ while len(step_notes) < len(steps):
364
+ step_notes.append("")
365
+
366
+ # Count steps by status
367
+ status_counts = {status: 0 for status in PlanStepStatus.get_all_statuses()}
368
+
369
+ for status in step_statuses:
370
+ if status in status_counts:
371
+ status_counts[status] += 1
372
+
373
+ completed = status_counts[PlanStepStatus.COMPLETED.value]
374
+ total = len(steps)
375
+ progress = (completed / total) * 100 if total > 0 else 0
376
+
377
+ plan_text = f"Plan: {title} (ID: {self.active_plan_id})\n"
378
+ plan_text += "=" * len(plan_text) + "\n\n"
379
+
380
+ plan_text += (
381
+ f"Progress: {completed}/{total} steps completed ({progress:.1f}%)\n"
382
+ )
383
+ plan_text += f"Status: {status_counts[PlanStepStatus.COMPLETED.value]} completed, {status_counts[PlanStepStatus.IN_PROGRESS.value]} in progress, "
384
+ plan_text += f"{status_counts[PlanStepStatus.BLOCKED.value]} blocked, {status_counts[PlanStepStatus.NOT_STARTED.value]} not started\n\n"
385
+ plan_text += "Steps:\n"
386
+
387
+ status_marks = PlanStepStatus.get_status_marks()
388
+
389
+ for i, (step, status, notes) in enumerate(
390
+ zip(steps, step_statuses, step_notes)
391
+ ):
392
+ # Use status marks to indicate step status
393
+ status_mark = status_marks.get(
394
+ status, status_marks[PlanStepStatus.NOT_STARTED.value]
395
+ )
396
+
397
+ plan_text += f"{i}. {status_mark} {step}\n"
398
+ if notes:
399
+ plan_text += f" Notes: {notes}\n"
400
+
401
+ return plan_text
402
+ except Exception as e:
403
+ logger.error(f"Error generating plan text from storage: {e}")
404
+ return f"Error: Unable to retrieve plan with ID {self.active_plan_id}"
405
+
406
+ async def _finalize_plan(self) -> str:
407
+ """Finalize the plan and provide a summary using the flow's LLM directly."""
408
+ plan_text = await self._get_plan_text()
409
+
410
+ # Create a summary using the flow's LLM directly
411
+ try:
412
+ system_message = Message.system_message(
413
+ "You are a planning assistant. Your task is to summarize the completed plan."
414
+ )
415
+
416
+ user_message = Message.user_message(
417
+ f"The plan has been completed. Here is the final plan status:\n\n{plan_text}\n\nPlease provide a summary of what was accomplished and any final thoughts."
418
+ )
419
+
420
+ response = await self.llm.ask(
421
+ messages=[user_message], system_msgs=[system_message]
422
+ )
423
+
424
+ return f"Plan completed:\n\n{response}"
425
+ except Exception as e:
426
+ logger.error(f"Error finalizing plan with LLM: {e}")
427
+
428
+ # Fallback to using an agent for the summary
429
+ try:
430
+ agent = self.primary_agent
431
+ summary_prompt = f"""
432
+ The plan has been completed. Here is the final plan status:
433
+
434
+ {plan_text}
435
+
436
+ Please provide a summary of what was accomplished and any final thoughts.
437
+ """
438
+ summary = await agent.run(summary_prompt)
439
+ return f"Plan completed:\n\n{summary}"
440
+ except Exception as e2:
441
+ logger.error(f"Error finalizing plan with agent: {e2}")
442
+ return "Plan completed. Error generating summary."
app/huggingface_models.py ADDED
The diff for this file is too large to render. See raw diff
 
app/huggingface_models_backup.py ADDED
@@ -0,0 +1,2237 @@
1
+ """
2
+ Hugging Face Models Integration for OpenManus AI Agent
3
+ Comprehensive integration with Hugging Face Inference API for all model categories
4
+ """
5
+
6
+ import asyncio
7
+ import base64
8
+ import io
9
+ import json
10
+ import logging
11
+ from dataclasses import dataclass
12
+ from enum import Enum
13
+ from typing import Any, Dict, List, Optional, Union
14
+
15
+ import aiohttp
16
+ import PIL.Image
17
+ from pydantic import BaseModel
18
+
19
+ logger = logging.getLogger(__name__)
20
+
21
+
22
+ class ModelCategory(Enum):
23
+ """Categories of Hugging Face models available"""
24
+
25
+ # Core AI categories
26
+ TEXT_GENERATION = "text-generation"
27
+ TEXT_TO_IMAGE = "text-to-image"
28
+ IMAGE_TO_TEXT = "image-to-text"
29
+ AUTOMATIC_SPEECH_RECOGNITION = "automatic-speech-recognition"
30
+ TEXT_TO_SPEECH = "text-to-speech"
31
+ IMAGE_CLASSIFICATION = "image-classification"
32
+ OBJECT_DETECTION = "object-detection"
33
+ FEATURE_EXTRACTION = "feature-extraction"
34
+ SENTENCE_SIMILARITY = "sentence-similarity"
35
+ TRANSLATION = "translation"
36
+ SUMMARIZATION = "summarization"
37
+ QUESTION_ANSWERING = "question-answering"
38
+ FILL_MASK = "fill-mask"
39
+ TOKEN_CLASSIFICATION = "token-classification"
40
+ ZERO_SHOT_CLASSIFICATION = "zero-shot-classification"
41
+ AUDIO_CLASSIFICATION = "audio-classification"
42
+ CONVERSATIONAL = "conversational"
43
+
44
+ # Video and Motion
45
+ TEXT_TO_VIDEO = "text-to-video"
46
+ VIDEO_TO_TEXT = "video-to-text"
47
+ VIDEO_CLASSIFICATION = "video-classification"
48
+ VIDEO_GENERATION = "video-generation"
49
+ MOTION_GENERATION = "motion-generation"
50
+ DEEPFAKE_DETECTION = "deepfake-detection"
51
+
52
+ # Code and Development
53
+ CODE_GENERATION = "code-generation"
54
+ CODE_COMPLETION = "code-completion"
55
+ CODE_EXPLANATION = "code-explanation"
56
+ CODE_TRANSLATION = "code-translation"
57
+ CODE_REVIEW = "code-review"
58
+ APP_GENERATION = "app-generation"
59
+ API_GENERATION = "api-generation"
60
+ DATABASE_GENERATION = "database-generation"
61
+
62
+ # 3D and AR/VR
63
+ TEXT_TO_3D = "text-to-3d"
64
+ IMAGE_TO_3D = "image-to-3d"
65
+ THREE_D_GENERATION = "3d-generation"
66
+ MESH_GENERATION = "mesh-generation"
67
+ TEXTURE_GENERATION = "texture-generation"
68
+ AR_CONTENT = "ar-content"
69
+ VR_ENVIRONMENT = "vr-environment"
70
+
71
+ # Document Processing
72
+ OCR = "ocr"
73
+ DOCUMENT_ANALYSIS = "document-analysis"
74
+ PDF_PROCESSING = "pdf-processing"
75
+ LAYOUT_ANALYSIS = "layout-analysis"
76
+ TABLE_EXTRACTION = "table-extraction"
77
+ HANDWRITING_RECOGNITION = "handwriting-recognition"
78
+ FORM_PROCESSING = "form-processing"
79
+
80
+ # Multimodal AI
81
+ VISION_LANGUAGE = "vision-language"
82
+ MULTIMODAL_REASONING = "multimodal-reasoning"
83
+ CROSS_MODAL_GENERATION = "cross-modal-generation"
84
+ VISUAL_QUESTION_ANSWERING = "visual-question-answering"
85
+ IMAGE_TEXT_MATCHING = "image-text-matching"
86
+ MULTIMODAL_CHAT = "multimodal-chat"
87
+
88
+ # Specialized AI
89
+ MUSIC_GENERATION = "music-generation"
90
+ VOICE_CLONING = "voice-cloning"
91
+ STYLE_TRANSFER = "style-transfer"
92
+ SUPER_RESOLUTION = "super-resolution"
93
+ IMAGE_INPAINTING = "image-inpainting"
94
+ IMAGE_OUTPAINTING = "image-outpainting"
95
+ BACKGROUND_REMOVAL = "background-removal"
96
+ FACE_RESTORATION = "face-restoration"
97
+
98
+ # Content Creation
99
+ CREATIVE_WRITING = "creative-writing"
100
+ STORY_GENERATION = "story-generation"
101
+ SCREENPLAY_WRITING = "screenplay-writing"
102
+ POETRY_GENERATION = "poetry-generation"
103
+ BLOG_WRITING = "blog-writing"
104
+ MARKETING_COPY = "marketing-copy"
105
+
106
+ # Game Development
107
+ GAME_ASSET_GENERATION = "game-asset-generation"
108
+ CHARACTER_GENERATION = "character-generation"
109
+ LEVEL_GENERATION = "level-generation"
110
+ DIALOGUE_GENERATION = "dialogue-generation"
111
+
112
+ # Science and Research
113
+ PROTEIN_FOLDING = "protein-folding"
114
+ MOLECULE_GENERATION = "molecule-generation"
115
+ SCIENTIFIC_WRITING = "scientific-writing"
116
+ RESEARCH_ASSISTANCE = "research-assistance"
117
+ DATA_ANALYSIS = "data-analysis"
118
+
119
+ # Business and Productivity
120
+ EMAIL_GENERATION = "email-generation"
121
+ PRESENTATION_CREATION = "presentation-creation"
122
+ REPORT_GENERATION = "report-generation"
123
+ MEETING_SUMMARIZATION = "meeting-summarization"
124
+ PROJECT_PLANNING = "project-planning"
125
+
126
+ # AI Teacher and Education
127
+ AI_TUTORING = "ai-tutoring"
128
+ EDUCATIONAL_CONTENT = "educational-content"
129
+ LESSON_PLANNING = "lesson-planning"
130
+ CONCEPT_EXPLANATION = "concept-explanation"
131
+ HOMEWORK_ASSISTANCE = "homework-assistance"
132
+ QUIZ_GENERATION = "quiz-generation"
133
+ CURRICULUM_DESIGN = "curriculum-design"
134
+ LEARNING_ASSESSMENT = "learning-assessment"
135
+ ADAPTIVE_LEARNING = "adaptive-learning"
136
+ SUBJECT_TEACHING = "subject-teaching"
137
+ MATH_TUTORING = "math-tutoring"
138
+ SCIENCE_TUTORING = "science-tutoring"
139
+ LANGUAGE_TUTORING = "language-tutoring"
140
+ HISTORY_TUTORING = "history-tutoring"
141
+ CODING_INSTRUCTION = "coding-instruction"
142
+ EXAM_PREPARATION = "exam-preparation"
143
+ STUDY_GUIDE_CREATION = "study-guide-creation"
144
+ EDUCATIONAL_GAMES = "educational-games"
145
+ LEARNING_ANALYTICS = "learning-analytics"
146
+ PERSONALIZED_LEARNING = "personalized-learning"
147
+
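The category constants above are members of an Enum whose class header sits earlier in the file; a minimal self-contained sketch of how such string-valued members behave (the local `ModelCategory` here is a trimmed stand-in, not the full enum):

```python
from enum import Enum

# Stand-in mirroring a few of the ModelCategory members above;
# the real enum in this module defines many more.
class ModelCategory(Enum):
    AI_TUTORING = "ai-tutoring"
    QUIZ_GENERATION = "quiz-generation"
    TEXT_TO_3D = "text-to-3d"

# Members can be looked up from their wire value...
assert ModelCategory("ai-tutoring") is ModelCategory.AI_TUTORING
# ...and expose that value for serialization into API payloads.
assert ModelCategory.QUIZ_GENERATION.value == "quiz-generation"
```

Value-based lookup (`ModelCategory("ai-tutoring")`) is what lets a request router map an incoming task string straight onto a category member.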
148
+
149
+ @dataclass
150
+ class HFModel:
151
+ """Hugging Face model definition"""
152
+
153
+ name: str
154
+ model_id: str
155
+ category: ModelCategory
156
+ description: str
157
+ endpoint_compatible: bool = False
158
+ requires_auth: bool = False
159
+ max_tokens: Optional[int] = None
160
+ supports_streaming: bool = False
161
+
162
+
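As a sanity check on the dataclass above, a self-contained sketch of constructing one entry (the `HFModel` here is a local mirror with `category` simplified to `str` so the snippet stands alone; field order and defaults match the definition above):

```python
from dataclasses import dataclass
from typing import Optional

# Local mirror of the HFModel dataclass above (category simplified to str).
@dataclass
class HFModel:
    name: str
    model_id: str
    category: str
    description: str
    endpoint_compatible: bool = False
    requires_auth: bool = False
    max_tokens: Optional[int] = None
    supports_streaming: bool = False

m = HFModel(
    "Whisper Large v3",
    "openai/whisper-large-v3",
    "automatic-speech-recognition",
    "OpenAI's best multilingual speech recognition",
    True,
)
assert m.model_id == "openai/whisper-large-v3"
assert m.requires_auth is False  # trailing fields fall back to defaults
```

Because the registry below passes every field positionally, the human-readable `name` must always come first and the Hub repo id second — swapping them silently produces unusable `model_id` values.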
163
+ class HuggingFaceModels:
164
+ """Comprehensive collection of Hugging Face models for all categories"""
165
+
166
+ # Text Generation Models (Latest and Popular)
167
+ TEXT_GENERATION_MODELS = [
168
+ HFModel(
169
+ "MiniMax-M2",
170
+ "MiniMaxAI/MiniMax-M2",
171
+ ModelCategory.TEXT_GENERATION,
172
+ "Latest high-performance text generation model",
173
+ True,
174
+ False,
175
+ 4096,
176
+ True,
177
+ ),
178
+ HFModel(
179
+ "Kimi Linear 48B",
180
+ "moonshotai/Kimi-Linear-48B-A3B-Instruct",
181
+ ModelCategory.TEXT_GENERATION,
182
+ "Large instruction-tuned model with linear attention",
183
+ True,
184
+ False,
185
+ 8192,
186
+ True,
187
+ ),
188
+ HFModel(
189
+ "GPT-OSS 20B",
190
+ "openai/gpt-oss-20b",
191
+ ModelCategory.TEXT_GENERATION,
192
+ "Open-source GPT model by OpenAI",
193
+ True,
194
+ False,
195
+ 4096,
196
+ True,
197
+ ),
198
+ HFModel(
199
+ "GPT-OSS 120B",
200
+ "openai/gpt-oss-120b",
201
+ ModelCategory.TEXT_GENERATION,
202
+ "Large open-source GPT model",
203
+ True,
204
+ False,
205
+ 4096,
206
+ True,
207
+ ),
208
+ HFModel(
209
+ "Granite 4.0 1B",
210
+ "ibm-granite/granite-4.0-1b",
211
+ ModelCategory.TEXT_GENERATION,
212
+ "IBM's enterprise-grade small language model",
213
+ True,
214
+ False,
215
+ 2048,
216
+ True,
217
+ ),
218
+ HFModel(
219
+ "GLM-4.6",
220
+ "zai-org/GLM-4.6",
221
+ ModelCategory.TEXT_GENERATION,
222
+ "Multilingual conversational model",
223
+ True,
224
+ False,
225
+ 4096,
226
+ True,
227
+ ),
228
+ HFModel(
229
+ "Llama 3.1 8B Instruct",
230
+ "meta-llama/Llama-3.1-8B-Instruct",
231
+ ModelCategory.TEXT_GENERATION,
232
+ "Meta's instruction-tuned Llama model",
233
+ True,
234
+ True,
235
+ 8192,
236
+ True,
237
+ ),
238
+ HFModel(
239
+ "Tongyi DeepResearch 30B",
240
+ "Alibaba-NLP/Tongyi-DeepResearch-30B-A3B",
241
+ ModelCategory.TEXT_GENERATION,
242
+ "Alibaba's research-focused large language model",
243
+ True,
244
+ False,
245
+ 4096,
246
+ True,
247
+ ),
248
+ HFModel(
249
+ "EuroLLM 9B",
250
+ "utter-project/EuroLLM-9B",
251
+ ModelCategory.TEXT_GENERATION,
252
+ "European multilingual language model",
253
+ True,
254
+ False,
255
+ 4096,
256
+ True,
257
+ ),
258
+ ]
259
+
260
+ # Text-to-Image Models (Latest and Best)
261
+ TEXT_TO_IMAGE_MODELS = [
262
+ HFModel(
263
+ "FIBO",
264
+ "briaai/FIBO",
265
+ ModelCategory.TEXT_TO_IMAGE,
266
+ "Advanced text-to-image generation model",
267
+ True,
268
+ False,
269
+ ),
270
+ HFModel(
271
+ "FLUX.1 Dev",
272
+ "black-forest-labs/FLUX.1-dev",
273
+ ModelCategory.TEXT_TO_IMAGE,
274
+ "State-of-the-art image generation",
275
+ True,
276
+ False,
277
+ ),
278
+ HFModel(
279
+ "FLUX.1 Schnell",
280
+ "black-forest-labs/FLUX.1-schnell",
281
+ ModelCategory.TEXT_TO_IMAGE,
282
+ "Fast high-quality image generation",
283
+ True,
284
+ False,
285
+ ),
286
+ HFModel(
287
+ "Qwen Image",
288
+ "Qwen/Qwen-Image",
289
+ ModelCategory.TEXT_TO_IMAGE,
290
+ "Multilingual text-to-image model",
291
+ True,
292
+ False,
293
+ ),
294
+ HFModel(
295
+ "Stable Diffusion XL",
296
+ "stabilityai/stable-diffusion-xl-base-1.0",
297
+ ModelCategory.TEXT_TO_IMAGE,
298
+ "Popular high-resolution image generation",
299
+ True,
300
+ False,
301
+ ),
302
+ HFModel(
303
+ "Stable Diffusion 3.5 Large",
304
+ "stabilityai/stable-diffusion-3.5-large",
305
+ ModelCategory.TEXT_TO_IMAGE,
306
+ "Latest Stable Diffusion model",
307
+ True,
308
+ False,
309
+ ),
310
+ HFModel(
311
+ "HunyuanImage 3.0",
312
+ "tencent/HunyuanImage-3.0",
313
+ ModelCategory.TEXT_TO_IMAGE,
314
+ "Tencent's advanced image generation model",
315
+ True,
316
+ False,
317
+ ),
318
+ HFModel(
319
+ "Nitro-E",
320
+ "amd/Nitro-E",
321
+ ModelCategory.TEXT_TO_IMAGE,
322
+ "AMD's efficient image generation model",
323
+ True,
324
+ False,
325
+ ),
326
+ HFModel(
327
+ "Qwen Image Lightning",
328
+ "lightx2v/Qwen-Image-Lightning",
329
+ ModelCategory.TEXT_TO_IMAGE,
330
+ "Fast distilled image generation",
331
+ True,
332
+ False,
333
+ ),
334
+ ]
335
+
336
+ # Automatic Speech Recognition Models
337
+ ASR_MODELS = [
338
+ HFModel(
339
+ "Whisper Large v3",
340
+ "openai/whisper-large-v3",
341
+ ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
342
+ "OpenAI's best multilingual speech recognition",
343
+ True,
344
+ False,
345
+ ),
346
+ HFModel(
347
+ "Whisper Large v3 Turbo",
348
+ "openai/whisper-large-v3-turbo",
349
+ ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
350
+ "Faster version of Whisper Large v3",
351
+ True,
352
+ False,
353
+ ),
354
+ HFModel(
355
+ "Parakeet TDT 0.6B v3",
356
+ "nvidia/parakeet-tdt-0.6b-v3",
357
+ ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
358
+ "NVIDIA's multilingual ASR model",
359
+ True,
360
+ False,
361
+ ),
362
+ HFModel(
363
+ "Canary Qwen 2.5B",
364
+ "nvidia/canary-qwen-2.5b",
365
+ ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
366
+ "NVIDIA's advanced ASR with Qwen integration",
367
+ True,
368
+ False,
369
+ ),
370
+ HFModel(
371
+ "Canary 1B v2",
372
+ "nvidia/canary-1b-v2",
373
+ ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
374
+ "Compact multilingual ASR model",
375
+ True,
376
+ False,
377
+ ),
378
+ HFModel(
379
+ "Whisper Small",
380
+ "openai/whisper-small",
381
+ ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
382
+ "Lightweight multilingual ASR",
383
+ True,
384
+ False,
385
+ ),
386
+ HFModel(
387
+ "Speaker Diarization 3.1",
388
+ "pyannote/speaker-diarization-3.1",
389
+ ModelCategory.AUTOMATIC_SPEECH_RECOGNITION,
390
+ "Advanced speaker identification and diarization",
391
+ True,
392
+ False,
393
+ ),
394
+ ]
395
+
396
+ # Text-to-Speech Models
397
+ TTS_MODELS = [
398
+ HFModel(
399
+ "SoulX Podcast 1.7B",
400
+ "Soul-AILab/SoulX-Podcast-1.7B",
401
+ ModelCategory.TEXT_TO_SPEECH,
402
+ "High-quality podcast-style speech synthesis",
403
+ True,
404
+ False,
405
+ ),
406
+ HFModel(
407
+ "NeuTTS Air",
408
+ "neuphonic/neutts-air",
409
+ ModelCategory.TEXT_TO_SPEECH,
410
+ "Advanced neural text-to-speech",
411
+ True,
412
+ False,
413
+ ),
414
+ HFModel(
415
+ "Kokoro 82M",
416
+ "hexgrad/Kokoro-82M",
417
+ ModelCategory.TEXT_TO_SPEECH,
418
+ "Lightweight high-quality TTS",
419
+ True,
420
+ False,
421
+ ),
422
+ HFModel(
423
+ "Kani TTS 400M EN",
424
+ "nineninesix/kani-tts-400m-en",
425
+ ModelCategory.TEXT_TO_SPEECH,
426
+ "English-focused text-to-speech model",
427
+ True,
428
+ False,
429
+ ),
430
+ HFModel(
431
+ "XTTS v2",
432
+ "coqui/XTTS-v2",
433
+ ModelCategory.TEXT_TO_SPEECH,
434
+ "Zero-shot voice cloning TTS",
435
+ True,
436
+ False,
437
+ ),
438
+ HFModel(
439
+ "Chatterbox",
440
+ "ResembleAI/chatterbox",
441
+ ModelCategory.TEXT_TO_SPEECH,
442
+ "Multilingual voice cloning",
443
+ True,
444
+ False,
445
+ ),
446
+ HFModel(
447
+ "VibeVoice 1.5B",
448
+ "microsoft/VibeVoice-1.5B",
449
+ ModelCategory.TEXT_TO_SPEECH,
450
+ "Microsoft's advanced TTS model",
451
+ True,
452
+ False,
453
+ ),
454
+ HFModel(
455
+ "OpenAudio S1 Mini",
456
+ "fishaudio/openaudio-s1-mini",
457
+ ModelCategory.TEXT_TO_SPEECH,
458
+ "Compact multilingual TTS",
459
+ True,
460
+ False,
461
+ ),
462
+ ]
463
+
464
+ # Image Classification Models
465
+ IMAGE_CLASSIFICATION_MODELS = [
466
+ HFModel(
467
+ "NSFW Image Detection",
468
+ "Falconsai/nsfw_image_detection",
469
+ ModelCategory.IMAGE_CLASSIFICATION,
470
+ "Content safety image classification",
471
+ True,
472
+ False,
473
+ ),
474
+ HFModel(
475
+ "ViT Base Patch16",
476
+ "google/vit-base-patch16-224",
477
+ ModelCategory.IMAGE_CLASSIFICATION,
478
+ "Google's Vision Transformer",
479
+ True,
480
+ False,
481
+ ),
482
+ HFModel(
483
+ "Deepfake Detection",
484
+ "dima806/deepfake_vs_real_image_detection",
485
+ ModelCategory.IMAGE_CLASSIFICATION,
486
+ "Detect AI-generated vs real images",
487
+ True,
488
+ False,
489
+ ),
490
+ HFModel(
491
+ "Facial Emotions Detection",
492
+ "dima806/facial_emotions_image_detection",
493
+ ModelCategory.IMAGE_CLASSIFICATION,
494
+ "Recognize facial emotions",
495
+ True,
496
+ False,
497
+ ),
498
+ HFModel(
499
+ "SDXL Detector",
500
+ "Organika/sdxl-detector",
501
+ ModelCategory.IMAGE_CLASSIFICATION,
502
+ "Detect Stable Diffusion XL generated images",
503
+ True,
504
+ False,
505
+ ),
506
+ HFModel(
507
+ "ViT NSFW Detector",
508
+ "AdamCodd/vit-base-nsfw-detector",
509
+ ModelCategory.IMAGE_CLASSIFICATION,
510
+ "NSFW content detection with ViT",
511
+ True,
512
+ False,
513
+ ),
514
+ HFModel(
515
+ "ResNet 101",
516
+ "microsoft/resnet-101",
517
+ ModelCategory.IMAGE_CLASSIFICATION,
518
+ "Microsoft's ResNet for classification",
519
+ True,
520
+ False,
521
+ ),
522
+ ]
523
+
524
+ # Additional Categories
525
+ FEATURE_EXTRACTION_MODELS = [
526
+ HFModel(
527
+ "Sentence Transformers All MiniLM",
528
+ "sentence-transformers/all-MiniLM-L6-v2",
529
+ ModelCategory.FEATURE_EXTRACTION,
530
+ "Lightweight sentence embeddings",
531
+ True,
532
+ False,
533
+ ),
534
+ HFModel(
535
+ "BGE Large EN",
536
+ "BAAI/bge-large-en-v1.5",
537
+ ModelCategory.FEATURE_EXTRACTION,
538
+ "High-quality English embeddings",
539
+ True,
540
+ False,
541
+ ),
542
+ HFModel(
543
+ "E5 Large v2",
544
+ "intfloat/e5-large-v2",
545
+ ModelCategory.FEATURE_EXTRACTION,
546
+ "Multilingual text embeddings",
547
+ True,
548
+ False,
549
+ ),
550
+ ]
551
+
552
+ TRANSLATION_MODELS = [
553
+ HFModel(
554
+ "M2M100 1.2B",
555
+ "facebook/m2m100_1.2B",
556
+ ModelCategory.TRANSLATION,
557
+ "Multilingual machine translation",
558
+ True,
559
+ False,
560
+ ),
561
+ HFModel(
562
+ "NLLB 200 3.3B",
563
+ "facebook/nllb-200-3.3B",
564
+ ModelCategory.TRANSLATION,
565
+ "No Language Left Behind translation",
566
+ True,
567
+ False,
568
+ ),
569
+ HFModel(
570
+ "mBART Large 50",
571
+ "facebook/mbart-large-50-many-to-many-mmt",
572
+ ModelCategory.TRANSLATION,
573
+ "Multilingual BART for translation",
574
+ True,
575
+ False,
576
+ ),
577
+ ]
578
+
579
+ SUMMARIZATION_MODELS = [
580
+ HFModel(
581
+ "PEGASUS XSum",
582
+ "google/pegasus-xsum",
583
+ ModelCategory.SUMMARIZATION,
584
+ "Abstractive summarization model",
585
+ True,
586
+ False,
587
+ ),
588
+ HFModel(
589
+ "BART Large CNN",
590
+ "facebook/bart-large-cnn",
591
+ ModelCategory.SUMMARIZATION,
592
+ "CNN/DailyMail summarization",
593
+ True,
594
+ False,
595
+ ),
596
+ HFModel(
597
+ "T5 Base",
598
+ "t5-base",
599
+ ModelCategory.SUMMARIZATION,
600
+ "Text-to-Text Transfer Transformer",
601
+ True,
602
+ False,
603
+ ),
604
+ ]
605
+
606
+ # Video Generation and Processing Models
607
+ VIDEO_GENERATION_MODELS = [
608
+ HFModel(
609
+ "Stable Video Diffusion",
610
+ "stabilityai/stable-video-diffusion-img2vid",
611
+ ModelCategory.TEXT_TO_VIDEO,
612
+ "Image-to-video generation model",
613
+ True,
614
+ False,
615
+ ),
616
+ HFModel(
617
+ "AnimateDiff",
618
+ "guoyww/animatediff",
619
+ ModelCategory.VIDEO_GENERATION,
620
+ "Text-to-video animation generation",
621
+ True,
622
+ False,
623
+ ),
624
+ HFModel(
625
+ "VideoCrafter",
626
+ "videogen/VideoCrafter",
627
+ ModelCategory.TEXT_TO_VIDEO,
628
+ "High-quality text-to-video generation",
629
+ True,
630
+ False,
631
+ ),
632
+ HFModel(
633
+ "Video ChatGPT",
634
+ "mbzuai-oryx/Video-ChatGPT-7B",
635
+ ModelCategory.VIDEO_TO_TEXT,
636
+ "Video understanding and description",
637
+ True,
638
+ False,
639
+ ),
640
+ HFModel(
641
+ "Video-BLIP",
642
+ "salesforce/video-blip-opt-2.7b",
643
+ ModelCategory.VIDEO_CLASSIFICATION,
644
+ "Video content analysis and classification",
645
+ True,
646
+ False,
647
+ ),
648
+ ]
649
+
650
+ # Code Generation and Development Models
651
+ CODE_GENERATION_MODELS = [
652
+ HFModel(
653
+ "CodeLlama 34B Instruct",
654
+ "codellama/CodeLlama-34b-Instruct-hf",
655
+ ModelCategory.CODE_GENERATION,
656
+ "Large instruction-tuned code generation model",
657
+ True,
658
+ True,
659
+ ),
660
+ HFModel(
661
+ "StarCoder2 15B",
662
+ "bigcode/starcoder2-15b",
663
+ ModelCategory.CODE_GENERATION,
664
+ "Advanced code generation and completion",
665
+ True,
666
+ False,
667
+ ),
668
+ HFModel(
669
+ "DeepSeek Coder V2",
670
+ "deepseek-ai/deepseek-coder-6.7b-instruct",
671
+ ModelCategory.CODE_GENERATION,
672
+ "Specialized coding assistant",
673
+ True,
674
+ False,
675
+ ),
676
+ HFModel(
677
+ "WizardCoder 34B",
678
+ "WizardLM/WizardCoder-Python-34B-V1.0",
679
+ ModelCategory.CODE_GENERATION,
680
+ "Python-focused code generation",
681
+ True,
682
+ False,
683
+ ),
684
+ HFModel(
685
+ "Phind CodeLlama",
686
+ "Phind/Phind-CodeLlama-34B-v2",
687
+ ModelCategory.CODE_GENERATION,
688
+ "Optimized for code explanation and debugging",
689
+ True,
690
+ False,
691
+ ),
692
+ HFModel(
693
+ "Code T5+",
694
+ "Salesforce/codet5p-770m",
695
+ ModelCategory.CODE_COMPLETION,
696
+ "Code understanding and generation",
697
+ True,
698
+ False,
699
+ ),
700
+ HFModel(
701
+ "InCoder",
702
+ "facebook/incoder-6B",
703
+ ModelCategory.CODE_COMPLETION,
704
+ "Bidirectional code generation",
705
+ True,
706
+ False,
707
+ ),
708
+ ]
709
+
710
+ # 3D and AR/VR Content Generation Models
711
+ THREE_D_MODELS = [
712
+ HFModel(
713
+ "Shap-E",
714
+ "openai/shap-e",
715
+ ModelCategory.TEXT_TO_3D,
716
+ "Text-to-3D shape generation",
717
+ True,
718
+ False,
719
+ ),
720
+ HFModel(
721
+ "Point-E",
722
+ "openai/point-e",
723
+ ModelCategory.TEXT_TO_3D,
724
+ "Text-to-3D point cloud generation",
725
+ True,
726
+ False,
727
+ ),
728
+ HFModel(
729
+ "DreamFusion",
730
+ "google/dreamfusion",
731
+ ModelCategory.IMAGE_TO_3D,
732
+ "Image-to-3D mesh generation",
733
+ True,
734
+ False,
735
+ ),
736
+ HFModel(
737
+ "Magic3D",
738
+ "nvidia/magic3d",
739
+ ModelCategory.THREE_D_GENERATION,
740
+ "High-quality 3D content creation",
741
+ True,
742
+ False,
743
+ ),
744
+ HFModel(
745
+ "GET3D",
746
+ "nvidia/get3d",
747
+ ModelCategory.MESH_GENERATION,
748
+ "3D mesh generation from text",
749
+ True,
750
+ False,
751
+ ),
752
+ ]
753
+
754
+ # Document Processing and OCR Models
755
+ DOCUMENT_PROCESSING_MODELS = [
756
+ HFModel(
757
+ "TrOCR Large",
758
+ "microsoft/trocr-large-printed",
759
+ ModelCategory.OCR,
760
+ "Transformer-based OCR for printed text",
761
+ True,
762
+ False,
763
+ ),
764
+ HFModel(
765
+ "TrOCR Handwritten",
766
+ "microsoft/trocr-large-handwritten",
767
+ ModelCategory.HANDWRITING_RECOGNITION,
768
+ "Handwritten text recognition",
769
+ True,
770
+ False,
771
+ ),
772
+ HFModel(
773
+ "LayoutLMv3",
774
+ "microsoft/layoutlmv3-large",
775
+ ModelCategory.DOCUMENT_ANALYSIS,
776
+ "Document layout analysis and understanding",
777
+ True,
778
+ False,
779
+ ),
780
+ HFModel(
781
+ "Donut",
782
+ "naver-clova-ix/donut-base",
783
+ ModelCategory.DOCUMENT_ANALYSIS,
784
+ "OCR-free document understanding",
785
+ True,
786
+ False,
787
+ ),
788
+ HFModel(
789
+ "TableTransformer",
790
+ "microsoft/table-transformer-structure-recognition",
791
+ ModelCategory.TABLE_EXTRACTION,
792
+ "Table structure recognition",
793
+ True,
794
+ False,
795
+ ),
796
+ HFModel(
797
+ "FormNet",
798
+ "microsoft/formnet",
799
+ ModelCategory.FORM_PROCESSING,
800
+ "Form understanding and processing",
801
+ True,
802
+ False,
803
+ ),
804
+ ]
805
+
806
+ # Multimodal AI Models
807
+ MULTIMODAL_MODELS = [
808
+ HFModel(
809
+ "BLIP-2",
810
+ "Salesforce/blip2-opt-2.7b",
811
+ ModelCategory.VISION_LANGUAGE,
812
+ "Vision-language understanding and generation",
813
+ True,
814
+ False,
815
+ ),
816
+ HFModel(
817
+ "InstructBLIP",
818
+ "Salesforce/instructblip-vicuna-7b",
819
+ ModelCategory.MULTIMODAL_REASONING,
820
+ "Instruction-following multimodal model",
821
+ True,
822
+ False,
823
+ ),
824
+ HFModel(
825
+ "LLaVA",
826
+ "liuhaotian/llava-v1.5-7b",
827
+ ModelCategory.VISUAL_QUESTION_ANSWERING,
828
+ "Large Language and Vision Assistant",
829
+ True,
830
+ False,
831
+ ),
832
+ HFModel(
833
+ "GPT-4V",
834
+ "openai/gpt-4-vision-preview",
835
+ ModelCategory.MULTIMODAL_CHAT,
836
+ "Advanced multimodal conversational AI",
837
+ True,
838
+ True,
839
+ ),
840
+ HFModel(
841
+ "Flamingo",
842
+ "deepmind/flamingo-9b",
843
+ ModelCategory.CROSS_MODAL_GENERATION,
844
+ "Few-shot learning for vision and language",
845
+ True,
846
+ False,
847
+ ),
848
+ ]
849
+
850
+ # Specialized AI Models
851
+ SPECIALIZED_AI_MODELS = [
852
+ HFModel(
853
+ "MusicGen",
854
+ "facebook/musicgen-medium",
855
+ ModelCategory.MUSIC_GENERATION,
856
+ "Text-to-music generation",
857
+ True,
858
+ False,
859
+ ),
860
+ HFModel(
861
+ "AudioCraft",
862
+ "facebook/audiocraft_musicgen_melody",
863
+ ModelCategory.MUSIC_GENERATION,
864
+ "Melody-conditioned music generation",
865
+ True,
866
+ False,
867
+ ),
868
+ HFModel(
869
+ "Real-ESRGAN",
870
+ "xinntao/realesrgan-x4plus",
871
+ ModelCategory.SUPER_RESOLUTION,
872
+ "Image super-resolution",
873
+ True,
874
+ False,
875
+ ),
876
+ HFModel(
877
+ "GFPGAN",
878
+ "TencentARC/GFPGAN",
879
+ ModelCategory.FACE_RESTORATION,
880
+ "Face restoration and enhancement",
881
+ True,
882
+ False,
883
+ ),
884
+ HFModel(
885
+ "LaMa",
886
+ "advimman/lama",
887
+ ModelCategory.IMAGE_INPAINTING,
888
+ "Large Mask Inpainting",
889
+ True,
890
+ False,
891
+ ),
892
+ HFModel(
893
+ "Background Remover",
894
+ "briaai/RMBG-1.4",
895
+ ModelCategory.BACKGROUND_REMOVAL,
896
+ "Automatic background removal",
897
+ True,
898
+ False,
899
+ ),
900
+ HFModel(
901
+ "Voice Cloner",
902
+ "coqui/XTTS-v2",
903
+ ModelCategory.VOICE_CLONING,
904
+ "Multilingual voice cloning",
905
+ True,
906
+ False,
907
+ ),
908
+ ]
909
+
910
+ # Creative Content Models
911
+ CREATIVE_CONTENT_MODELS = [
912
+ HFModel(
913
+ "GPT-3.5 Creative",
914
+ "openai/gpt-3.5-turbo-instruct",
915
+ ModelCategory.CREATIVE_WRITING,
916
+ "Creative writing and storytelling",
917
+ True,
918
+ True,
919
+ ),
920
+ HFModel(
921
+ "Novel AI",
922
+ "novelai/genji-python-6b",
923
+ ModelCategory.STORY_GENERATION,
924
+ "Interactive story generation",
925
+ True,
926
+ False,
927
+ ),
928
+ HFModel(
929
+ "Poet Assistant",
930
+ "gpt2-poetry",
931
+ ModelCategory.POETRY_GENERATION,
932
+ "Poetry generation and analysis",
933
+ True,
934
+ False,
935
+ ),
936
+ HFModel(
937
+ "Blog Writer",
938
+ "google/flan-t5-large",
939
+ ModelCategory.BLOG_WRITING,
940
+ "Blog content creation",
941
+ True,
942
+ False,
943
+ ),
944
+ HFModel(
945
+ "Marketing Copy AI",
946
+ "microsoft/DialoGPT-large",
947
+ ModelCategory.MARKETING_COPY,
948
+ "Marketing content generation",
949
+ True,
950
+ False,
951
+ ),
952
+ ]
953
+
954
+ # Game Development Models
955
+ GAME_DEVELOPMENT_MODELS = [
956
+ HFModel(
957
+ "Character AI",
958
+ "character-ai/character-generator",
959
+ ModelCategory.CHARACTER_GENERATION,
960
+ "Game character generation and design",
961
+ True,
962
+ False,
963
+ ),
964
+ HFModel(
965
+ "Level Designer",
966
+ "unity/level-generator",
967
+ ModelCategory.LEVEL_GENERATION,
968
+ "Game level and environment generation",
969
+ True,
970
+ False,
971
+ ),
972
+ HFModel(
973
+ "Dialogue Writer",
974
+ "bioware/dialogue-generator",
975
+ ModelCategory.DIALOGUE_GENERATION,
976
+ "Game dialogue and narrative generation",
977
+ True,
978
+ False,
979
+ ),
980
+ HFModel(
981
+ "Asset Creator",
982
+ "epic/asset-generator",
983
+ ModelCategory.GAME_ASSET_GENERATION,
984
+ "Game asset and texture generation",
985
+ True,
986
+ False,
987
+ ),
988
+ ]
989
+
990
+ # Science and Research Models
991
+ SCIENCE_RESEARCH_MODELS = [
992
+ HFModel(
993
+ "AlphaFold",
994
+ "deepmind/alphafold2",
995
+ ModelCategory.PROTEIN_FOLDING,
996
+ "Protein structure prediction",
997
+ True,
998
+ False,
999
+ ),
1000
+ HFModel(
1001
+ "ChemBERTa",
1002
+ "DeepChem/ChemBERTa-77M-MLM",
1003
+ ModelCategory.MOLECULE_GENERATION,
1004
+ "Chemical compound analysis",
1005
+ True,
1006
+ False,
1007
+ ),
1008
+ HFModel(
1009
+ "SciBERT",
1010
+ "allenai/scibert_scivocab_uncased",
1011
+ ModelCategory.SCIENTIFIC_WRITING,
1012
+ "Scientific text understanding",
1013
+ True,
1014
+ False,
1015
+ ),
1016
+ HFModel(
1017
+ "Research Assistant",
1018
+ "microsoft/specter2",
1019
+ ModelCategory.RESEARCH_ASSISTANCE,
1020
+ "Research paper analysis and recommendations",
1021
+ True,
1022
+ False,
1023
+ ),
1024
+ HFModel(
1025
+ "Data Analyst",
1026
+ "microsoft/data-copilot",
1027
+ ModelCategory.DATA_ANALYSIS,
1028
+ "Automated data analysis and insights",
1029
+ True,
1030
+ False,
1031
+ ),
1032
+ ]
1033
+
1034
+ # Business and Productivity Models
1035
+ BUSINESS_PRODUCTIVITY_MODELS = [
1036
+ HFModel(
1037
+ "Email Assistant",
1038
+ "microsoft/email-generator",
1039
+ ModelCategory.EMAIL_GENERATION,
1040
+ "Professional email composition",
1041
+ True,
1042
+ False,
1043
+ ),
1044
+ HFModel(
1045
+ "Presentation AI",
1046
+ "gamma/presentation-generator",
1047
+ ModelCategory.PRESENTATION_CREATION,
1048
+ "Automated presentation creation",
1049
+ True,
1050
+ False,
1051
+ ),
1052
+ HFModel(
1053
+ "Report Writer",
1054
+ "openai/report-generator",
1055
+ ModelCategory.REPORT_GENERATION,
1056
+ "Business report generation",
1057
+ True,
1058
+ False,
1059
+ ),
1060
+ HFModel(
1061
+ "Meeting Summarizer",
1062
+ "microsoft/meeting-summarizer",
1063
+ ModelCategory.MEETING_SUMMARIZATION,
1064
+ "Meeting notes and action items",
1065
+ True,
1066
+ False,
1067
+ ),
1068
+ HFModel(
1069
+ "Project Planner",
1070
+ "atlassian/project-ai",
1071
+ ModelCategory.PROJECT_PLANNING,
1072
+ "Project planning and management",
1073
+ True,
1074
+ False,
1075
+ ),
1076
+ ]
1077
+
1078
+ # AI Teacher Models - Best-in-Class Educational AI System
1079
+ AI_TEACHER_MODELS = [
1080
+ # Primary AI Tutoring Models
1081
+ HFModel(
1082
+ "AI Tutor Interactive",
1083
+ "microsoft/DialoGPT-medium",
1084
+ ModelCategory.AI_TUTORING,
1085
+ "Interactive AI tutor for conversational learning",
1086
+ True,
1087
+ False,
1088
+ 2048,
1089
+ True,
1090
+ ),
1091
+ HFModel(
1092
+ "Goal-Oriented Tutor",
1093
+ "microsoft/GODEL-v1_1-large-seq2seq",
1094
+ ModelCategory.AI_TUTORING,
1095
+ "Goal-oriented conversational AI for personalized tutoring",
1096
+ True,
1097
+ False,
1098
+ 2048,
1099
+ True,
1100
+ ),
1101
+ HFModel(
1102
+ "Code Instructor AI",
1103
+ "microsoft/codebert-base",
1104
+ ModelCategory.CODING_INSTRUCTION,
1105
+ "AI coding instructor for programming education",
1106
+ True,
1107
+ False,
1108
+ 1024,
1109
+ False,
1110
+ ),
1111
+ HFModel(
1112
+ "deepmind/flamingo-base",
1113
+ "ADAPTIVE_LEARNING",
1114
+ ModelCategory.ADAPTIVE_LEARNING,
1115
+ "Multimodal AI for adaptive learning experiences",
1116
+ True,
1117
+ False,
1118
+ 1024,
1119
+ True,
1120
+ ),
1121
+ # Educational Content Generation
1122
+ HFModel(
1123
+ "gpt2-medium",
1124
+ "EDUCATIONAL_CONTENT",
1125
+ ModelCategory.EDUCATIONAL_CONTENT,
1126
+ "Educational content generation for curriculum development",
1127
+ True,
1128
+ False,
1129
+ 1024,
1130
+ True,
1131
+ ),
1132
+ HFModel(
1133
+ "facebook/bart-large-cnn",
1134
+ "LESSON_PLANNING",
1135
+ ModelCategory.LESSON_PLANNING,
1136
+ "Lesson plan generation and educational summarization",
1137
+ True,
1138
+ False,
1139
+ 1024,
1140
+ True,
1141
+ ),
1142
+ HFModel(
1143
+ "microsoft/prophetnet-large-uncased",
1144
+ "STUDY_GUIDE_CREATION",
1145
+ ModelCategory.STUDY_GUIDE_CREATION,
1146
+ "Study guide and learning material generation",
1147
+ True,
1148
+ False,
1149
+ 1024,
1150
+ True,
1151
+ ),
1152
+ HFModel(
1153
+ "bigscience/bloom-560m",
1154
+ "EDUCATIONAL_CONTENT",
1155
+ ModelCategory.EDUCATIONAL_CONTENT,
1156
+ "Multilingual educational content for global learning",
1157
+ True,
1158
+ False,
1159
+ 1024,
1160
+ True,
1161
+ ),
1162
+ # Subject-Specific Teaching Models
1163
+ HFModel(
1164
+ "microsoft/codebert-base",
1165
+ "CODING_INSTRUCTION",
1166
+ ModelCategory.CODING_INSTRUCTION,
1167
+ "Programming education and code explanation",
1168
+ True,
1169
+ False,
1170
+ 1024,
1171
+ True,
1172
+ ),
1173
+ HFModel(
1174
+ "allenai/scibert_scivocab_uncased",
1175
+ "SCIENCE_TUTORING",
1176
+ ModelCategory.SCIENCE_TUTORING,
1177
+ "Science education and scientific concept explanation",
1178
+ True,
1179
+ False,
1180
+ 1024,
1181
+ True,
1182
+ ),
1183
+ HFModel(
1184
+ "google/flan-t5-base",
1185
+ "SUBJECT_TEACHING",
1186
+ ModelCategory.SUBJECT_TEACHING,
1187
+ "Multi-subject teaching AI with instruction following",
1188
+ True,
1189
+ False,
1190
+ 1024,
1191
+ True,
1192
+ ),
1193
+ HFModel(
1194
+ "microsoft/unixcoder-base",
1195
+ "CODING_INSTRUCTION",
1196
+ ModelCategory.CODING_INSTRUCTION,
1197
+ "Advanced programming instruction and debugging help",
1198
+ True,
1199
+ False,
1200
+ 1024,
1201
+ True,
1202
+ ),
1203
+ # Math and STEM Education
1204
+ HFModel(
1205
+ "microsoft/DialoGPT-small",
1206
+ "MATH_TUTORING",
1207
+ ModelCategory.MATH_TUTORING,
1208
+ "Interactive math tutoring and problem solving",
1209
+ True,
1210
+ False,
1211
+ 1024,
1212
+ True,
1213
+ ),
1214
+ HFModel(
1215
+ "facebook/galactica-125m",
1216
+ "SCIENCE_TUTORING",
1217
+ ModelCategory.SCIENCE_TUTORING,
1218
+ "Scientific knowledge and research education",
1219
+ True,
1220
+ False,
1221
+ 1024,
1222
+ True,
1223
+ ),
1224
+ HFModel(
1225
+ "microsoft/graphcodebert-base",
1226
+ "CODING_INSTRUCTION",
1227
+ ModelCategory.CODING_INSTRUCTION,
1228
+ "Code structure and algorithm education",
1229
+ True,
1230
+ False,
1231
+ 1024,
1232
+ True,
1233
+ ),
1234
+ HFModel(
1235
+ "deepmind/mathematical-reasoning",
1236
+ "MATH_TUTORING",
1237
+ ModelCategory.MATH_TUTORING,
1238
+ "Mathematical reasoning and proof assistance",
1239
+ True,
1240
+ False,
1241
+ 1024,
1242
+ True,
1243
+ ),
1244
+ # Language and Literature Education
1245
+ HFModel(
1246
+ "microsoft/prophetnet-large-uncased-cnndm",
1247
+ "LANGUAGE_TUTORING",
1248
+ ModelCategory.LANGUAGE_TUTORING,
1249
+ "Language learning and literature analysis",
1250
+ True,
1251
+ False,
1252
+ 1024,
1253
+ True,
1254
+ ),
1255
+ HFModel(
1256
+ "facebook/mbart-large-50-many-to-many-mmt",
1257
+ "LANGUAGE_TUTORING",
1258
+ ModelCategory.LANGUAGE_TUTORING,
1259
+ "Multilingual language education and translation",
1260
+ True,
1261
+ False,
1262
+ 1024,
1263
+ True,
1264
+ ),
1265
+ HFModel(
1266
+ "google/electra-base-discriminator",
1267
+ "LANGUAGE_TUTORING",
1268
+ ModelCategory.LANGUAGE_TUTORING,
1269
+ "Language comprehension and grammar instruction",
1270
+ True,
1271
+ False,
1272
+ 1024,
1273
+ True,
1274
+ ),
1275
+ # Assessment and Testing
1276
+ HFModel(
1277
+ "microsoft/DialoGPT-large",
1278
+ "QUIZ_GENERATION",
1279
+ ModelCategory.QUIZ_GENERATION,
1280
+ "Interactive quiz and assessment generation",
1281
+ True,
1282
+ False,
1283
+ 1024,
1284
+ True,
1285
+ ),
1286
+ HFModel(
1287
+ "facebook/bart-large",
1288
+ "LEARNING_ASSESSMENT",
1289
+ ModelCategory.LEARNING_ASSESSMENT,
1290
+ "Learning progress assessment and feedback",
1291
+ True,
1292
+ False,
1293
+ 1024,
1294
+ True,
1295
+ ),
1296
+ HFModel(
1297
+ "google/t5-base",
1298
+ "QUIZ_GENERATION",
1299
+ ModelCategory.QUIZ_GENERATION,
1300
+ "Question generation for educational assessment",
1301
+ True,
1302
+ False,
1303
+ 1024,
1304
+ True,
1305
+ ),
1306
+ HFModel(
1307
+ "microsoft/unilm-base-cased",
1308
+ "EXAM_PREPARATION",
1309
+ ModelCategory.EXAM_PREPARATION,
1310
+ "Exam preparation and practice test generation",
1311
+ True,
1312
+ False,
1313
+ 1024,
1314
+ True,
1315
+ ),
1316
+ # Personalized Learning
1317
+ HFModel(
1318
+ "huggingface/distilbert-base-uncased",
1319
+ "PERSONALIZED_LEARNING",
1320
+ ModelCategory.PERSONALIZED_LEARNING,
1321
+ "Personalized learning path recommendation",
1322
+ True,
1323
+ False,
1324
+ 1024,
1325
+ True,
1326
+ ),
1327
+ HFModel(
1328
+ "microsoft/layoutlm-base-uncased",
1329
+ "LEARNING_ANALYTICS",
1330
+ ModelCategory.LEARNING_ANALYTICS,
1331
+ "Educational document analysis and insights",
1332
+ True,
1333
+ False,
1334
+ 1024,
1335
+ True,
1336
+ ),
1337
+ HFModel(
1338
+ "facebook/opt-125m",
1339
+ "ADAPTIVE_LEARNING",
1340
+ ModelCategory.ADAPTIVE_LEARNING,
1341
+ "Adaptive learning system with dynamic content",
1342
+ True,
1343
+ False,
1344
+ 1024,
1345
+ True,
1346
+ ),
1347
+ # Concept Explanation and Understanding
1348
+ HFModel(
1349
+ "microsoft/deberta-base",
1350
+ "CONCEPT_EXPLANATION",
1351
+ ModelCategory.CONCEPT_EXPLANATION,
1352
+ "Clear concept explanation and knowledge breakdown",
1353
+ True,
1354
+ False,
1355
+ 1024,
1356
+ True,
1357
+ ),
1358
+ HFModel(
1359
+ "google/pegasus-xsum",
1360
+ "CONCEPT_EXPLANATION",
1361
+            ModelCategory.CONCEPT_EXPLANATION,
+            "Concept summarization and explanation",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        HFModel(
+            "facebook/bart-base",
+            "CONCEPT_EXPLANATION",
+            ModelCategory.CONCEPT_EXPLANATION,
+            "Interactive concept teaching and clarification",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        # Homework and Study Assistance
+        HFModel(
+            "microsoft/codebert-base-mlm",
+            "HOMEWORK_ASSISTANCE",
+            ModelCategory.HOMEWORK_ASSISTANCE,
+            "Programming homework help and debugging",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        HFModel(
+            "google/flan-t5-small",
+            "HOMEWORK_ASSISTANCE",
+            ModelCategory.HOMEWORK_ASSISTANCE,
+            "General homework assistance across subjects",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        HFModel(
+            "facebook/mbart-large-cc25",
+            "HOMEWORK_ASSISTANCE",
+            ModelCategory.HOMEWORK_ASSISTANCE,
+            "Multilingual homework support and explanation",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        # Curriculum Design and Planning
+        HFModel(
+            "microsoft/prophetnet-base-uncased",
+            "CURRICULUM_DESIGN",
+            ModelCategory.CURRICULUM_DESIGN,
+            "Curriculum planning and educational structure design",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        HFModel(
+            "google/t5-small",
+            "LESSON_PLANNING",
+            ModelCategory.LESSON_PLANNING,
+            "Detailed lesson planning and activity design",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        HFModel(
+            "facebook/bart-large-xsum",
+            "CURRICULUM_DESIGN",
+            ModelCategory.CURRICULUM_DESIGN,
+            "Educational program summarization and design",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        # Educational Games and Interactive Learning
+        HFModel(
+            "microsoft/DialoGPT-base",
+            "EDUCATIONAL_GAMES",
+            ModelCategory.EDUCATIONAL_GAMES,
+            "Interactive educational games and learning activities",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        HFModel(
+            "huggingface/bert-base-uncased",
+            "EDUCATIONAL_GAMES",
+            ModelCategory.EDUCATIONAL_GAMES,
+            "Educational quiz games and interactive learning",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        # History and Social Studies
+        HFModel(
+            "microsoft/deberta-large",
+            "HISTORY_TUTORING",
+            ModelCategory.HISTORY_TUTORING,
+            "Historical analysis and social studies education",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        HFModel(
+            "facebook/opt-350m",
+            "HISTORY_TUTORING",
+            ModelCategory.HISTORY_TUTORING,
+            "Interactive history lessons and timeline explanation",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        # Advanced Educational Features
+        HFModel(
+            "microsoft/unilm-large-cased",
+            "LEARNING_ANALYTICS",
+            ModelCategory.LEARNING_ANALYTICS,
+            "Advanced learning analytics and progress tracking",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        HFModel(
+            "google/electra-large-discriminator",
+            "PERSONALIZED_LEARNING",
+            ModelCategory.PERSONALIZED_LEARNING,
+            "Advanced personalized learning with AI adaptation",
+            True,
+            False,
+            1024,
+            True,
+        ),
+        HFModel(
+            "facebook/mbart-large-50",
+            "ADAPTIVE_LEARNING",
+            ModelCategory.ADAPTIVE_LEARNING,
+            "Multilingual adaptive learning system",
+            True,
+            False,
+            1024,
+            True,
+        ),
+    ]
+
+
+class HuggingFaceInference:
+    """Hugging Face Inference API integration"""
+
+    def __init__(
+        self,
+        api_token: str,
+        base_url: str = "https://api-inference.huggingface.co/models/",
+    ):
+        self.api_token = api_token
+        self.base_url = base_url
+        self.session = None
+
+    async def __aenter__(self):
+        self.session = aiohttp.ClientSession(
+            headers={"Authorization": f"Bearer {self.api_token}"},
+            timeout=aiohttp.ClientTimeout(total=300),  # 5 minutes timeout
+        )
+        return self
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        if self.session:
+            await self.session.close()
+
+    async def text_generation(
+        self,
+        model_id: str,
+        prompt: str,
+        max_tokens: int = 100,
+        temperature: float = 0.7,
+        stream: bool = False,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Generate text using a text generation model"""
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "max_new_tokens": max_tokens,
+                "temperature": temperature,
+                "do_sample": True,
+                **kwargs,
+            },
+            "options": {"use_cache": False},
+        }
+
+        if stream:
+            # _stream_request is an async generator, so return it without
+            # awaiting; callers iterate over it with `async for`.
+            return self._stream_request(model_id, payload)
+        else:
+            return await self._request(model_id, payload)
+
+    async def text_to_image(
+        self,
+        model_id: str,
+        prompt: str,
+        negative_prompt: Optional[str] = None,
+        **kwargs,
+    ) -> bytes:
+        """Generate image from text prompt"""
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                **({"negative_prompt": negative_prompt} if negative_prompt else {}),
+                **kwargs,
+            },
+        }
+
+        response = await self._request(model_id, payload, expect_json=False)
+        return response
+
+    async def automatic_speech_recognition(
+        self, model_id: str, audio_data: bytes, **kwargs
+    ) -> Dict[str, Any]:
+        """Transcribe audio to text"""
+        # Convert audio bytes to base64 for API
+        audio_b64 = base64.b64encode(audio_data).decode()
+
+        payload = {"inputs": audio_b64, "parameters": kwargs}
+
+        return await self._request(model_id, payload)
+
+    async def text_to_speech(self, model_id: str, text: str, **kwargs) -> bytes:
+        """Convert text to speech audio"""
+        payload = {"inputs": text, "parameters": kwargs}
+
+        response = await self._request(model_id, payload, expect_json=False)
+        return response
+
+    async def image_classification(
+        self, model_id: str, image_data: bytes, **kwargs
+    ) -> Dict[str, Any]:
+        """Classify images"""
+        # Convert image to base64
+        image_b64 = base64.b64encode(image_data).decode()
+
+        payload = {"inputs": image_b64, "parameters": kwargs}
+
+        return await self._request(model_id, payload)
+
+    async def feature_extraction(
+        self, model_id: str, texts: Union[str, List[str]], **kwargs
+    ) -> Dict[str, Any]:
+        """Extract embeddings from text"""
+        payload = {"inputs": texts, "parameters": kwargs}
+
+        return await self._request(model_id, payload)
+
+    async def translation(
+        self,
+        model_id: str,
+        text: str,
+        src_lang: Optional[str] = None,
+        tgt_lang: Optional[str] = None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Translate text between languages"""
+        payload = {
+            "inputs": text,
+            "parameters": {
+                **({"src_lang": src_lang} if src_lang else {}),
+                **({"tgt_lang": tgt_lang} if tgt_lang else {}),
+                **kwargs,
+            },
+        }
+
+        return await self._request(model_id, payload)
+
+    async def summarization(
+        self,
+        model_id: str,
+        text: str,
+        max_length: int = 150,
+        min_length: int = 30,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Summarize text"""
+        payload = {
+            "inputs": text,
+            "parameters": {
+                "max_length": max_length,
+                "min_length": min_length,
+                **kwargs,
+            },
+        }
+
+        return await self._request(model_id, payload)
+
+    async def question_answering(
+        self, model_id: str, question: str, context: str, **kwargs
+    ) -> Dict[str, Any]:
+        """Answer questions based on context"""
+        payload = {
+            "inputs": {"question": question, "context": context},
+            "parameters": kwargs,
+        }
+
+        return await self._request(model_id, payload)
+
+    async def zero_shot_classification(
+        self, model_id: str, text: str, candidate_labels: List[str], **kwargs
+    ) -> Dict[str, Any]:
+        """Classify text without training data"""
+        payload = {
+            "inputs": text,
+            "parameters": {"candidate_labels": candidate_labels, **kwargs},
+        }
+
+        return await self._request(model_id, payload)
+
+    async def conversational(
+        self,
+        model_id: str,
+        text: str,
+        conversation_history: Optional[List[Dict[str, str]]] = None,
+        **kwargs,
+    ) -> Dict[str, Any]:
+        """Have a conversation with a model"""
+        payload = {
+            "inputs": {
+                "text": text,
+                **(
+                    {
+                        "past_user_inputs": [
+                            h["user"] for h in conversation_history if "user" in h
+                        ]
+                    }
+                    if conversation_history
+                    else {}
+                ),
+                **(
+                    {
+                        "generated_responses": [
+                            h["bot"] for h in conversation_history if "bot" in h
+                        ]
+                    }
+                    if conversation_history
+                    else {}
+                ),
+            },
+            "parameters": kwargs,
+        }
+
+        return await self._request(model_id, payload)
+
+    async def _request(
+        self, model_id: str, payload: Dict[str, Any], expect_json: bool = True
+    ) -> Union[Dict[str, Any], bytes]:
+        """Make HTTP request to Hugging Face API"""
+        url = f"{self.base_url}{model_id}"
+
+        try:
+            async with self.session.post(url, json=payload) as response:
+                if response.status == 200:
+                    if expect_json:
+                        return await response.json()
+                    else:
+                        return await response.read()
+                elif response.status == 503:
+                    # Model is loading, wait and retry
+                    error_info = await response.json()
+                    estimated_time = error_info.get("estimated_time", 30)
+                    logger.info(
+                        f"Model {model_id} is loading, waiting {estimated_time}s"
+                    )
+                    await asyncio.sleep(min(estimated_time, 60))  # Cap at 60 seconds
+                    return await self._request(model_id, payload, expect_json)
+                else:
+                    error_text = await response.text()
+                    raise Exception(
+                        f"API request failed with status {response.status}: {error_text}"
+                    )
+
+        except Exception as e:
+            logger.error(f"Error calling Hugging Face API for {model_id}: {e}")
+            raise
+
+    async def _stream_request(self, model_id: str, payload: Dict[str, Any]):
+        """Stream response from Hugging Face API"""
+        url = f"{self.base_url}{model_id}"
+        payload["stream"] = True
+
+        try:
+            async with self.session.post(url, json=payload) as response:
+                if response.status == 200:
+                    async for chunk in response.content:
+                        if chunk:
+                            yield chunk.decode("utf-8")
+                else:
+                    error_text = await response.text()
+                    raise Exception(
+                        f"Streaming request failed with status {response.status}: {error_text}"
+                    )
+
+        except Exception as e:
+            logger.error(f"Error streaming from Hugging Face API for {model_id}: {e}")
+            raise
+
+    # New methods for expanded model categories
+
+    async def text_to_video(
+        self, model_id: str, prompt: str, **kwargs
+    ) -> Dict[str, Any]:
+        """Generate video from text prompt"""
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "duration": kwargs.get("duration", 5),
+                "fps": kwargs.get("fps", 24),
+                "width": kwargs.get("width", 512),
+                "height": kwargs.get("height", 512),
+                **kwargs,
+            },
+        }
+        return await self._request(model_id, payload)
+
+    async def video_to_text(
+        self, model_id: str, video_data: bytes, **kwargs
+    ) -> Dict[str, Any]:
+        """Analyze video and generate text description"""
+        video_b64 = base64.b64encode(video_data).decode()
+        payload = {
+            "inputs": {"video": video_b64},
+            "parameters": kwargs,
+        }
+        return await self._request(model_id, payload)
+
+    async def code_generation(
+        self, model_id: str, prompt: str, **kwargs
+    ) -> Dict[str, Any]:
+        """Generate code from natural language prompt"""
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "max_length": kwargs.get("max_length", 500),
+                "temperature": kwargs.get("temperature", 0.2),
+                "language": kwargs.get("language", "python"),
+                **kwargs,
+            },
+        }
+        return await self._request(model_id, payload)
+
+    async def code_completion(
+        self, model_id: str, code: str, **kwargs
+    ) -> Dict[str, Any]:
+        """Complete partial code"""
+        payload = {
+            "inputs": code,
+            "parameters": {
+                "max_length": kwargs.get("max_length", 100),
+                "temperature": kwargs.get("temperature", 0.1),
+                **kwargs,
+            },
+        }
+        return await self._request(model_id, payload)
+
+    async def text_to_3d(self, model_id: str, prompt: str, **kwargs) -> Dict[str, Any]:
+        """Generate 3D model from text description"""
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "resolution": kwargs.get("resolution", 64),
+                "format": kwargs.get("format", "obj"),
+                **kwargs,
+            },
+        }
+        return await self._request(model_id, payload)
+
+    async def image_to_3d(
+        self, model_id: str, image_data: bytes, **kwargs
+    ) -> Dict[str, Any]:
+        """Generate 3D model from image"""
+        image_b64 = base64.b64encode(image_data).decode()
+        payload = {
+            "inputs": {"image": image_b64},
+            "parameters": kwargs,
+        }
+        return await self._request(model_id, payload)
+
+    async def ocr(self, model_id: str, image_data: bytes, **kwargs) -> Dict[str, Any]:
+        """Perform optical character recognition on image"""
+        image_b64 = base64.b64encode(image_data).decode()
+        payload = {
+            "inputs": {"image": image_b64},
+            "parameters": {"language": kwargs.get("language", "en"), **kwargs},
+        }
+        return await self._request(model_id, payload)
+
+    async def document_analysis(
+        self, model_id: str, document_data: bytes, **kwargs
+    ) -> Dict[str, Any]:
+        """Analyze document structure and content"""
+        doc_b64 = base64.b64encode(document_data).decode()
+        payload = {
+            "inputs": {"document": doc_b64},
+            "parameters": kwargs,
+        }
+        return await self._request(model_id, payload)
+
+    async def vision_language(
+        self, model_id: str, image_data: bytes, text: str, **kwargs
+    ) -> Dict[str, Any]:
+        """Process image and text together"""
+        image_b64 = base64.b64encode(image_data).decode()
+        payload = {
+            "inputs": {"image": image_b64, "text": text},
+            "parameters": kwargs,
+        }
+        return await self._request(model_id, payload)
+
+    async def multimodal_reasoning(
+        self, model_id: str, inputs: Dict[str, Any], **kwargs
+    ) -> Dict[str, Any]:
+        """Perform reasoning across multiple modalities"""
+        payload = {
+            "inputs": inputs,
+            "parameters": kwargs,
+        }
+        return await self._request(model_id, payload)
+
+    async def music_generation(
+        self, model_id: str, prompt: str, **kwargs
+    ) -> Dict[str, Any]:
+        """Generate music from text prompt"""
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "duration": kwargs.get("duration", 30),
+                "bpm": kwargs.get("bpm", 120),
+                "genre": kwargs.get("genre", "electronic"),
+                **kwargs,
+            },
+        }
+        return await self._request(model_id, payload)
+
+    async def voice_cloning(
+        self, model_id: str, text: str, voice_sample: bytes, **kwargs
+    ) -> bytes:
+        """Clone voice and synthesize speech"""
+        voice_b64 = base64.b64encode(voice_sample).decode()
+        payload = {
+            "inputs": {"text": text, "voice_sample": voice_b64},
+            "parameters": kwargs,
+        }
+        return await self._request(model_id, payload, expect_json=False)
+
+    async def super_resolution(
+        self, model_id: str, image_data: bytes, **kwargs
+    ) -> bytes:
+        """Enhance image resolution"""
+        image_b64 = base64.b64encode(image_data).decode()
+        payload = {
+            "inputs": {"image": image_b64},
+            "parameters": {"scale_factor": kwargs.get("scale_factor", 4), **kwargs},
+        }
+        return await self._request(model_id, payload, expect_json=False)
+
+    async def background_removal(
+        self, model_id: str, image_data: bytes, **kwargs
+    ) -> bytes:
+        """Remove background from image"""
+        image_b64 = base64.b64encode(image_data).decode()
+        payload = {
+            "inputs": {"image": image_b64},
+            "parameters": kwargs,
+        }
+        return await self._request(model_id, payload, expect_json=False)
+
+    async def creative_writing(
+        self, model_id: str, prompt: str, **kwargs
+    ) -> Dict[str, Any]:
+        """Generate creative content"""
+        payload = {
+            "inputs": prompt,
+            "parameters": {
+                "max_length": kwargs.get("max_length", 1000),
+                "creativity": kwargs.get("creativity", 0.8),
+                "genre": kwargs.get("genre", "general"),
+                **kwargs,
+            },
+        }
+        return await self._request(model_id, payload)
+
+    async def business_document(
+        self, model_id: str, document_type: str, context: str, **kwargs
+    ) -> Dict[str, Any]:
+        """Generate business documents"""
+        payload = {
+            "inputs": f"Generate {document_type}: {context}",
+            "parameters": {
+                "format": kwargs.get("format", "professional"),
+                "length": kwargs.get("length", "medium"),
+                **kwargs,
+            },
+        }
+        return await self._request(model_id, payload)
+
+
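For reference, the request body that `text_generation` posts to the Inference API can be sketched as a pure function. This is a simplified mirror of the payload construction above; `build_text_generation_payload` is a hypothetical helper for illustration, not part of the class:

```python
def build_text_generation_payload(prompt, max_tokens=100, temperature=0.7, **kwargs):
    # Mirrors HuggingFaceInference.text_generation: extra keyword
    # arguments (e.g. top_p) are merged into "parameters".
    return {
        "inputs": prompt,
        "parameters": {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "do_sample": True,
            **kwargs,
        },
        "options": {"use_cache": False},
    }


payload = build_text_generation_payload("Hello", max_tokens=32, top_p=0.9)
print(payload["parameters"]["max_new_tokens"])  # 32
```

Keeping the payload shape in one place makes it easy to see which fields the API expects (`inputs`, `parameters`, `options`) regardless of which task method builds it.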
+class HuggingFaceModelManager:
+    """Manager for all Hugging Face model operations"""
+
+    def __init__(self, api_token: str):
+        self.api_token = api_token
+        self.models = HuggingFaceModels()
+
+    def get_models_by_category(self, category: ModelCategory) -> List[HFModel]:
+        """Get all models for a specific category"""
+        all_models = []
+
+        if category == ModelCategory.TEXT_GENERATION:
+            all_models = self.models.TEXT_GENERATION_MODELS
+        elif category == ModelCategory.TEXT_TO_IMAGE:
+            all_models = self.models.TEXT_TO_IMAGE_MODELS
+        elif category == ModelCategory.AUTOMATIC_SPEECH_RECOGNITION:
+            all_models = self.models.ASR_MODELS
+        elif category == ModelCategory.TEXT_TO_SPEECH:
+            all_models = self.models.TTS_MODELS
+        elif category == ModelCategory.IMAGE_CLASSIFICATION:
+            all_models = self.models.IMAGE_CLASSIFICATION_MODELS
+        elif category == ModelCategory.FEATURE_EXTRACTION:
+            all_models = self.models.FEATURE_EXTRACTION_MODELS
+        elif category == ModelCategory.TRANSLATION:
+            all_models = self.models.TRANSLATION_MODELS
+        elif category == ModelCategory.SUMMARIZATION:
+            all_models = self.models.SUMMARIZATION_MODELS
+
+        return all_models
+
+    def get_all_models(self) -> Dict[ModelCategory, List[HFModel]]:
+        """Get all available models organized by category"""
+        return {
+            # Core AI categories
+            ModelCategory.TEXT_GENERATION: self.models.TEXT_GENERATION_MODELS,
+            ModelCategory.TEXT_TO_IMAGE: self.models.TEXT_TO_IMAGE_MODELS,
+            ModelCategory.AUTOMATIC_SPEECH_RECOGNITION: self.models.ASR_MODELS,
+            ModelCategory.TEXT_TO_SPEECH: self.models.TTS_MODELS,
+            ModelCategory.IMAGE_CLASSIFICATION: self.models.IMAGE_CLASSIFICATION_MODELS,
+            ModelCategory.FEATURE_EXTRACTION: self.models.FEATURE_EXTRACTION_MODELS,
+            ModelCategory.TRANSLATION: self.models.TRANSLATION_MODELS,
+            ModelCategory.SUMMARIZATION: self.models.SUMMARIZATION_MODELS,
+            # Video and Motion
+            ModelCategory.TEXT_TO_VIDEO: self.models.VIDEO_GENERATION_MODELS,
+            ModelCategory.VIDEO_GENERATION: self.models.VIDEO_GENERATION_MODELS,
+            ModelCategory.VIDEO_TO_TEXT: self.models.VIDEO_GENERATION_MODELS,
+            ModelCategory.VIDEO_CLASSIFICATION: self.models.VIDEO_GENERATION_MODELS,
+            # Code and Development
+            ModelCategory.CODE_GENERATION: self.models.CODE_GENERATION_MODELS,
+            ModelCategory.CODE_COMPLETION: self.models.CODE_GENERATION_MODELS,
+            ModelCategory.CODE_EXPLANATION: self.models.CODE_GENERATION_MODELS,
+            ModelCategory.APP_GENERATION: self.models.CODE_GENERATION_MODELS,
+            # 3D and AR/VR
+            ModelCategory.TEXT_TO_3D: self.models.THREE_D_MODELS,
+            ModelCategory.IMAGE_TO_3D: self.models.THREE_D_MODELS,
+            ModelCategory.THREE_D_GENERATION: self.models.THREE_D_MODELS,
+            ModelCategory.MESH_GENERATION: self.models.THREE_D_MODELS,
+            # Document Processing
+            ModelCategory.OCR: self.models.DOCUMENT_PROCESSING_MODELS,
+            ModelCategory.DOCUMENT_ANALYSIS: self.models.DOCUMENT_PROCESSING_MODELS,
+            ModelCategory.HANDWRITING_RECOGNITION: self.models.DOCUMENT_PROCESSING_MODELS,
+            ModelCategory.TABLE_EXTRACTION: self.models.DOCUMENT_PROCESSING_MODELS,
+            ModelCategory.FORM_PROCESSING: self.models.DOCUMENT_PROCESSING_MODELS,
+            # Multimodal AI
+            ModelCategory.VISION_LANGUAGE: self.models.MULTIMODAL_MODELS,
+            ModelCategory.MULTIMODAL_REASONING: self.models.MULTIMODAL_MODELS,
+            ModelCategory.VISUAL_QUESTION_ANSWERING: self.models.MULTIMODAL_MODELS,
+            ModelCategory.MULTIMODAL_CHAT: self.models.MULTIMODAL_MODELS,
+            ModelCategory.CROSS_MODAL_GENERATION: self.models.MULTIMODAL_MODELS,
+            # Specialized AI
+            ModelCategory.MUSIC_GENERATION: self.models.SPECIALIZED_AI_MODELS,
+            ModelCategory.VOICE_CLONING: self.models.SPECIALIZED_AI_MODELS,
+            ModelCategory.SUPER_RESOLUTION: self.models.SPECIALIZED_AI_MODELS,
+            ModelCategory.FACE_RESTORATION: self.models.SPECIALIZED_AI_MODELS,
+            ModelCategory.IMAGE_INPAINTING: self.models.SPECIALIZED_AI_MODELS,
+            ModelCategory.BACKGROUND_REMOVAL: self.models.SPECIALIZED_AI_MODELS,
+            # Creative Content
+            ModelCategory.CREATIVE_WRITING: self.models.CREATIVE_CONTENT_MODELS,
+            ModelCategory.STORY_GENERATION: self.models.CREATIVE_CONTENT_MODELS,
+            ModelCategory.POETRY_GENERATION: self.models.CREATIVE_CONTENT_MODELS,
+            ModelCategory.BLOG_WRITING: self.models.CREATIVE_CONTENT_MODELS,
+            ModelCategory.MARKETING_COPY: self.models.CREATIVE_CONTENT_MODELS,
+            # Game Development
+            ModelCategory.GAME_ASSET_GENERATION: self.models.GAME_DEVELOPMENT_MODELS,
+            ModelCategory.CHARACTER_GENERATION: self.models.GAME_DEVELOPMENT_MODELS,
+            ModelCategory.LEVEL_GENERATION: self.models.GAME_DEVELOPMENT_MODELS,
+            ModelCategory.DIALOGUE_GENERATION: self.models.GAME_DEVELOPMENT_MODELS,
+            # Science and Research
+            ModelCategory.PROTEIN_FOLDING: self.models.SCIENCE_RESEARCH_MODELS,
+            ModelCategory.MOLECULE_GENERATION: self.models.SCIENCE_RESEARCH_MODELS,
+            ModelCategory.SCIENTIFIC_WRITING: self.models.SCIENCE_RESEARCH_MODELS,
+            ModelCategory.RESEARCH_ASSISTANCE: self.models.SCIENCE_RESEARCH_MODELS,
+            ModelCategory.DATA_ANALYSIS: self.models.SCIENCE_RESEARCH_MODELS,
+            # Business and Productivity
+            ModelCategory.EMAIL_GENERATION: self.models.BUSINESS_PRODUCTIVITY_MODELS,
+            ModelCategory.PRESENTATION_CREATION: self.models.BUSINESS_PRODUCTIVITY_MODELS,
+            ModelCategory.REPORT_GENERATION: self.models.BUSINESS_PRODUCTIVITY_MODELS,
+            ModelCategory.MEETING_SUMMARIZATION: self.models.BUSINESS_PRODUCTIVITY_MODELS,
+            ModelCategory.PROJECT_PLANNING: self.models.BUSINESS_PRODUCTIVITY_MODELS,
+        }
+
+    def get_model_by_id(self, model_id: str) -> Optional[HFModel]:
+        """Find a model by its Hugging Face model ID"""
+        for models_list in self.get_all_models().values():
+            for model in models_list:
+                if model.model_id == model_id:
+                    return model
+        return None
+
+    async def call_model(self, model_id: str, category: ModelCategory, **kwargs) -> Any:
+        """Call a Hugging Face model with the appropriate method based on category"""
+
+        async with HuggingFaceInference(self.api_token) as hf:
+            if category == ModelCategory.TEXT_GENERATION:
+                return await hf.text_generation(model_id, **kwargs)
+            elif category == ModelCategory.TEXT_TO_IMAGE:
+                return await hf.text_to_image(model_id, **kwargs)
+            elif category == ModelCategory.AUTOMATIC_SPEECH_RECOGNITION:
+                return await hf.automatic_speech_recognition(model_id, **kwargs)
+            elif category == ModelCategory.TEXT_TO_SPEECH:
+                return await hf.text_to_speech(model_id, **kwargs)
+            elif category == ModelCategory.IMAGE_CLASSIFICATION:
+                return await hf.image_classification(model_id, **kwargs)
+            elif category == ModelCategory.FEATURE_EXTRACTION:
+                return await hf.feature_extraction(model_id, **kwargs)
+            elif category == ModelCategory.TRANSLATION:
+                return await hf.translation(model_id, **kwargs)
+            elif category == ModelCategory.SUMMARIZATION:
+                return await hf.summarization(model_id, **kwargs)
+            elif category == ModelCategory.QUESTION_ANSWERING:
+                return await hf.question_answering(model_id, **kwargs)
+            elif category == ModelCategory.ZERO_SHOT_CLASSIFICATION:
+                return await hf.zero_shot_classification(model_id, **kwargs)
+            elif category == ModelCategory.CONVERSATIONAL:
+                return await hf.conversational(model_id, **kwargs)
+
+            # Video and Motion categories
+            elif category in [
+                ModelCategory.TEXT_TO_VIDEO,
+                ModelCategory.VIDEO_GENERATION,
+            ]:
+                return await hf.text_to_video(model_id, **kwargs)
+            elif category == ModelCategory.VIDEO_TO_TEXT:
+                return await hf.video_to_text(model_id, **kwargs)
+            elif category == ModelCategory.VIDEO_CLASSIFICATION:
+                return await hf.image_classification(
+                    model_id, **kwargs
+                )  # Similar to image classification
+
+            # Code and Development categories
+            elif category in [
+                ModelCategory.CODE_GENERATION,
+                ModelCategory.APP_GENERATION,
+            ]:
+                return await hf.code_generation(model_id, **kwargs)
+            elif category in [
+                ModelCategory.CODE_COMPLETION,
+                ModelCategory.CODE_EXPLANATION,
+            ]:
+                return await hf.code_completion(model_id, **kwargs)
+
+            # 3D and AR/VR categories
+            elif category in [
+                ModelCategory.TEXT_TO_3D,
+                ModelCategory.THREE_D_GENERATION,
+            ]:
+                return await hf.text_to_3d(model_id, **kwargs)
+            elif category in [ModelCategory.IMAGE_TO_3D, ModelCategory.MESH_GENERATION]:
+                return await hf.image_to_3d(model_id, **kwargs)
+
+            # Document Processing categories
+            elif category == ModelCategory.OCR:
+                return await hf.ocr(model_id, **kwargs)
+            elif category in [
+                ModelCategory.DOCUMENT_ANALYSIS,
+                ModelCategory.FORM_PROCESSING,
+                ModelCategory.TABLE_EXTRACTION,
+                ModelCategory.LAYOUT_ANALYSIS,
+            ]:
+                return await hf.document_analysis(model_id, **kwargs)
+            elif category == ModelCategory.HANDWRITING_RECOGNITION:
+                return await hf.ocr(model_id, **kwargs)  # Similar to OCR
+
+            # Multimodal AI categories
+            elif category in [
+                ModelCategory.VISION_LANGUAGE,
+                ModelCategory.VISUAL_QUESTION_ANSWERING,
+                ModelCategory.IMAGE_TEXT_MATCHING,
+            ]:
+                return await hf.vision_language(model_id, **kwargs)
+            elif category in [
+                ModelCategory.MULTIMODAL_REASONING,
+                ModelCategory.MULTIMODAL_CHAT,
+                ModelCategory.CROSS_MODAL_GENERATION,
+            ]:
+                return await hf.multimodal_reasoning(model_id, **kwargs)
+
+            # Specialized AI categories
+            elif category == ModelCategory.MUSIC_GENERATION:
+                return await hf.music_generation(model_id, **kwargs)
+            elif category == ModelCategory.VOICE_CLONING:
+                return await hf.voice_cloning(model_id, **kwargs)
+            elif category == ModelCategory.SUPER_RESOLUTION:
+                return await hf.super_resolution(model_id, **kwargs)
+            elif category in [
+                ModelCategory.FACE_RESTORATION,
+                ModelCategory.IMAGE_INPAINTING,
+                ModelCategory.IMAGE_OUTPAINTING,
+            ]:
+                return await hf.super_resolution(
+                    model_id, **kwargs
+                )  # Similar processing
+            elif category == ModelCategory.BACKGROUND_REMOVAL:
+                return await hf.background_removal(model_id, **kwargs)
+
+            # Creative Content categories
+            elif category in [
+                ModelCategory.CREATIVE_WRITING,
+                ModelCategory.STORY_GENERATION,
+                ModelCategory.POETRY_GENERATION,
+                ModelCategory.SCREENPLAY_WRITING,
+            ]:
+                return await hf.creative_writing(model_id, **kwargs)
+            elif category in [ModelCategory.BLOG_WRITING, ModelCategory.MARKETING_COPY]:
+                return await hf.text_generation(
+                    model_id, **kwargs
+                )  # Use standard text generation
+
+            # Game Development categories
+            elif category in [
+                ModelCategory.CHARACTER_GENERATION,
+                ModelCategory.LEVEL_GENERATION,
+                ModelCategory.DIALOGUE_GENERATION,
+                ModelCategory.GAME_ASSET_GENERATION,
+            ]:
+                return await hf.creative_writing(
+                    model_id, **kwargs
+                )  # Creative generation
+
+            # Science and Research categories
+            elif category in [
+                ModelCategory.PROTEIN_FOLDING,
+                ModelCategory.MOLECULE_GENERATION,
+            ]:
+                return await hf.text_generation(
+                    model_id, **kwargs
+                )  # Specialized text generation
+            elif category in [
+                ModelCategory.SCIENTIFIC_WRITING,
+                ModelCategory.RESEARCH_ASSISTANCE,
+                ModelCategory.DATA_ANALYSIS,
+            ]:
+                return await hf.text_generation(model_id, **kwargs)
+
+            # Business and Productivity categories
+            elif category in [
+                ModelCategory.EMAIL_GENERATION,
+                ModelCategory.PRESENTATION_CREATION,
+                ModelCategory.REPORT_GENERATION,
+                ModelCategory.MEETING_SUMMARIZATION,
+                ModelCategory.PROJECT_PLANNING,
+            ]:
+                return await hf.business_document(model_id, category.value, **kwargs)
+
+            else:
+                raise ValueError(f"Unsupported model category: {category}")
+
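The long if/elif chain in `call_model` maps each category onto one client method. The same routing can be sketched as a lookup table; this is a minimal standalone illustration with hypothetical names (`Category`, `DISPATCH`, `resolve`), not the module's actual API:

```python
from enum import Enum


class Category(Enum):
    TEXT_GENERATION = "text_generation"
    SUMMARIZATION = "summarization"
    CONVERSATIONAL = "conversational"


# Map each category to the client-method name it should dispatch to,
# mirroring the shape of HuggingFaceModelManager.call_model.
DISPATCH = {
    Category.TEXT_GENERATION: "text_generation",
    Category.SUMMARIZATION: "summarization",
}


def resolve(category: Category) -> str:
    """Return the handler name, raising like call_model's else branch."""
    try:
        return DISPATCH[category]
    except KeyError:
        raise ValueError(f"Unsupported model category: {category}")


print(resolve(Category.SUMMARIZATION))  # summarization
```

A table keeps the category-to-handler mapping in one place, so adding a category is a one-line change rather than a new elif branch.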
app/llm.py ADDED
@@ -0,0 +1,766 @@
+import math
+from typing import Dict, List, Optional, Union
+
+import tiktoken
+from openai import (
+    APIError,
+    AsyncAzureOpenAI,
+    AsyncOpenAI,
+    AuthenticationError,
+    OpenAIError,
+    RateLimitError,
+)
+from openai.types.chat import ChatCompletion, ChatCompletionMessage
+from tenacity import (
+    retry,
+    retry_if_exception_type,
+    stop_after_attempt,
+    wait_random_exponential,
+)
+
+from app.bedrock import BedrockClient
+from app.config import LLMSettings, config
+from app.exceptions import TokenLimitExceeded
+from app.logger import logger  # Assuming a logger is set up in your app
+from app.schema import (
+    ROLE_VALUES,
+    TOOL_CHOICE_TYPE,
+    TOOL_CHOICE_VALUES,
+    Message,
+    ToolChoice,
+)
+
+
+REASONING_MODELS = ["o1", "o3-mini"]
+MULTIMODAL_MODELS = [
+    "gpt-4-vision-preview",
+    "gpt-4o",
+    "gpt-4o-mini",
+    "claude-3-opus-20240229",
+    "claude-3-sonnet-20240229",
+    "claude-3-haiku-20240307",
+]
+
+
+class TokenCounter:
+    # Token constants
+    BASE_MESSAGE_TOKENS = 4
+    FORMAT_TOKENS = 2
+    LOW_DETAIL_IMAGE_TOKENS = 85
+    HIGH_DETAIL_TILE_TOKENS = 170
+
+    # Image processing constants
+    MAX_SIZE = 2048
+    HIGH_DETAIL_TARGET_SHORT_SIDE = 768
+    TILE_SIZE = 512
+
+    def __init__(self, tokenizer):
+        self.tokenizer = tokenizer
+
+    def count_text(self, text: str) -> int:
+        """Calculate tokens for a text string"""
+        return 0 if not text else len(self.tokenizer.encode(text))
+
+    def count_image(self, image_item: dict) -> int:
+        """
+        Calculate tokens for an image based on detail level and dimensions
+
+        For "low" detail: fixed 85 tokens
+        For "high" detail:
+        1. Scale to fit in 2048x2048 square
+        2. Scale shortest side to 768px
+        3. Count 512px tiles (170 tokens each)
+        4. Add 85 tokens
+        """
+        detail = image_item.get("detail", "medium")
+
+        # For low detail, always return fixed token count
+        if detail == "low":
+            return self.LOW_DETAIL_IMAGE_TOKENS
+
+        # For medium detail (default in OpenAI), use high detail calculation
+        # OpenAI doesn't specify a separate calculation for medium
+
+        # For high detail, calculate based on dimensions if available
+        if detail == "high" or detail == "medium":
+            # If dimensions are provided in the image_item
+            if "dimensions" in image_item:
+                width, height = image_item["dimensions"]
+                return self._calculate_high_detail_tokens(width, height)
+
+        return (
+            self._calculate_high_detail_tokens(1024, 1024) if detail == "high" else 1024
+        )
+
+    def _calculate_high_detail_tokens(self, width: int, height: int) -> int:
+        """Calculate tokens for high detail images based on dimensions"""
+        # Step 1: Scale to fit in MAX_SIZE x MAX_SIZE square
+        if width > self.MAX_SIZE or height > self.MAX_SIZE:
+            scale = self.MAX_SIZE / max(width, height)
+            width = int(width * scale)
+            height = int(height * scale)
+
+        # Step 2: Scale so shortest side is HIGH_DETAIL_TARGET_SHORT_SIDE
+        scale = self.HIGH_DETAIL_TARGET_SHORT_SIDE / min(width, height)
+        scaled_width = int(width * scale)
+        scaled_height = int(height * scale)
+
+        # Step 3: Count number of 512px tiles
+        tiles_x = math.ceil(scaled_width / self.TILE_SIZE)
+        tiles_y = math.ceil(scaled_height / self.TILE_SIZE)
+        total_tiles = tiles_x * tiles_y
+
+        # Step 4: Calculate final token count
+        return (
+            total_tiles * self.HIGH_DETAIL_TILE_TOKENS
+        ) + self.LOW_DETAIL_IMAGE_TOKENS
+
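The four-step tile arithmetic can be sanity-checked in isolation. The sketch below is a hypothetical standalone re-derivation of the same steps (constants copied from `TokenCounter`; the function name is illustrative):

```python
import math

MAX_SIZE, TARGET_SHORT_SIDE, TILE_SIZE = 2048, 768, 512
TILE_TOKENS, BASE_TOKENS = 170, 85

def high_detail_tokens(width: int, height: int) -> int:
    # Step 1: fit within a 2048x2048 square
    if max(width, height) > MAX_SIZE:
        scale = MAX_SIZE / max(width, height)
        width, height = int(width * scale), int(height * scale)
    # Step 2: scale so the shortest side is 768px
    scale = TARGET_SHORT_SIDE / min(width, height)
    width, height = int(width * scale), int(height * scale)
    # Steps 3-4: count 512px tiles at 170 tokens each, plus an 85-token base
    tiles = math.ceil(width / TILE_SIZE) * math.ceil(height / TILE_SIZE)
    return tiles * TILE_TOKENS + BASE_TOKENS

# A 1024x1024 image scales to 768x768 -> 2x2 tiles -> 4*170 + 85 = 765
print(high_detail_tokens(1024, 1024))  # -> 765
```

A 2048x4096 image works out to 1105 tokens by the same steps (halved to 1024x2048, then 768x1536, giving 2x3 tiles).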
    def count_content(self, content: Union[str, List[Union[str, dict]]]) -> int:
        """Calculate tokens for message content"""
        if not content:
            return 0

        if isinstance(content, str):
            return self.count_text(content)

        token_count = 0
        for item in content:
            if isinstance(item, str):
                token_count += self.count_text(item)
            elif isinstance(item, dict):
                if "text" in item:
                    token_count += self.count_text(item["text"])
                elif "image_url" in item:
                    token_count += self.count_image(item)
        return token_count

    def count_tool_calls(self, tool_calls: List[dict]) -> int:
        """Calculate tokens for tool calls"""
        token_count = 0
        for tool_call in tool_calls:
            if "function" in tool_call:
                function = tool_call["function"]
                token_count += self.count_text(function.get("name", ""))
                token_count += self.count_text(function.get("arguments", ""))
        return token_count

    def count_message_tokens(self, messages: List[dict]) -> int:
        """Calculate the total number of tokens in a message list"""
        total_tokens = self.FORMAT_TOKENS  # Base format tokens

        for message in messages:
            tokens = self.BASE_MESSAGE_TOKENS  # Base tokens per message

            # Add role tokens
            tokens += self.count_text(message.get("role", ""))

            # Add content tokens
            if "content" in message:
                tokens += self.count_content(message["content"])

            # Add tool call tokens
            if "tool_calls" in message:
                tokens += self.count_tool_calls(message["tool_calls"])

            # Add name and tool_call_id tokens
            tokens += self.count_text(message.get("name", ""))
            tokens += self.count_text(message.get("tool_call_id", ""))

            total_tokens += tokens

        return total_tokens
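`TokenCounter` only needs an object with an `encode` method, so the per-message accounting (2 format tokens, plus 4 base tokens and the role/content tokens per message) can be exercised with a stub tokenizer. This is a simplified, hypothetical re-implementation for illustration, not the class itself:

```python
class StubTokenizer:
    """Counts whitespace-separated words instead of real BPE tokens."""
    def encode(self, text: str):
        return text.split()

BASE_MESSAGE_TOKENS, FORMAT_TOKENS = 4, 2

def count_message_tokens(messages, tokenizer=StubTokenizer()):
    total = FORMAT_TOKENS
    for m in messages:
        total += BASE_MESSAGE_TOKENS
        total += len(tokenizer.encode(m.get("role", "")))
        if isinstance(m.get("content"), str):
            total += len(tokenizer.encode(m["content"]))
    return total

msgs = [{"role": "user", "content": "hello there"}]
# 2 (format) + 4 (base) + 1 ("user") + 2 ("hello there") = 9
print(count_message_tokens(msgs))  # -> 9
```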
class LLM:
    _instances: Dict[str, "LLM"] = {}

    def __new__(
        cls, config_name: str = "default", llm_config: Optional[LLMSettings] = None
    ):
        if config_name not in cls._instances:
            instance = super().__new__(cls)
            instance.__init__(config_name, llm_config)
            cls._instances[config_name] = instance
        return cls._instances[config_name]
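`__new__` above caches one `LLM` instance per config name. The pattern in isolation looks like this (illustrative class name, not part of the codebase):

```python
class PerNameSingleton:
    _instances = {}

    def __new__(cls, name: str = "default"):
        # Reuse the cached instance for this name, if any
        if name not in cls._instances:
            cls._instances[name] = super().__new__(cls)
        return cls._instances[name]

a = PerNameSingleton("default")
b = PerNameSingleton("default")
c = PerNameSingleton("vision")
print(a is b, a is c)  # -> True False
```

Note that Python still calls `__init__` on every construction, which is why the original calls `__init__` explicitly inside `__new__` and guards re-initialization with the `hasattr(self, "client")` check.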
    def __init__(
        self, config_name: str = "default", llm_config: Optional[LLMSettings] = None
    ):
        if not hasattr(self, "client"):  # Only initialize if not already initialized
            llm_config = llm_config or config.llm
            llm_config = llm_config.get(config_name, llm_config["default"])
            self.model = llm_config.model
            self.max_tokens = llm_config.max_tokens
            self.temperature = llm_config.temperature
            self.api_type = llm_config.api_type
            self.api_key = llm_config.api_key
            self.api_version = llm_config.api_version
            self.base_url = llm_config.base_url

            # Token counting related attributes
            self.total_input_tokens = 0
            self.total_completion_tokens = 0
            self.max_input_tokens = (
                llm_config.max_input_tokens
                if hasattr(llm_config, "max_input_tokens")
                else None
            )

            # Initialize tokenizer
            try:
                self.tokenizer = tiktoken.encoding_for_model(self.model)
            except KeyError:
                # If the model is not in tiktoken's presets, use cl100k_base as default
                self.tokenizer = tiktoken.get_encoding("cl100k_base")

            if self.api_type == "azure":
                self.client = AsyncAzureOpenAI(
                    base_url=self.base_url,
                    api_key=self.api_key,
                    api_version=self.api_version,
                )
            elif self.api_type == "aws":
                self.client = BedrockClient()
            else:
                self.client = AsyncOpenAI(api_key=self.api_key, base_url=self.base_url)

            self.token_counter = TokenCounter(self.tokenizer)

    def count_tokens(self, text: str) -> int:
        """Calculate the number of tokens in a text"""
        if not text:
            return 0
        return len(self.tokenizer.encode(text))

    def count_message_tokens(self, messages: List[dict]) -> int:
        return self.token_counter.count_message_tokens(messages)

    def update_token_count(self, input_tokens: int, completion_tokens: int = 0) -> None:
        """Update cumulative token counts and log the usage"""
        self.total_input_tokens += input_tokens
        self.total_completion_tokens += completion_tokens
        logger.info(
            f"Token usage: Input={input_tokens}, Completion={completion_tokens}, "
            f"Cumulative Input={self.total_input_tokens}, Cumulative Completion={self.total_completion_tokens}, "
            f"Total={input_tokens + completion_tokens}, Cumulative Total={self.total_input_tokens + self.total_completion_tokens}"
        )

    def check_token_limit(self, input_tokens: int) -> bool:
        """Check whether the request would stay within the input token limit"""
        if self.max_input_tokens is not None:
            return (self.total_input_tokens + input_tokens) <= self.max_input_tokens
        # If max_input_tokens is not set, always return True
        return True

    def get_limit_error_message(self, input_tokens: int) -> str:
        """Generate an error message for an exceeded token limit"""
        if (
            self.max_input_tokens is not None
            and (self.total_input_tokens + input_tokens) > self.max_input_tokens
        ):
            return f"Request may exceed input token limit (Current: {self.total_input_tokens}, Needed: {input_tokens}, Max: {self.max_input_tokens})"

        return "Token limit exceeded"

    @staticmethod
    def format_messages(
        messages: List[Union[dict, Message]], supports_images: bool = False
    ) -> List[dict]:
        """
        Format messages for the LLM by converting them to the OpenAI message format.

        Args:
            messages: List of messages that can be either dict or Message objects
            supports_images: Flag indicating if the target model supports image inputs

        Returns:
            List[dict]: List of formatted messages in OpenAI format

        Raises:
            ValueError: If messages are invalid or missing required fields
            TypeError: If unsupported message types are provided

        Examples:
            >>> msgs = [
            ...     Message.system_message("You are a helpful assistant"),
            ...     {"role": "user", "content": "Hello"},
            ...     Message.user_message("How are you?")
            ... ]
            >>> formatted = LLM.format_messages(msgs)
        """
        formatted_messages = []

        for message in messages:
            # Convert Message objects to dictionaries
            if isinstance(message, Message):
                message = message.to_dict()

            if isinstance(message, dict):
                # If the message is a dict, ensure it has the required fields
                if "role" not in message:
                    raise ValueError("Message dict must contain 'role' field")

                # Process base64 images if present and the model supports images
                if supports_images and message.get("base64_image"):
                    # Initialize or convert content to the appropriate format
                    if not message.get("content"):
                        message["content"] = []
                    elif isinstance(message["content"], str):
                        message["content"] = [
                            {"type": "text", "text": message["content"]}
                        ]
                    elif isinstance(message["content"], list):
                        # Convert string items to proper text objects
                        message["content"] = [
                            (
                                {"type": "text", "text": item}
                                if isinstance(item, str)
                                else item
                            )
                            for item in message["content"]
                        ]

                    # Add the image to content
                    message["content"].append(
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/jpeg;base64,{message['base64_image']}"
                            },
                        }
                    )

                    # Remove the base64_image field
                    del message["base64_image"]
                # If the model doesn't support images, drop the image but keep the text
                elif not supports_images and message.get("base64_image"):
                    del message["base64_image"]

                if "content" in message or "tool_calls" in message:
                    formatted_messages.append(message)
                # else: do not include the message
            else:
                raise TypeError(f"Unsupported message type: {type(message)}")

        # Validate that all messages have a valid role
        for msg in formatted_messages:
            if msg["role"] not in ROLE_VALUES:
                raise ValueError(f"Invalid role: {msg['role']}")

        return formatted_messages
    @retry(
        wait=wait_random_exponential(min=1, max=60),
        stop=stop_after_attempt(6),
        retry=retry_if_exception_type(
            (OpenAIError, Exception, ValueError)
        ),  # Don't retry TokenLimitExceeded
    )
    async def ask(
        self,
        messages: List[Union[dict, Message]],
        system_msgs: Optional[List[Union[dict, Message]]] = None,
        stream: bool = True,
        temperature: Optional[float] = None,
    ) -> str:
        """
        Send a prompt to the LLM and get the response.

        Args:
            messages: List of conversation messages
            system_msgs: Optional system messages to prepend
            stream (bool): Whether to stream the response
            temperature (float): Sampling temperature for the response

        Returns:
            str: The generated response

        Raises:
            TokenLimitExceeded: If token limits are exceeded
            ValueError: If messages are invalid or response is empty
            OpenAIError: If API call fails after retries
            Exception: For unexpected errors
        """
        try:
            # Check if the model supports images
            supports_images = self.model in MULTIMODAL_MODELS

            # Format system and user messages with image support check
            if system_msgs:
                system_msgs = self.format_messages(system_msgs, supports_images)
                messages = system_msgs + self.format_messages(messages, supports_images)
            else:
                messages = self.format_messages(messages, supports_images)

            # Calculate input token count
            input_tokens = self.count_message_tokens(messages)

            # Check if token limits are exceeded
            if not self.check_token_limit(input_tokens):
                error_message = self.get_limit_error_message(input_tokens)
                # Raise a special exception that won't be retried
                raise TokenLimitExceeded(error_message)

            params = {
                "model": self.model,
                "messages": messages,
            }

            if self.model in REASONING_MODELS:
                params["max_completion_tokens"] = self.max_tokens
            else:
                params["max_tokens"] = self.max_tokens
                params["temperature"] = (
                    temperature if temperature is not None else self.temperature
                )

            if not stream:
                # Non-streaming request
                response = await self.client.chat.completions.create(
                    **params, stream=False
                )

                if not response.choices or not response.choices[0].message.content:
                    raise ValueError("Empty or invalid response from LLM")

                # Update token counts
                self.update_token_count(
                    response.usage.prompt_tokens, response.usage.completion_tokens
                )

                return response.choices[0].message.content

            # Streaming request: record the estimated input token count up front
            self.update_token_count(input_tokens)

            response = await self.client.chat.completions.create(**params, stream=True)

            collected_messages = []
            completion_text = ""
            async for chunk in response:
                chunk_message = chunk.choices[0].delta.content or ""
                collected_messages.append(chunk_message)
                completion_text += chunk_message
                print(chunk_message, end="", flush=True)

            print()  # Newline after streaming
            full_response = "".join(collected_messages).strip()
            if not full_response:
                raise ValueError("Empty response from streaming LLM")

            # Estimate completion tokens for the streaming response
            completion_tokens = self.count_tokens(completion_text)
            logger.info(
                f"Estimated completion tokens for streaming response: {completion_tokens}"
            )
            self.total_completion_tokens += completion_tokens

            return full_response

        except TokenLimitExceeded:
            # Re-raise token limit errors without logging
            raise
        except ValueError:
            logger.exception("Validation error")
            raise
        except OpenAIError as oe:
            logger.exception("OpenAI API error")
            if isinstance(oe, AuthenticationError):
                logger.error("Authentication failed. Check API key.")
            elif isinstance(oe, RateLimitError):
                logger.error("Rate limit exceeded. Consider increasing retry attempts.")
            elif isinstance(oe, APIError):
                logger.error(f"API error: {oe}")
            raise
        except Exception:
            logger.exception("Unexpected error in ask")
            raise
    @retry(
        wait=wait_random_exponential(min=1, max=60),
        stop=stop_after_attempt(6),
        retry=retry_if_exception_type(
            (OpenAIError, Exception, ValueError)
        ),  # Don't retry TokenLimitExceeded
    )
    async def ask_with_images(
        self,
        messages: List[Union[dict, Message]],
        images: List[Union[str, dict]],
        system_msgs: Optional[List[Union[dict, Message]]] = None,
        stream: bool = False,
        temperature: Optional[float] = None,
    ) -> str:
        """
        Send a prompt with images to the LLM and get the response.

        Args:
            messages: List of conversation messages
            images: List of image URLs or image data dictionaries
            system_msgs: Optional system messages to prepend
            stream (bool): Whether to stream the response
            temperature (float): Sampling temperature for the response

        Returns:
            str: The generated response

        Raises:
            TokenLimitExceeded: If token limits are exceeded
            ValueError: If messages are invalid or response is empty
            OpenAIError: If API call fails after retries
            Exception: For unexpected errors
        """
        try:
            # This method should only be called with models that support images
            if self.model not in MULTIMODAL_MODELS:
                raise ValueError(
                    f"Model {self.model} does not support images. Use a model from {MULTIMODAL_MODELS}"
                )

            # Format messages with image support
            formatted_messages = self.format_messages(messages, supports_images=True)

            # Ensure the last message is from the user to attach images
            if not formatted_messages or formatted_messages[-1]["role"] != "user":
                raise ValueError(
                    "The last message must be from the user to attach images"
                )

            # Process the last user message to include images
            last_message = formatted_messages[-1]

            # Convert content to multimodal format if needed
            content = last_message["content"]
            multimodal_content = (
                [{"type": "text", "text": content}]
                if isinstance(content, str)
                else content if isinstance(content, list) else []
            )

            # Add images to content
            for image in images:
                if isinstance(image, str):
                    multimodal_content.append(
                        {"type": "image_url", "image_url": {"url": image}}
                    )
                elif isinstance(image, dict) and "url" in image:
                    multimodal_content.append({"type": "image_url", "image_url": image})
                elif isinstance(image, dict) and "image_url" in image:
                    multimodal_content.append(image)
                else:
                    raise ValueError(f"Unsupported image format: {image}")

            # Update the message with multimodal content
            last_message["content"] = multimodal_content

            # Add system messages if provided
            if system_msgs:
                all_messages = (
                    self.format_messages(system_msgs, supports_images=True)
                    + formatted_messages
                )
            else:
                all_messages = formatted_messages

            # Calculate tokens and check limits
            input_tokens = self.count_message_tokens(all_messages)
            if not self.check_token_limit(input_tokens):
                raise TokenLimitExceeded(self.get_limit_error_message(input_tokens))

            # Set up API parameters
            params = {
                "model": self.model,
                "messages": all_messages,
                "stream": stream,
            }

            # Add model-specific parameters
            if self.model in REASONING_MODELS:
                params["max_completion_tokens"] = self.max_tokens
            else:
                params["max_tokens"] = self.max_tokens
                params["temperature"] = (
                    temperature if temperature is not None else self.temperature
                )

            # Handle non-streaming request
            if not stream:
                response = await self.client.chat.completions.create(**params)

                if not response.choices or not response.choices[0].message.content:
                    raise ValueError("Empty or invalid response from LLM")

                self.update_token_count(
                    response.usage.prompt_tokens, response.usage.completion_tokens
                )
                return response.choices[0].message.content

            # Handle streaming request
            self.update_token_count(input_tokens)
            response = await self.client.chat.completions.create(**params)

            collected_messages = []
            async for chunk in response:
                chunk_message = chunk.choices[0].delta.content or ""
                collected_messages.append(chunk_message)
                print(chunk_message, end="", flush=True)

            print()  # Newline after streaming
            full_response = "".join(collected_messages).strip()

            if not full_response:
                raise ValueError("Empty response from streaming LLM")

            return full_response

        except TokenLimitExceeded:
            raise
        except ValueError as ve:
            logger.error(f"Validation error in ask_with_images: {ve}")
            raise
        except OpenAIError as oe:
            logger.error(f"OpenAI API error: {oe}")
            if isinstance(oe, AuthenticationError):
                logger.error("Authentication failed. Check API key.")
            elif isinstance(oe, RateLimitError):
                logger.error("Rate limit exceeded. Consider increasing retry attempts.")
            elif isinstance(oe, APIError):
                logger.error(f"API error: {oe}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error in ask_with_images: {e}")
            raise
    @retry(
        wait=wait_random_exponential(min=1, max=60),
        stop=stop_after_attempt(6),
        retry=retry_if_exception_type(
            (OpenAIError, Exception, ValueError)
        ),  # Don't retry TokenLimitExceeded
    )
    async def ask_tool(
        self,
        messages: List[Union[dict, Message]],
        system_msgs: Optional[List[Union[dict, Message]]] = None,
        timeout: int = 300,
        tools: Optional[List[dict]] = None,
        tool_choice: TOOL_CHOICE_TYPE = ToolChoice.AUTO,  # type: ignore
        temperature: Optional[float] = None,
        **kwargs,
    ) -> ChatCompletionMessage | None:
        """
        Ask the LLM using functions/tools and return the response.

        Args:
            messages: List of conversation messages
            system_msgs: Optional system messages to prepend
            timeout: Request timeout in seconds
            tools: List of tools to use
            tool_choice: Tool choice strategy
            temperature: Sampling temperature for the response
            **kwargs: Additional completion arguments

        Returns:
            ChatCompletionMessage: The model's response

        Raises:
            TokenLimitExceeded: If token limits are exceeded
            ValueError: If tools, tool_choice, or messages are invalid
            OpenAIError: If API call fails after retries
            Exception: For unexpected errors
        """
        try:
            # Validate tool_choice
            if tool_choice not in TOOL_CHOICE_VALUES:
                raise ValueError(f"Invalid tool_choice: {tool_choice}")

            # Check if the model supports images
            supports_images = self.model in MULTIMODAL_MODELS

            # Format messages
            if system_msgs:
                system_msgs = self.format_messages(system_msgs, supports_images)
                messages = system_msgs + self.format_messages(messages, supports_images)
            else:
                messages = self.format_messages(messages, supports_images)

            # Calculate input token count
            input_tokens = self.count_message_tokens(messages)

            # If there are tools, add the token count of the tool descriptions
            tools_tokens = 0
            if tools:
                for tool in tools:
                    tools_tokens += self.count_tokens(str(tool))

            input_tokens += tools_tokens

            # Check if token limits are exceeded
            if not self.check_token_limit(input_tokens):
                error_message = self.get_limit_error_message(input_tokens)
                # Raise a special exception that won't be retried
                raise TokenLimitExceeded(error_message)

            # Validate tools if provided
            if tools:
                for tool in tools:
                    if not isinstance(tool, dict) or "type" not in tool:
                        raise ValueError("Each tool must be a dict with 'type' field")

            # Set up the completion request
            params = {
                "model": self.model,
                "messages": messages,
                "tools": tools,
                "tool_choice": tool_choice,
                "timeout": timeout,
                **kwargs,
            }

            if self.model in REASONING_MODELS:
                params["max_completion_tokens"] = self.max_tokens
            else:
                params["max_tokens"] = self.max_tokens
                params["temperature"] = (
                    temperature if temperature is not None else self.temperature
                )

            params["stream"] = False  # Always use non-streaming for tool requests
            response: ChatCompletion = await self.client.chat.completions.create(
                **params
            )

            # Check if the response is valid
            if not response.choices or not response.choices[0].message:
                print(response)
                # raise ValueError("Invalid or empty response from LLM")
                return None

            # Update token counts
            self.update_token_count(
                response.usage.prompt_tokens, response.usage.completion_tokens
            )

            return response.choices[0].message

        except TokenLimitExceeded:
            # Re-raise token limit errors without logging
            raise
        except ValueError as ve:
            logger.error(f"Validation error in ask_tool: {ve}")
            raise
        except OpenAIError as oe:
            logger.error(f"OpenAI API error: {oe}")
            if isinstance(oe, AuthenticationError):
                logger.error("Authentication failed. Check API key.")
            elif isinstance(oe, RateLimitError):
                logger.error("Rate limit exceeded. Consider increasing retry attempts.")
            elif isinstance(oe, APIError):
                logger.error(f"API error: {oe}")
            raise
        except Exception as e:
            logger.error(f"Unexpected error in ask_tool: {e}")
            raise
app/logger.py ADDED
@@ -0,0 +1,42 @@
import sys
from datetime import datetime

from loguru import logger as _logger

from app.config import PROJECT_ROOT


_print_level = "INFO"


def define_log_level(print_level="INFO", logfile_level="DEBUG", name: str = None):
    """Adjust the log levels for the stderr and file sinks"""
    global _print_level
    _print_level = print_level

    current_date = datetime.now()
    formatted_date = current_date.strftime("%Y%m%d%H%M%S")
    log_name = (
        f"{name}_{formatted_date}" if name else formatted_date
    )  # name the log file with an optional prefix

    _logger.remove()
    _logger.add(sys.stderr, level=print_level)
    _logger.add(PROJECT_ROOT / f"logs/{log_name}.log", level=logfile_level)
    return _logger


logger = define_log_level()


if __name__ == "__main__":
    logger.info("Starting application")
    logger.debug("Debug message")
    logger.warning("Warning message")
    logger.error("Error message")
    logger.critical("Critical message")

    try:
        raise ValueError("Test error")
    except Exception as e:
        logger.exception(f"An error occurred: {e}")
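The same two-sink setup (stderr plus a timestamped log file name) can be sketched with the standard library when loguru is unavailable. This is a rough, hypothetical analogue for illustration; it builds the file name but does not attach a file handler, to avoid side effects:

```python
import logging
import sys
from datetime import datetime

def define_log_level(print_level="INFO", name=None):
    """Configure an stderr logger and compute a timestamped log file path."""
    stamp = datetime.now().strftime("%Y%m%d%H%M%S")
    log_name = f"{name}_{stamp}" if name else stamp

    log = logging.getLogger("app")
    log.setLevel(logging.DEBUG)
    log.handlers.clear()

    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setLevel(print_level)
    log.addHandler(stderr_handler)
    return log, f"logs/{log_name}.log"

log, logfile = define_log_level(name="manus")
log.info("Starting application")
print(logfile)
```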
app/mcp/__init__.py ADDED
File without changes
app/mcp/server.py ADDED
@@ -0,0 +1,180 @@
1
+ import logging
2
+ import sys
3
+
4
+
5
+ logging.basicConfig(level=logging.INFO, handlers=[logging.StreamHandler(sys.stderr)])
6
+
7
+ import argparse
8
+ import asyncio
9
+ import atexit
10
+ import json
11
+ from inspect import Parameter, Signature
12
+ from typing import Any, Dict, Optional
13
+
14
+ from mcp.server.fastmcp import FastMCP
15
+
16
+ from app.logger import logger
17
+ from app.tool.base import BaseTool
18
+ from app.tool.bash import Bash
19
+ from app.tool.browser_use_tool import BrowserUseTool
20
+ from app.tool.str_replace_editor import StrReplaceEditor
21
+ from app.tool.terminate import Terminate
22
+
23
+
24
+ class MCPServer:
25
+ """MCP Server implementation with tool registration and management."""
26
+
27
+ def __init__(self, name: str = "openmanus"):
28
+ self.server = FastMCP(name)
29
+ self.tools: Dict[str, BaseTool] = {}
30
+
31
+ # Initialize standard tools
32
+ self.tools["bash"] = Bash()
33
+ self.tools["browser"] = BrowserUseTool()
34
+ self.tools["editor"] = StrReplaceEditor()
35
+ self.tools["terminate"] = Terminate()
36
+
37
+ def register_tool(self, tool: BaseTool, method_name: Optional[str] = None) -> None:
38
+ """Register a tool with parameter validation and documentation."""
39
+ tool_name = method_name or tool.name
40
+ tool_param = tool.to_param()
41
+ tool_function = tool_param["function"]
42
+
43
+ # Define the async function to be registered
44
+ async def tool_method(**kwargs):
45
+ logger.info(f"Executing {tool_name}: {kwargs}")
46
+ result = await tool.execute(**kwargs)
47
+
48
+ logger.info(f"Result of {tool_name}: {result}")
49
+
50
+ # Handle different types of results (match original logic)
51
+ if hasattr(result, "model_dump"):
52
+ return json.dumps(result.model_dump())
53
+ elif isinstance(result, dict):
54
+ return json.dumps(result)
55
+ return result
56
+
57
+ # Set method metadata
58
+ tool_method.__name__ = tool_name
59
+ tool_method.__doc__ = self._build_docstring(tool_function)
60
+ tool_method.__signature__ = self._build_signature(tool_function)
61
+
62
+ # Store parameter schema (important for tools that access it programmatically)
63
+ param_props = tool_function.get("parameters", {}).get("properties", {})
64
+ required_params = tool_function.get("parameters", {}).get("required", [])
65
+ tool_method._parameter_schema = {
66
+ param_name: {
67
+ "description": param_details.get("description", ""),
68
+ "type": param_details.get("type", "any"),
69
+ "required": param_name in required_params,
70
+ }
71
+ for param_name, param_details in param_props.items()
72
+ }
73
+
74
+ # Register with server
75
+ self.server.tool()(tool_method)
76
+ logger.info(f"Registered tool: {tool_name}")
77
+
78
+ def _build_docstring(self, tool_function: dict) -> str:
79
+ """Build a formatted docstring from tool function metadata."""
80
+ description = tool_function.get("description", "")
81
+ param_props = tool_function.get("parameters", {}).get("properties", {})
82
+ required_params = tool_function.get("parameters", {}).get("required", [])
83
+
84
+ # Build docstring (match original format)
85
+ docstring = description
86
+ if param_props:
87
+ docstring += "\n\nParameters:\n"
88
+ for param_name, param_details in param_props.items():
89
+ required_str = (
90
+ "(required)" if param_name in required_params else "(optional)"
91
+ )
92
+ param_type = param_details.get("type", "any")
93
+ param_desc = param_details.get("description", "")
94
+ docstring += (
95
+ f" {param_name} ({param_type}) {required_str}: {param_desc}\n"
96
+ )
97
+
98
+         return docstring
+ 
+     def _build_signature(self, tool_function: dict) -> Signature:
+         """Build a function signature from tool function metadata."""
+         param_props = tool_function.get("parameters", {}).get("properties", {})
+         required_params = tool_function.get("parameters", {}).get("required", [])
+ 
+         parameters = []
+ 
+         # Follow original type mapping
+         for param_name, param_details in param_props.items():
+             param_type = param_details.get("type", "")
+             default = Parameter.empty if param_name in required_params else None
+ 
+             # Map JSON Schema types to Python types (same as original)
+             annotation = Any
+             if param_type == "string":
+                 annotation = str
+             elif param_type == "integer":
+                 annotation = int
+             elif param_type == "number":
+                 annotation = float
+             elif param_type == "boolean":
+                 annotation = bool
+             elif param_type == "object":
+                 annotation = dict
+             elif param_type == "array":
+                 annotation = list
+ 
+             # Create parameter with same structure as original
+             param = Parameter(
+                 name=param_name,
+                 kind=Parameter.KEYWORD_ONLY,
+                 default=default,
+                 annotation=annotation,
+             )
+             parameters.append(param)
+ 
+         return Signature(parameters=parameters)
+ 
+     async def cleanup(self) -> None:
+         """Clean up server resources."""
+         logger.info("Cleaning up resources")
+         # Follow original cleanup logic - only clean browser tool
+         if "browser" in self.tools and hasattr(self.tools["browser"], "cleanup"):
+             await self.tools["browser"].cleanup()
+ 
+     def register_all_tools(self) -> None:
+         """Register all tools with the server."""
+         for tool in self.tools.values():
+             self.register_tool(tool)
+ 
+     def run(self, transport: str = "stdio") -> None:
+         """Run the MCP server."""
+         # Register all tools
+         self.register_all_tools()
+ 
+         # Register cleanup function (match original behavior)
+         atexit.register(lambda: asyncio.run(self.cleanup()))
+ 
+         # Start server (with same logging as original)
+         logger.info(f"Starting OpenManus server ({transport} mode)")
+         self.server.run(transport=transport)
+ 
+ 
+ def parse_args() -> argparse.Namespace:
+     """Parse command line arguments."""
+     parser = argparse.ArgumentParser(description="OpenManus MCP Server")
+     parser.add_argument(
+         "--transport",
+         choices=["stdio"],
+         default="stdio",
+         help="Communication method: stdio (default: stdio)",
+     )
+     return parser.parse_args()
+ 
+ 
+ if __name__ == "__main__":
+     args = parse_args()
+ 
+     # Create and run server (maintaining original flow)
+     server = MCPServer()
+     server.run(transport=args.transport)
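The JSON-Schema-to-Python type mapping inside `_build_signature` can be sketched as a standalone function. This is an illustrative rewrite, not the server's actual code: `build_signature` and the sample tool payload are hypothetical, but the `inspect.Parameter`/`Signature` usage mirrors the mapping above.

```python
from inspect import Parameter, Signature
from typing import Any

# Same JSON Schema -> Python type mapping as the if/elif chain above.
TYPE_MAP = {"string": str, "integer": int, "number": float,
            "boolean": bool, "object": dict, "array": list}


def build_signature(tool_function: dict) -> Signature:
    """Build a keyword-only signature from tool metadata (illustrative)."""
    props = tool_function.get("parameters", {}).get("properties", {})
    required = tool_function.get("parameters", {}).get("required", [])
    params = []
    for name, details in props.items():
        annotation = TYPE_MAP.get(details.get("type", ""), Any)
        # Required params get no default; optional ones default to None.
        default = Parameter.empty if name in required else None
        params.append(Parameter(name=name, kind=Parameter.KEYWORD_ONLY,
                                default=default, annotation=annotation))
    return Signature(parameters=params)


# Hypothetical tool metadata in the shape the server receives.
sig = build_signature({
    "parameters": {
        "properties": {"url": {"type": "string"}, "timeout": {"type": "integer"}},
        "required": ["url"],
    }
})
print(sig)
```

Printing the signature shows `url` as a required keyword-only `str` and `timeout` as an optional `int` defaulting to `None`, which is exactly what FastMCP-style tool registration needs for introspection.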
app/production_config.py ADDED
@@ -0,0 +1,363 @@
+ """
+ Complete Configuration for OpenManus Production Deployment
+ Includes: All model configurations, agent settings, category mappings, and service configurations
+ """
+ 
+ import os
+ from typing import Dict, List, Optional, Any
+ from dataclasses import dataclass
+ 
+ 
+ @dataclass
+ class ModelConfig:
+     """Configuration for individual AI models"""
+ 
+     name: str
+     category: str
+     api_endpoint: str
+     max_tokens: int = 4096
+     temperature: float = 0.7
+     supported_formats: Optional[List[str]] = None
+     special_parameters: Optional[Dict[str, Any]] = None
+     rate_limit: int = 100  # requests per minute
+ 
+ 
+ class CategoryConfig:
+     """Configuration for model categories"""
+ 
+     # Core AI Models - Text Generation (Qwen, DeepSeek, etc.)
+     TEXT_GENERATION_MODELS = {
+         # Qwen Models (35 models)
+         "qwen/qwen-2.5-72b-instruct": ModelConfig(
+             name="Qwen 2.5 72B Instruct",
+             category="text-generation",
+             api_endpoint="https://api-inference.huggingface.co/models/Qwen/Qwen2.5-72B-Instruct",
+             max_tokens=8192,
+             temperature=0.7,
+         ),
+         "qwen/qwen-2.5-32b-instruct": ModelConfig(
+             name="Qwen 2.5 32B Instruct",
+             category="text-generation",
+             api_endpoint="https://api-inference.huggingface.co/models/Qwen/Qwen2.5-32B-Instruct",
+             max_tokens=8192,
+         ),
+         "qwen/qwen-2.5-14b-instruct": ModelConfig(
+             name="Qwen 2.5 14B Instruct",
+             category="text-generation",
+             api_endpoint="https://api-inference.huggingface.co/models/Qwen/Qwen2.5-14B-Instruct",
+             max_tokens=8192,
+         ),
+         "qwen/qwen-2.5-7b-instruct": ModelConfig(
+             name="Qwen 2.5 7B Instruct",
+             category="text-generation",
+             api_endpoint="https://api-inference.huggingface.co/models/Qwen/Qwen2.5-7B-Instruct",
+         ),
+         "qwen/qwen-2.5-3b-instruct": ModelConfig(
+             name="Qwen 2.5 3B Instruct",
+             category="text-generation",
+             api_endpoint="https://api-inference.huggingface.co/models/Qwen/Qwen2.5-3B-Instruct",
+         ),
+         "qwen/qwen-2.5-1.5b-instruct": ModelConfig(
+             name="Qwen 2.5 1.5B Instruct",
+             category="text-generation",
+             api_endpoint="https://api-inference.huggingface.co/models/Qwen/Qwen2.5-1.5B-Instruct",
+         ),
+         "qwen/qwen-2.5-0.5b-instruct": ModelConfig(
+             name="Qwen 2.5 0.5B Instruct",
+             category="text-generation",
+             api_endpoint="https://api-inference.huggingface.co/models/Qwen/Qwen2.5-0.5B-Instruct",
+         ),
+         # ... (Add all 35 Qwen models)
+         # DeepSeek Models (17 models)
+         "deepseek-ai/deepseek-coder-33b-instruct": ModelConfig(
+             name="DeepSeek Coder 33B Instruct",
+             category="code-generation",
+             api_endpoint="https://api-inference.huggingface.co/models/deepseek-ai/deepseek-coder-33b-instruct",
+             max_tokens=8192,
+             special_parameters={"code_focused": True},
+         ),
+         "deepseek-ai/deepseek-coder-6.7b-instruct": ModelConfig(
+             name="DeepSeek Coder 6.7B Instruct",
+             category="code-generation",
+             api_endpoint="https://api-inference.huggingface.co/models/deepseek-ai/deepseek-coder-6.7b-instruct",
+         ),
+         # ... (Add all 17 DeepSeek models)
+     }
+ 
+     # Image Editing Models (10 models)
+     IMAGE_EDITING_MODELS = {
+         "stabilityai/stable-diffusion-xl-refiner-1.0": ModelConfig(
+             name="SDXL Refiner 1.0",
+             category="image-editing",
+             api_endpoint="https://api-inference.huggingface.co/models/stabilityai/stable-diffusion-xl-refiner-1.0",
+             supported_formats=["image/png", "image/jpeg"],
+         ),
+         "runwayml/stable-diffusion-inpainting": ModelConfig(
+             name="Stable Diffusion Inpainting",
+             category="image-inpainting",
+             api_endpoint="https://api-inference.huggingface.co/models/runwayml/stable-diffusion-inpainting",
+             supported_formats=["image/png", "image/jpeg"],
+         ),
+         # ... (Add all 10 image editing models)
+     }
+ 
+     # TTS/STT Models (15 models)
+     SPEECH_MODELS = {
+         "microsoft/speecht5_tts": ModelConfig(
+             name="SpeechT5 TTS",
+             category="text-to-speech",
+             api_endpoint="https://api-inference.huggingface.co/models/microsoft/speecht5_tts",
+             supported_formats=["audio/wav", "audio/mp3"],
+         ),
+         "openai/whisper-large-v3": ModelConfig(
+             name="Whisper Large v3",
+             category="automatic-speech-recognition",
+             api_endpoint="https://api-inference.huggingface.co/models/openai/whisper-large-v3",
+             supported_formats=["audio/wav", "audio/mp3", "audio/flac"],
+         ),
+         # ... (Add all 15 speech models)
+     }
+ 
+     # Face Swap Models (6 models)
+     FACE_SWAP_MODELS = {
+         "deepinsight/insightface": ModelConfig(
+             name="InsightFace",
+             category="face-swap",
+             api_endpoint="https://api-inference.huggingface.co/models/deepinsight/insightface",
+             supported_formats=["image/png", "image/jpeg"],
+         ),
+         # ... (Add all 6 face swap models)
+     }
+ 
+     # Talking Avatar Models (9 models)
+     AVATAR_MODELS = {
+         "microsoft/DiT-XL-2-512": ModelConfig(
+             name="DiT Avatar Generator",
+             category="talking-avatar",
+             api_endpoint="https://api-inference.huggingface.co/models/microsoft/DiT-XL-2-512",
+             supported_formats=["video/mp4", "image/png"],
+         ),
+         # ... (Add all 9 avatar models)
+     }
+ 
+     # Arabic-English Interactive Models (12 models)
+     ARABIC_ENGLISH_MODELS = {
+         "aubmindlab/bert-base-arabertv02": ModelConfig(
+             name="AraBERT v02",
+             category="arabic-text",
+             api_endpoint="https://api-inference.huggingface.co/models/aubmindlab/bert-base-arabertv02",
+             special_parameters={"language": "ar-en"},
+         ),
+         "UBC-NLP/MARBERT": ModelConfig(
+             name="MARBERT",
+             category="arabic-text",
+             api_endpoint="https://api-inference.huggingface.co/models/UBC-NLP/MARBERT",
+             special_parameters={"language": "ar-en"},
+         ),
+         # ... (Add all 12 Arabic-English models)
+     }
+ 
+ 
+ class AgentConfig:
+     """Configuration for AI Agents"""
+ 
+     # Manus Agent Configuration
+     MANUS_AGENT = {
+         "name": "Manus",
+         "description": "Versatile AI agent with 200+ models",
+         "max_steps": 20,
+         "max_observe": 10000,
+         "system_prompt_template": """You are Manus, an advanced AI agent with access to 200+ specialized models.
+ 
+ Available categories:
+ - Text Generation (Qwen, DeepSeek, etc.)
+ - Image Editing & Generation
+ - Speech (TTS/STT)
+ - Face Swap & Avatar Generation
+ - Arabic-English Interactive Models
+ - Code Generation & Review
+ - Multimodal AI
+ - Document Processing
+ - 3D Generation
+ - Video Processing
+ 
+ User workspace: {directory}""",
+         "tools": [
+             "PythonExecute",
+             "BrowserUseTool",
+             "StrReplaceEditor",
+             "AskHuman",
+             "Terminate",
+             "HuggingFaceModels",
+         ],
+         "model_preferences": {
+             "text": "qwen/qwen-2.5-72b-instruct",
+             "code": "deepseek-ai/deepseek-coder-33b-instruct",
+             "image": "stabilityai/stable-diffusion-xl-refiner-1.0",
+             "speech": "microsoft/speecht5_tts",
+             "arabic": "aubmindlab/bert-base-arabertv02",
+         },
+     }
+ 
+ 
+ class ServiceConfig:
+     """Configuration for all services"""
+ 
+     # Cloudflare Services
+     CLOUDFLARE_CONFIG = {
+         "d1_database": {
+             "enabled": True,
+             "tables": ["users", "sessions", "agent_interactions", "model_usage"],
+             "auto_migrate": True,
+         },
+         "r2_storage": {
+             "enabled": True,
+             "buckets": ["user-files", "generated-content", "model-cache"],
+             "max_file_size": "100MB",
+         },
+         "kv_storage": {
+             "enabled": True,
+             "namespaces": ["sessions", "model-cache", "user-preferences"],
+             "ttl": 86400,  # 24 hours
+         },
+         "durable_objects": {
+             "enabled": True,
+             "classes": ["ChatSession", "ModelRouter", "UserContext"],
+         },
+     }
+ 
+     # Authentication Configuration
+     AUTH_CONFIG = {
+         "method": "mobile_password",
+         "password_min_length": 8,
+         "session_duration": 86400,  # 24 hours
+         "max_concurrent_sessions": 5,
+         "mobile_validation": {
+             "international": True,
+             "formats": ["+1234567890", "01234567890"],
+         },
+     }
+ 
+     # Model Usage Configuration
+     MODEL_CONFIG = {
+         "rate_limits": {
+             "free_tier": 100,  # requests per day
+             "premium_tier": 1000,
+             "enterprise_tier": 10000,
+         },
+         "fallback_models": {
+             "text": ["qwen/qwen-2.5-7b-instruct", "qwen/qwen-2.5-3b-instruct"],
+             "image": ["runwayml/stable-diffusion-v1-5"],
+             "code": ["deepseek-ai/deepseek-coder-6.7b-instruct"],
+         },
+         "cache_settings": {"enabled": True, "ttl": 3600, "max_size": "1GB"},  # ttl: 1 hour
+     }
+ 
+ 
+ class EnvironmentConfig:
+     """Environment-specific configurations"""
+ 
+     @staticmethod
+     def get_production_config():
+         """Get production environment configuration"""
+         return {
+             "environment": "production",
+             "debug": False,
+             "log_level": "INFO",
+             "server": {"host": "0.0.0.0", "port": 7860, "workers": 4},
+             "database": {"type": "sqlite", "url": "auth.db", "pool_size": 10},
+             "security": {
+                 "secret_key": os.getenv("SECRET_KEY", "your-secret-key"),
+                 "cors_origins": ["*"],
+                 "rate_limiting": True,
+             },
+             "monitoring": {"metrics": True, "logging": True, "health_checks": True},
+         }
+ 
+     @staticmethod
+     def get_development_config():
+         """Get development environment configuration"""
+         return {
+             "environment": "development",
+             "debug": True,
+             "log_level": "DEBUG",
+             "server": {"host": "127.0.0.1", "port": 7860, "workers": 1},
+             "database": {"type": "sqlite", "url": "auth_dev.db", "pool_size": 2},
+             "security": {
+                 "secret_key": "dev-secret-key",
+                 "cors_origins": ["http://localhost:*"],
+                 "rate_limiting": False,
+             },
+         }
+ 
+ 
+ # Global configuration instance
+ class OpenManusConfig:
+     """Main configuration class for OpenManus"""
+ 
+     def __init__(self, environment: str = "production"):
+         self.environment = environment
+         self.categories = CategoryConfig()
+         self.agent = AgentConfig()
+         self.services = ServiceConfig()
+ 
+         if environment == "production":
+             self.env_config = EnvironmentConfig.get_production_config()
+         else:
+             self.env_config = EnvironmentConfig.get_development_config()
+ 
+     def get_model_config(self, model_id: str) -> Optional[ModelConfig]:
+         """Get configuration for a specific model"""
+         all_models = {
+             **self.categories.TEXT_GENERATION_MODELS,
+             **self.categories.IMAGE_EDITING_MODELS,
+             **self.categories.SPEECH_MODELS,
+             **self.categories.FACE_SWAP_MODELS,
+             **self.categories.AVATAR_MODELS,
+             **self.categories.ARABIC_ENGLISH_MODELS,
+         }
+         return all_models.get(model_id)
+ 
+     def get_category_models(self, category: str) -> Dict[str, ModelConfig]:
+         """Get all models in a category"""
+         if category == "text-generation":
+             return self.categories.TEXT_GENERATION_MODELS
+         elif category == "image-editing":
+             return self.categories.IMAGE_EDITING_MODELS
+         elif category in ["text-to-speech", "automatic-speech-recognition"]:
+             return self.categories.SPEECH_MODELS
+         elif category == "face-swap":
+             return self.categories.FACE_SWAP_MODELS
+         elif category == "talking-avatar":
+             return self.categories.AVATAR_MODELS
+         elif category == "arabic-text":
+             return self.categories.ARABIC_ENGLISH_MODELS
+         else:
+             return {}
+ 
+     def validate_config(self) -> bool:
+         """Validate the configuration"""
+         try:
+             # Check required environment variables
+             required_env = (
+                 ["CLOUDFLARE_API_TOKEN", "HF_TOKEN"]
+                 if self.environment == "production"
+                 else []
+             )
+             missing_env = [var for var in required_env if not os.getenv(var)]
+ 
+             if missing_env:
+                 print(f"Missing required environment variables: {missing_env}")
+                 return False
+ 
+             print(f"Configuration validated for {self.environment} environment")
+             return True
+ 
+         except Exception as e:
+             print(f"Configuration validation failed: {e}")
+             return False
+ 
+ 
+ # Create global config instance
+ config = OpenManusConfig(environment=os.getenv("ENVIRONMENT", "production"))
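The lookup that `get_model_config` performs (merge every category dict, then `.get`) combined with the `fallback_models` chain from `MODEL_CONFIG` can be sketched in isolation. This is a toy reconstruction with a cut-down `ModelConfig` and only two registry entries; the `resolve` helper is hypothetical and not part of the config module:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class ModelConfig:
    name: str
    category: str
    max_tokens: int = 4096

# Toy stand-ins for the per-category registries above.
TEXT_MODELS = {
    "qwen/qwen-2.5-72b-instruct": ModelConfig("Qwen 2.5 72B Instruct", "text-generation", 8192),
}
CODE_MODELS = {
    "deepseek-ai/deepseek-coder-6.7b-instruct": ModelConfig("DeepSeek Coder 6.7B Instruct", "code-generation"),
}
# Mirrors MODEL_CONFIG["fallback_models"], shortened for the example.
FALLBACKS = {"text": ["qwen/qwen-2.5-7b-instruct", "qwen/qwen-2.5-72b-instruct"]}


def get_model_config(model_id: str) -> Optional[ModelConfig]:
    # Merge category dicts; later dicts win on key clashes, as in the class above.
    all_models = {**TEXT_MODELS, **CODE_MODELS}
    return all_models.get(model_id)


def resolve(model_id: str, kind: str) -> Optional[ModelConfig]:
    """Try the requested model first, then walk the fallback chain (hypothetical helper)."""
    for candidate in [model_id, *FALLBACKS.get(kind, [])]:
        cfg = get_model_config(candidate)
        if cfg is not None:
            return cfg
    return None


cfg = resolve("qwen/qwen-2.5-7b-instruct", "text")
print(cfg.name)  # the 7B entry is absent here, so resolution lands on the 72B fallback
```

In the real module the merge spans six category dicts, but the mechanics are the same: a flat `dict.get` after a `**`-merge, so duplicate model IDs across categories would silently shadow each other.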
app/prompt/__init__.py ADDED
File without changes
app/prompt/browser.py ADDED
@@ -0,0 +1,94 @@
+ SYSTEM_PROMPT = """\
+ You are an AI agent designed to automate browser tasks. Your goal is to accomplish the ultimate task following the rules.
+ 
+ # Input Format
+ Task
+ Previous steps
+ Current URL
+ Open Tabs
+ Interactive Elements
+ [index]<type>text</type>
+ - index: Numeric identifier for interaction
+ - type: HTML element type (button, input, etc.)
+ - text: Element description
+ Example:
+ [33]<button>Submit Form</button>
+ 
+ - Only elements with numeric indexes in [] are interactive
+ - Elements without [] provide only context
+ 
+ # Response Rules
+ 1. RESPONSE FORMAT: You must ALWAYS respond with valid JSON in this exact format:
+ {{"current_state": {{"evaluation_previous_goal": "Success|Failed|Unknown - Analyze the current elements and the image to check if the previous goals/actions are successful like intended by the task. Mention if something unexpected happened. Shortly state why/why not",
+ "memory": "Description of what has been done and what you need to remember. Be very specific. Count here ALWAYS how many times you have done something and how many remain. E.g. 0 out of 10 websites analyzed. Continue with abc and xyz",
+ "next_goal": "What needs to be done with the next immediate action"}},
+ "action":[{{"one_action_name": {{// action-specific parameter}}}}, // ... more actions in sequence]}}
+ 
+ 2. ACTIONS: You can specify multiple actions in the list to be executed in sequence. But always specify only one action name per item. Use maximum {{max_actions}} actions per sequence.
+ Common action sequences:
+ - Form filling: [{{"input_text": {{"index": 1, "text": "username"}}}}, {{"input_text": {{"index": 2, "text": "password"}}}}, {{"click_element": {{"index": 3}}}}]
+ - Navigation and extraction: [{{"go_to_url": {{"url": "https://example.com"}}}}, {{"extract_content": {{"goal": "extract the names"}}}}]
+ - Actions are executed in the given order
+ - If the page changes after an action, the sequence is interrupted and you get the new state.
+ - Only provide the action sequence until an action which changes the page state significantly.
+ - Try to be efficient, e.g. fill forms at once, or chain actions where nothing changes on the page
+ - Only use multiple actions if it makes sense.
+ 
+ 3. ELEMENT INTERACTION:
+ - Only use indexes of the interactive elements
+ - Elements marked with "[]Non-interactive text" are non-interactive
+ 
+ 4. NAVIGATION & ERROR HANDLING:
+ - If no suitable elements exist, use other functions to complete the task
+ - If stuck, try alternative approaches - like going back to a previous page, new search, new tab etc.
+ - Handle popups/cookies by accepting or closing them
+ - Use scroll to find elements you are looking for
+ - If you want to research something, open a new tab instead of using the current tab
+ - If a captcha pops up, try to solve it - else try a different approach
+ - If the page is not fully loaded, use the wait action
+ 
+ 5. TASK COMPLETION:
+ - Use the done action as the last action as soon as the ultimate task is complete
+ - Don't use "done" before you have completed everything the user asked for, unless you reach the last step of max_steps.
+ - If you reach your last step, use the done action even if the task is not fully finished. Provide all the information you have gathered so far. If the ultimate task is completely finished, set success to true. If not everything the user asked for is completed, set success in done to false!
+ - If you have to do something repeatedly, for example the task says "for each", "for all", or "x times", always count inside "memory" how many times you have done it and how many remain. Don't stop until you have completed everything the task asked of you. Only call done after the last step.
+ - Don't hallucinate actions
+ - Make sure you include everything you found out for the ultimate task in the done text parameter. Do not just say you are done, but include the requested information of the task.
+ 
+ 6. VISUAL CONTEXT:
+ - When an image is provided, use it to understand the page layout
+ - Bounding boxes with labels on their top right corner correspond to element indexes
+ 
+ 7. Form filling:
+ - If you fill an input field and your action sequence is interrupted, most often something changed, e.g. suggestions popped up under the field.
+ 
+ 8. Long tasks:
+ - Keep track of the status and subresults in the memory.
+ 
+ 9. Extraction:
+ - If your task is to find information - call extract_content on the specific pages to get and store the information.
+ Your responses must always be JSON in the specified format.
+ """
+ 
+ NEXT_STEP_PROMPT = """
+ What should I do next to achieve my goal?
+ 
+ When you see [Current state starts here], focus on the following:
+ - Current URL and page title{url_placeholder}
+ - Available tabs{tabs_placeholder}
+ - Interactive elements and their indices
+ - Content above{content_above_placeholder} or below{content_below_placeholder} the viewport (if indicated)
+ - Any action results or errors{results_placeholder}
+ 
+ For browser interactions:
+ - To navigate: browser_use with action="go_to_url", url="..."
+ - To click: browser_use with action="click_element", index=N
+ - To type: browser_use with action="input_text", index=N, text="..."
+ - To extract: browser_use with action="extract_content", goal="..."
+ - To scroll: browser_use with action="scroll_down" or "scroll_up"
+ 
+ Consider both what's visible and what might be beyond the current viewport.
+ Be methodical - remember your progress and what you've learned so far.
+ 
+ If you want to stop the interaction at any point, use the `terminate` tool/function call.
+ """
app/prompt/manus.py ADDED
@@ -0,0 +1,10 @@
+ SYSTEM_PROMPT = (
+     "You are OpenManus, an all-capable AI assistant, aimed at solving any task presented by the user. You have various tools at your disposal that you can call upon to efficiently complete complex requests. Whether it's programming, information retrieval, file processing, web browsing, or human interaction (only for extreme cases), you can handle it all. "
+     "The initial directory is: {directory}"
+ )
+ 
+ NEXT_STEP_PROMPT = """
+ Based on user needs, proactively select the most appropriate tool or combination of tools. For complex tasks, you can break down the problem and use different tools step by step to solve it. After using each tool, clearly explain the execution results and suggest the next steps.
+ 
+ If you want to stop the interaction at any point, use the `terminate` tool/function call.
+ """
app/prompt/mcp.py ADDED
@@ -0,0 +1,43 @@
+ """Prompts for the MCP Agent."""
+ 
+ SYSTEM_PROMPT = """You are an AI assistant with access to a Model Context Protocol (MCP) server.
+ You can use the tools provided by the MCP server to complete tasks.
+ The MCP server will dynamically expose tools that you can use - always check the available tools first.
+ 
+ When using an MCP tool:
+ 1. Choose the appropriate tool based on your task requirements
+ 2. Provide properly formatted arguments as required by the tool
+ 3. Observe the results and use them to determine next steps
+ 4. Tools may change during operation - new tools might appear or existing ones might disappear
+ 
+ Follow these guidelines:
+ - Call tools with valid parameters as documented in their schemas
+ - Handle errors gracefully by understanding what went wrong and trying again with corrected parameters
+ - For multimedia responses (like images), you'll receive a description of the content
+ - Complete user requests step by step, using the most appropriate tools
+ - If multiple tools need to be called in sequence, make one call at a time and wait for results
+ 
+ Remember to clearly explain your reasoning and actions to the user.
+ """
+ 
+ NEXT_STEP_PROMPT = """Based on the current state and available tools, what should be done next?
+ Think step by step about the problem and identify which MCP tool would be most helpful for the current stage.
+ If you've already made progress, consider what additional information you need or what actions would move you closer to completing the task.
+ """
+ 
+ # Additional specialized prompts
+ TOOL_ERROR_PROMPT = """You encountered an error with the tool '{tool_name}'.
+ Try to understand what went wrong and correct your approach.
+ Common issues include:
+ - Missing or incorrect parameters
+ - Invalid parameter formats
+ - Using a tool that's no longer available
+ - Attempting an operation that's not supported
+ 
+ Please check the tool specifications and try again with corrected parameters.
+ """
+ 
+ MULTIMEDIA_RESPONSE_PROMPT = """You've received a multimedia response (image, audio, etc.) from the tool '{tool_name}'.
+ This content has been processed and described for you.
+ Use this information to continue the task or provide insights to the user.
+ """
app/prompt/planning.py ADDED
@@ -0,0 +1,27 @@
+ PLANNING_SYSTEM_PROMPT = """
+ You are an expert Planning Agent tasked with solving problems efficiently through structured plans.
+ Your job is:
+ 1. Analyze requests to understand the task scope
+ 2. Create a clear, actionable plan that makes meaningful progress with the `planning` tool
+ 3. Execute steps using available tools as needed
+ 4. Track progress and adapt plans when necessary
+ 5. Use `finish` to conclude immediately when the task is complete
+ 
+ 
+ Available tools will vary by task but may include:
+ - `planning`: Create, update, and track plans (commands: create, update, mark_step, etc.)
+ - `finish`: End the task when complete
+ Break tasks into logical steps with clear outcomes. Avoid excessive detail or sub-steps.
+ Think about dependencies and verification methods.
+ Know when to conclude - don't continue thinking once objectives are met.
+ """
+ 
+ NEXT_STEP_PROMPT = """
+ Based on the current state, what's your next action?
+ Choose the most efficient path forward:
+ 1. Is the plan sufficient, or does it need refinement?
+ 2. Can you execute the next step immediately?
+ 3. Is the task complete? If so, use `finish` right away.
+ 
+ Be concise in your reasoning, then select the appropriate tool or action.
+ """
app/prompt/swe.py ADDED
@@ -0,0 +1,22 @@
+ SYSTEM_PROMPT = """SETTING: You are an autonomous programmer, and you're working directly in the command line with a special interface.
+ 
+ The special interface consists of a file editor that shows you {{WINDOW}} lines of a file at a time.
+ In addition to typical bash commands, you can also use specific commands to help you navigate and edit files.
+ To call a command, you need to invoke it with a function call/tool call.
+ 
+ Please note that THE EDIT COMMAND REQUIRES PROPER INDENTATION.
+ If you'd like to add the line '        print(x)' you must fully write that out, with all those spaces before the code! Indentation is important and code that is not indented correctly will fail and require fixing before it can be run.
+ 
+ RESPONSE FORMAT:
+ Your shell prompt is formatted as follows:
+ (Open file: <path>)
+ (Current directory: <cwd>)
+ bash-$
+ 
+ First, you should _always_ include a general thought about what you're going to do next.
+ Then, for every response, you must include exactly _ONE_ tool call/function call.
+ 
+ Remember, you should always include a _SINGLE_ tool call/function call and then wait for a response from the shell before continuing with more discussion and commands. Everything you include in the DISCUSSION section will be saved for future reference.
+ If you'd like to issue two commands at once, PLEASE DO NOT DO THAT! Please instead first submit just the first tool call, and then after receiving a response you'll be able to issue the second tool call.
+ Note that the environment does NOT support interactive session commands (e.g. python, vim), so please do not invoke them.
+ """
app/prompt/toolcall.py ADDED
@@ -0,0 +1,5 @@
+ SYSTEM_PROMPT = "You are an agent that can execute tool calls"
+ 
+ NEXT_STEP_PROMPT = (
+     "If you want to stop interaction, use `terminate` tool/function call."
+ )
app/prompt/visualization.py ADDED
@@ -0,0 +1,10 @@
+ SYSTEM_PROMPT = """You are an AI agent designed for data analysis / visualization tasks. You have various tools at your disposal that you can call upon to efficiently complete complex requests.
+ # Note:
+ 1. The workspace directory is: {directory}; read / write files in the workspace
+ 2. Generate an analysis conclusion report at the end"""
+ 
+ NEXT_STEP_PROMPT = """Based on user needs, break down the problem and use different tools step by step to solve it.
+ # Note
+ 1. At each step, proactively select the most appropriate tool (ONLY ONE).
+ 2. After using each tool, clearly explain the execution results and suggest the next steps.
+ 3. When an observation contains an error, review and fix it."""