import requests
import json
import os
from collections import Counter
from langgraph.graph import StateGraph, END
from typing import TypedDict, Annotated
import operator
from langchain_core.messages import AnyMessage, SystemMessage, HumanMessage, ToolMessage, AIMessage
from langchain.chat_models import init_chat_model
import gradio as gr
from langchain.schema import HumanMessage
from langchain.tools import tool
from elevenlabs import ElevenLabs
import tempfile
import soundfile as sf
import io

# API credentials are read from the process environment once, at import time.
# Either value is None when the corresponding variable is not set.
MISTRAL_API_KEY = os.environ.get("MISTRAL_API_KEY")
ELEVENLABS_API_KEY = os.environ.get("ELEVENLABS_API_KEY")

# Startup diagnostic: reports only whether a key is present, never its value.
print("ELEVENLABS_API_KEY loaded:", bool(ELEVENLABS_API_KEY))


class AgentState(TypedDict):
    """State passed between the graph nodes: the running list of chat messages."""
    # `operator.add` is attached as Annotated metadata. LangGraph treats it as
    # the reducer for this key, so the list a node returns under 'messages' is
    # concatenated onto the existing history instead of replacing it.
    messages: Annotated[list[AnyMessage], operator.add]


class Agent:
    """Tool-calling agent implemented as a two-node state graph.

    The graph starts at the "llm" node. After every model reply,
    ``exists_action`` decides whether to run the requested tools (the
    "action" node, which then loops back to "llm") or to stop at END.
    """

    def __init__(self, model, tools, system=""):
        """Bind the tools to the model and compile the graph.

        Args:
            model: Chat model exposing ``bind_tools`` (and, once bound, ``invoke``).
            tools: Iterable of tool objects; each must expose ``name`` and ``invoke``.
            system: Optional system prompt prepended to every model call.
        """
        self.system = system
        # Name -> tool lookup used by take_action to dispatch the model's requests.
        self.tools = {t.name: t for t in tools}
        self.model = model.bind_tools(tools)

        builder = StateGraph(AgentState)
        builder.add_node("llm", self.call_mistral_ai)
        builder.add_node("action", self.take_action)
        # Route on whether the latest model message asked for any tool call.
        builder.add_conditional_edges("llm", self.exists_action, {True: "action", False: END})
        builder.add_edge("action", "llm")
        builder.set_entry_point("llm")
        self.graph = builder.compile()

    def exists_action(self, state: AgentState):
        """Return True when the most recent message carries at least one tool call."""
        return len(state['messages'][-1].tool_calls) > 0

    def call_mistral_ai(self, state: AgentState):
        """Invoke the model on the history and return its reply as a state update."""
        history = state['messages']
        # The system prompt is added per call and never written back into the state.
        outgoing = [SystemMessage(content=self.system)] + history if self.system else history
        return {'messages': [self.model.invoke(outgoing)]}

    def take_action(self, state: AgentState):
        """Run every tool call requested by the last message.

        Returns a state update holding one ToolMessage per call. Each tool
        result is converted with ``str()`` before being stored as the
        message content.
        """
        outputs = []
        for call in state['messages'][-1].tool_calls:
            print(f"Calling: {call}")
            tool_name = call['name']
            if tool_name in self.tools:
                outcome = self.tools[tool_name].invoke(call['args'])
            else:
                # The model named a tool that was never registered; ask it to retry.
                print("\n ....bad tool name....")
                outcome = "bad tool name, retry"
            outputs.append(
                ToolMessage(tool_call_id=call['id'], name=tool_name, content=str(outcome))
            )
        print("Back to the model!")
        return {'messages': outputs}


@tool
def generate_speech_from_text(text: str, voice_id: str = "JBFqnCBsd6RMkjVDRZzb") -> bytes:
    """
    Converts text into speech using ElevenLabs REST API and returns raw audio bytes.
    Fully compatible with Hugging Face Spaces.
    """
    # The docstring above is kept word-for-word: the @tool decorator exposes it
    # to the model as this tool's description, so it is effectively runtime text.

    # Guard clause: without a key there is nothing to call. An empty bytes
    # object is the failure value for every error path in this function.
    if not ELEVENLABS_API_KEY:
        print("Error: ELEVENLABS_API_KEY not found in environment variables.")
        return b""

    endpoint = f"https://api.elevenlabs.io/v1/text-to-speech/{voice_id}"
    request_headers = {
        "xi-api-key": ELEVENLABS_API_KEY,
        "accept": "audio/mpeg",
        "Content-Type": "application/json",
    }
    voice_settings = {"stability": 0.5, "similarity_boost": 0.7}
    request_body = {
        "text": text,
        "model_id": "eleven_multilingual_v2",
        "voice_settings": voice_settings,
    }

    try:
        reply = requests.post(
            endpoint,
            headers=request_headers,
            json=request_body,
            timeout=60,
        )
        # Turn 4xx/5xx responses into exceptions so they share the handler below.
        reply.raise_for_status()
        return reply.content  # raw MP3 bytes
    except Exception as err:
        # Best effort: report the failure and hand back the empty failure value.
        print(f"Error generating speech: {err}")
        return b""


# System prompt for the agent: asks for a short poetic reply to each user
# sentence, then tells the model to voice that reply through the available tool.
prompt = """You are a poetic reflection expert.
For each sentence the user says, respond with a short, emotionally resonant, poetic line that captures their feeling.
Keep the tone calm and introspective.
Example:
User: "I just woke up."
AI: "The sun rises, but your soul’s still loading."
then Use the available tools to generate speech from text generated.
"""

# Chat model requested by name from the "mistralai" provider. No API key is
# passed explicitly here — presumably the provider integration reads
# MISTRAL_API_KEY from the environment; confirm.
model = init_chat_model("mistral-large-latest", model_provider="mistralai")
# Single module-level agent, with the speech tool as its only tool; it is the
# instance chat_with_agent invokes for every request.
abot = Agent(model, [generate_speech_from_text], system=prompt)


def _audio_bytes_from_content(content):
    """Recover raw audio bytes from a tool message's content.

    Agent.take_action stores every tool result as ``str(result)``, so the
    bytes returned by the speech tool arrive here as their textual repr
    (``"b'...'"``). This accepts real bytes as well as that repr and returns
    ``b""`` when no audio can be recovered.
    """
    import ast  # local import: only this helper needs it

    if isinstance(content, (bytes, bytearray)):
        return bytes(content)

    # Only attempt to parse text that looks like a bytes literal.
    if isinstance(content, str) and content[:2] in ("b'", 'b"'):
        try:
            # literal_eval only evaluates Python literals; it never executes code.
            recovered = ast.literal_eval(content)
        except (ValueError, SyntaxError):
            return b""
        if isinstance(recovered, bytes):
            return recovered

    return b""


def chat_with_agent(user_input: str):
    """Run one user message through the agent and return the spoken reply.

    Args:
        user_input: Text typed by the user.

    Returns:
        ``(sample_rate, samples)`` for the audio produced by the last tool
        call, or ``None`` when the agent made no tool call or the tool
        yielded no audio.
    """
    messages = [HumanMessage(content=user_input)]
    result = abot.graph.invoke({"messages": messages})

    tool_messages = [m for m in result["messages"] if m.type == "tool"]
    if not tool_messages:
        return None

    # The content is text, not bytes (see helper), so it is decoded back first.
    # Previously the isinstance(bytes) check on the raw content could never
    # succeed and this function always returned None.
    audio_bytes = _audio_bytes_from_content(tool_messages[-1].content)
    if not audio_bytes:
        return None

    # Decode MP3 bytes to numpy for Gradio playback.
    # NOTE(review): this relies on the installed libsndfile being able to read
    # MP3 data — confirm for the deployment environment.
    audio_np, sr = sf.read(io.BytesIO(audio_bytes), dtype='float32')
    return sr, audio_np

# Input and output widgets, built up front so the Interface call stays short.
_thoughts_input = gr.Textbox(
    label="Your thoughts",
    placeholder="Type what you're feeling...",
)
_reflection_output = gr.Audio(label="Poetic Reflection", type="numpy", autoplay=True)

# Text in, audio out: chat_with_agent is called with the textbox value and its
# return value is handed to the audio component.
iface = gr.Interface(
    fn=chat_with_agent,
    inputs=_thoughts_input,
    outputs=_reflection_output,
    title="🎙️ Poetic Reflection AI",
    description="Type your feelings and hear them reflected poetically through an AI voice.",
)

# Starts the app as soon as this module runs; share=True additionally requests
# a public share link from Gradio.
iface.launch(share=True)