import asyncio
import json
import os

from dotenv import load_dotenv
from langchain.agents import create_agent
from langchain.chat_models import init_chat_model
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_mcp_adapters.client import MultiServerMCPClient
from langchain_openai import ChatOpenAI
from langfuse import observe
from langfuse.langchain import CallbackHandler


class LangChainAgent:
    def __init__(self):
        print("LangChainAgent initialized.")
        load_dotenv()
        self.gaia_system_prompt = """
        You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
        FINAL ANSWER: [YOUR FINAL ANSWER].
        YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
        If you are asked for a number, don't use commas to write your number and don't use units such as $ or percent signs unless specified otherwise.
        If you are asked for a string, don't use articles or abbreviations (e.g. for cities), and write digits in plain text unless specified otherwise.
        If you are asked for a comma separated list, apply the above rules depending on whether each element of the list is a number or a string.

        You have access to a browser tool like Playwright through MCP. Use it to search the web when needed.
        -- MUST-FOLLOW RULE: ALWAYS WAIT UNTIL THE PAGE IS FULLY LOADED BEFORE EXTRACTING INFORMATION FROM THE PAGE. --
        -- Always try Google or another popular search engine first. If required, visit the pages shown in the search results to find the proper information. --
        Some websites are built with JavaScript and load their content entirely on the browser/client side. In such cases you may have to wait until the page is fully loaded to get the content you want.
        Use the browser tool to navigate, click, scroll, and interact with the web page as needed to find the information required to answer the question.

        If you are asked about YouTube videos:
        - Try to get the information from the video description or comments section.
        - If you are asked for information from the video itself, try to get the transcript using the youtube-transcript tool available to you. Use the video URL to get the transcript.

        Sometimes LLM inference APIs hit rate limits. In such a case, try to understand the response from the API. It often contains information about when you can retry or what to do next.
        """

        # Initialize Langfuse tracing.
        # This block sets up the Langfuse callback handler for LangChain. It creates
        # a CallbackHandler instance only if the required API keys are available.
        # The handler is added to a list of callbacks that is passed to LLM
        # invocations for tracing.
        self.callbacks = []
        if os.getenv("LANGFUSE_PUBLIC_KEY") and os.getenv("LANGFUSE_SECRET_KEY"):
            langfuse_callback_handler = CallbackHandler()
            self.callbacks.append(langfuse_callback_handler)
    @observe()
    async def __call__(self, question: str, model_name: str, model_provider: str) -> str:
        print(f"Agent received question (first 50 chars): {question[:50]}...")

        with open("configurations/mcp-server-config.json", "r") as config_file:
            mcp_config = json.load(config_file)

        client = MultiServerMCPClient(mcp_config)
        tools = await client.get_tools()
        print(tools)

        if model_provider == "google_genai":
            model = init_chat_model(model_name, model_provider=model_provider)
        elif model_provider == "nvidia":
            model = ChatOpenAI(
                model=model_name,
                openai_api_key=os.getenv("NVIDIA_API_KEY"),
                openai_api_base="https://integrate.api.nvidia.com/v1",
            )
        else:
            # Default to the NVIDIA endpoint if the provider is not specified.
            model = ChatOpenAI(
                model=model_name,
                openai_api_key=os.getenv("NVIDIA_API_KEY"),
                openai_api_base="https://integrate.api.nvidia.com/v1",
            )

        agent = create_agent(model, tools)
        answer = await agent.ainvoke(
            {
                "messages": [
                    {"role": "system", "content": self.gaia_system_prompt},
                    {"role": "user", "content": question},
                ]
            },
            config={"callbacks": self.callbacks},
        )

        final_answer = self.extract_final_answer(answer)
        print(f"Extracted final answer: {final_answer}")
        return final_answer

    def extract_final_answer(self, answer: dict) -> str:
        """Extracts the final answer from the agent's response."""
        # The response can be in multiple formats. Try the structure nested
        # inside 'messages' first.
        try:
            last_message = answer.get('messages', [])[-1]
            content = getattr(last_message, 'content', None)
            text_content = ""
            if isinstance(content, list):
                for block in content:
                    if isinstance(block, dict) and block.get('type') == 'text':
                        text_content += block.get('text', '')
            elif isinstance(content, str):
                text_content = content

            if "FINAL ANSWER:" in text_content:
                return text_content.split("FINAL ANSWER:")[1].strip()

            # If we found text content but not the marker, return the text.
            if text_content:
                return text_content
        except (IndexError, AttributeError, KeyError, TypeError):
            # This structure was not found; fall through to the fallback below.
            pass

        return str(answer)  # If nothing works, return the string representation.
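
# --- Usage sketch (illustrative, not part of the agent class) ---
# A minimal way to run the agent locally, assuming that:
#   * configurations/mcp-server-config.json exists and follows the dict format
#     expected by MultiServerMCPClient, e.g. (adjust to the MCP servers you run):
#       {
#           "playwright": {
#               "command": "npx",
#               "args": ["@playwright/mcp@latest"],
#               "transport": "stdio"
#           }
#       }
#   * the relevant API keys (NVIDIA_API_KEY or Google GenAI credentials, plus the
#     optional LANGFUSE_* keys) are set in the environment or in a .env file.
# The question and model name below are placeholders, not values from this project.
if __name__ == "__main__":
    agent = LangChainAgent()
    sample_question = "What is the capital of France?"
    result = asyncio.run(
        agent(
            sample_question,
            model_name="gemini-2.0-flash",
            model_provider="google_genai",
        )
    )
    print(f"Agent answer: {result}")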