from openai import OpenAI


class NVIDIAInferenceService:
    def __init__(self, api_key: str, model_name: str = "deepseek-ai/deepseek-v3.1"):
        # OpenAI-compatible client pointed at the NVIDIA NIM endpoint
        self.client = OpenAI(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=api_key
        )
        self.model_name = model_name

    def generate_response(self, prompt: str) -> str:
        completion = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            top_p=0.7,
            max_tokens=8192,
            # Ask the chat template to emit the model's "thinking" (reasoning) tokens
            extra_body={"chat_template_kwargs": {"thinking": True}},
            stream=True
        )

        response = ""
        for chunk in completion:
            # Reasoning tokens arrive on a separate delta field from the final answer
            reasoning = getattr(chunk.choices[0].delta, "reasoning_content", None)
            if reasoning:
                response += reasoning
            if chunk.choices[0].delta.content is not None:
                response += chunk.choices[0].delta.content
        return response
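

# A minimal usage sketch. It assumes the NVIDIA API key is available in the
# hypothetical environment variable NVIDIA_API_KEY; adjust to your own setup.
if __name__ == "__main__":
    import os

    service = NVIDIAInferenceService(api_key=os.environ["NVIDIA_API_KEY"])
    answer = service.generate_response("Explain streaming inference in one paragraph.")
    print(answer)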