from openai import OpenAI


class NVIDIAInferenceService:
    def __init__(self, api_key: str, model_name: str = "deepseek-ai/deepseek-v3.1"):
        # OpenAI-compatible client pointed at the NVIDIA NIM endpoint
        self.client = OpenAI(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=api_key
        )
        self.model_name = model_name

    def generate_response(self, prompt: str) -> str:
        completion = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            top_p=0.7,
            max_tokens=8192,
            # Ask the chat template to emit the model's "thinking" (reasoning) tokens
            extra_body={"chat_template_kwargs": {"thinking": True}},
            stream=True
        )

        response = ""
        for chunk in completion:
            # Reasoning tokens arrive on a separate delta field from the final answer
            reasoning = getattr(chunk.choices[0].delta, "reasoning_content", None)
            if reasoning:
                response += reasoning
            if chunk.choices[0].delta.content is not None:
                response += chunk.choices[0].delta.content
        return response
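

# A minimal usage sketch. It assumes the NVIDIA API key is available in the
# hypothetical environment variable NVIDIA_API_KEY; adjust to your own setup.
if __name__ == "__main__":
    import os

    service = NVIDIAInferenceService(api_key=os.environ["NVIDIA_API_KEY"])
    answer = service.generate_response("Explain streaming inference in one paragraph.")
    print(answer)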