hf-agent-course-final-assignment / nvidia_inference_service.py
frkhan's picture
-- Developed Basic Langchain agent consuming Gemini Inference API.
2329760
from openai import OpenAI
class NVIDIAInferenceService:
    """Thin wrapper around the OpenAI SDK client pointed at NVIDIA's endpoint.

    The client is configured with the base URL
    ``https://integrate.api.nvidia.com/v1`` and issues streaming chat
    completion requests for a single configured model.
    """

    def __init__(self, api_key: str, model_name: str = "deepseek-ai/deepseek-v3.1") -> None:
        """Create the API client and remember which model to query.

        Args:
            api_key: Key passed to the OpenAI client for authentication.
            model_name: Model identifier sent with every completion request.
        """
        self.client = OpenAI(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=api_key,
        )
        self.model_name = model_name

    def generate_response(self, prompt: str) -> str:
        """Send *prompt* as a single user message and return the streamed text.

        The request is streamed; every chunk's ``reasoning_content`` (when the
        delta carries one) and ``content`` are appended, in arrival order, to
        one string. Reasoning text and answer text are therefore interleaved
        in the returned value rather than separated.

        Args:
            prompt: Text sent as the content of the only (user) message.

        Returns:
            The concatenation of all reasoning and content fragments received.
        """
        completion = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            top_p=0.7,
            max_tokens=8192,
            # NOTE(review): presumably switches on the model's reasoning
            # output for this chat template; confirm against the NVIDIA docs.
            extra_body={"chat_template_kwargs": {"thinking": True}},
            stream=True,
        )

        pieces: list[str] = []
        for chunk in completion:
            # Streams may contain chunks without any choice (e.g. usage-only
            # chunks); indexing choices[0] on those would raise IndexError.
            choices = getattr(chunk, "choices", None)
            if not choices:
                continue

            delta = getattr(choices[0], "delta", None)
            if delta is None:
                continue

            # reasoning_content is a non-standard field, so it may be absent.
            reasoning = getattr(delta, "reasoning_content", None)
            if reasoning:
                pieces.append(reasoning)

            content = getattr(delta, "content", None)
            if content:
                pieces.append(content)

        return "".join(pieces)