from openai import OpenAI
class NVIDIAInferenceService:
    """Thin wrapper around NVIDIA's NIM OpenAI-compatible chat-completion API."""

    def __init__(self, api_key: str, model_name: str = "deepseek-ai/deepseek-v3.1") -> None:
        """Create a client bound to NVIDIA's OpenAI-compatible endpoint.

        Args:
            api_key: NVIDIA API key used to authenticate requests.
            model_name: Model identifier passed to chat completions.
        """
        self.client = OpenAI(
            base_url="https://integrate.api.nvidia.com/v1",
            api_key=api_key,
        )
        self.model_name = model_name

    def generate_response(self, prompt: str) -> str:
        """Stream a chat completion for *prompt* and return the accumulated text.

        Reasoning ("thinking") tokens, when the model emits them, are
        concatenated into the result ahead of the regular content tokens,
        matching the order they arrive on the stream.

        Args:
            prompt: The user message to send to the model.

        Returns:
            The full reasoning + content text of the streamed response.
        """
        completion = self.client.chat.completions.create(
            model=self.model_name,
            messages=[{"role": "user", "content": prompt}],
            temperature=0.2,
            top_p=0.7,
            max_tokens=8192,
            # NIM-specific flag asking the model to emit reasoning tokens.
            extra_body={"chat_template_kwargs": {"thinking": True}},
            stream=True,
        )
        # Collect pieces in a list and join once — avoids quadratic string
        # concatenation on long streams.
        parts: list[str] = []
        for chunk in completion:
            # Some stream events (e.g. usage-only chunks) carry an empty
            # `choices` list; indexing them blindly raises IndexError.
            if not chunk.choices:
                continue
            delta = chunk.choices[0].delta
            reasoning = getattr(delta, "reasoning_content", None)
            if reasoning:
                parts.append(reasoning)
            content = getattr(delta, "content", None)
            if content is not None:
                parts.append(content)
        return "".join(parts)