Saivamsim26 committed
Commit ddec471 · verified · 1 Parent(s): 8fb9d4d

Update handler.py

Files changed (1)
  1. handler.py +7 -20
handler.py CHANGED
@@ -19,14 +19,10 @@ class EndpointHandler:
         self.device = "cuda" if torch.cuda.is_available() else "cpu"
         self.dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

-        # Load processor and model
+        # Load processor and model - EXACTLY like the notebook
         self.processor = AutoProcessor.from_pretrained(path)
-        self.model = AutoModelForVision2Seq.from_pretrained(
-            path,
-            torch_dtype=self.dtype,
-            device_map="auto"
-        )
-        self.model.eval()
+        self.model = AutoModelForVision2Seq.from_pretrained(path)
+        self.model.to(self.device)

         # System instruction (same as training)
         self.system_instruction = (
@@ -108,7 +104,7 @@ class EndpointHandler:
         # Get generation parameters
         parameters = data.get("parameters", {})
         max_new_tokens = parameters.get("max_new_tokens", 100)
-        temperature = parameters.get("temperature", 0.1)  # Low temp for structured output
+        temperature = parameters.get("temperature", 0.1)

         # Format messages (same structure as training)
         messages = [
@@ -127,7 +123,7 @@ class EndpointHandler:
             }
         ]

-        # Process inputs using chat template
+        # Process inputs using chat template - EXACTLY like notebook
         text_input = self.processor.apply_chat_template(
             messages,
             add_generation_prompt=True,
@@ -143,22 +139,13 @@ class EndpointHandler:
         )

         # Move to device
-        model_inputs = {
-            k: v.to(self.device) if torch.is_tensor(v) else v
-            for k, v in model_inputs.items()
-        }
-
-        # Cast pixel_values to correct dtype
-        if "pixel_values" in model_inputs:
-            model_inputs["pixel_values"] = model_inputs["pixel_values"].to(self.dtype)
+        model_inputs = {k: v.to(self.device) for k, v in model_inputs.items()}

         # Generate prediction
         with torch.no_grad():
             generated_ids = self.model.generate(
                 **model_inputs,
-                max_new_tokens=max_new_tokens,
-                temperature=temperature,
-                do_sample=temperature > 0,
+                max_new_tokens=max_new_tokens
             )

         # Decode only the generated tokens (skip the input prompt)
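
For a quick local sanity check before redeploying, the simplified handler can be driven directly. The sketch below assumes the standard Inference Endpoints custom-handler contract (EndpointHandler(path) plus a callable __call__(data)); the ./model directory, the "inputs" key, and the test image are hypothetical, since the request schema is not visible in this diff.

# Minimal smoke test for the updated handler; a sketch, not part of the commit.
# Hypothetical pieces: "./model" as the local checkout, an "inputs" key carrying
# a PIL image, and "sample.jpg" as the test file; the real schema may differ.
from PIL import Image
from handler import EndpointHandler

handler = EndpointHandler(path="./model")

payload = {
    "inputs": Image.open("sample.jpg"),
    # After this commit only max_new_tokens reaches model.generate();
    # temperature is still parsed but no longer forwarded.
    "parameters": {"max_new_tokens": 100},
}

print(handler(payload))

Note that with the simplified load path the weights stay in the default float32 and move to the device wholesale, so the explicit pixel_values cast to self.dtype is no longer needed; the unguarded .to(self.device) in the comprehension does assume every processor output is a tensor.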