Update inference.py

inference.py CHANGED  (+16 -11)
@@ -66,14 +66,7 @@ def load_image_to_tensor_with_resize_and_crop(
     target_width: int = 768,
     just_crop: bool = False,
 ) -> torch.Tensor:
-    """Load and process an image into a tensor.
-
-    Args:
-        image_input: Either a file path (str) or a PIL Image object
-        target_height: Desired height of output tensor
-        target_width: Desired width of output tensor
-        just_crop: If True, only crop the image to the target size without resizing
-    """
+    """Load and process an image into a tensor with high-quality scaling."""
     if isinstance(image_input, str):
         image = Image.open(image_input).convert("RGB")
     elif isinstance(image_input, Image.Image):
@@ -84,6 +77,7 @@ def load_image_to_tensor_with_resize_and_crop(
     input_width, input_height = image.size
     aspect_ratio_target = target_width / target_height
     aspect_ratio_frame = input_width / input_height
+
     if aspect_ratio_frame > aspect_ratio_target:
         new_width = int(input_height * aspect_ratio_target)
         new_height = input_height
@@ -95,16 +89,27 @@ def load_image_to_tensor_with_resize_and_crop(
         x_start = 0
         y_start = (input_height - new_height) // 2
 
+    # Crop the center of the image
     image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))
+
     if not just_crop:
-        image = image.resize((target_width, target_height))
+        # Use LANCZOS for high-quality downscaling/upscaling
+        image = image.resize((target_width, target_height), Image.LANCZOS)
 
+    # Convert to numpy and standard processing WITHOUT blur or crf_compression
     image = np.array(image)
-    image = cv2.GaussianBlur(image, (3, 3), 0)
+
+    # REMOVED: cv2.GaussianBlur(image, (3, 3), 0)
+
     frame_tensor = torch.from_numpy(image).float()
-    frame_tensor = crf_compressor.compress(frame_tensor / 255.0) * 255.0
+
+    # REMOVED: crf_compressor.compress(...)
+
+    # Normalize to [-1, 1] range expected by the VAE
+    # Note: The tensor is in (H, W, C) from numpy, we need (C, H, W)
     frame_tensor = frame_tensor.permute(2, 0, 1)
     frame_tensor = (frame_tensor / 127.5) - 1.0
+
     # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
     return frame_tensor.unsqueeze(0).unsqueeze(2)
 
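For reference, here is a sketch of the function as it reads after this patch, assembled from the three hunks above. The diff elides the imports, the bodies of the elif/else branches, and the target_height default; those parts are filled in below as labeled assumptions rather than taken from this commit.

from typing import Union

import numpy as np
import torch
from PIL import Image


def load_image_to_tensor_with_resize_and_crop(
    image_input: Union[str, Image.Image],  # assumed signature; not shown in the diff
    target_height: int = 512,              # assumed default; not shown in the diff
    target_width: int = 768,
    just_crop: bool = False,
) -> torch.Tensor:
    """Load and process an image into a tensor with high-quality scaling."""
    if isinstance(image_input, str):
        image = Image.open(image_input).convert("RGB")
    elif isinstance(image_input, Image.Image):
        image = image_input  # assumed; branch body elided by the hunk context
    else:
        raise ValueError("image_input must be a file path or a PIL Image")  # assumed

    input_width, input_height = image.size
    aspect_ratio_target = target_width / target_height
    aspect_ratio_frame = input_width / input_height

    if aspect_ratio_frame > aspect_ratio_target:
        new_width = int(input_height * aspect_ratio_target)
        new_height = input_height
        x_start = (input_width - new_width) // 2  # assumed; elided between hunks
        y_start = 0                               # assumed; elided between hunks
    else:
        new_width = input_width                              # assumed; elided
        new_height = int(input_width / aspect_ratio_target)  # assumed; elided
        x_start = 0
        y_start = (input_height - new_height) // 2

    # Crop the center of the image
    image = image.crop((x_start, y_start, x_start + new_width, y_start + new_height))

    if not just_crop:
        # Use LANCZOS for high-quality downscaling/upscaling
        image = image.resize((target_width, target_height), Image.LANCZOS)

    # Convert to numpy; the Gaussian blur and CRF compression steps are gone
    image = np.array(image)
    frame_tensor = torch.from_numpy(image).float()

    # Reorder (H, W, C) -> (C, H, W), then normalize to [-1, 1]:
    # pixel value 0 maps to -1.0, 127.5 to 0.0, and 255 to 1.0
    frame_tensor = frame_tensor.permute(2, 0, 1)
    frame_tensor = (frame_tensor / 127.5) - 1.0

    # Create 5D tensor: (batch_size=1, channels=3, num_frames=1, height, width)
    return frame_tensor.unsqueeze(0).unsqueeze(2)


# e.g., any input image becomes a (1, 3, 1, 512, 768) tensor in [-1, 1]
tensor = load_image_to_tensor_with_resize_and_crop("example.jpg", 512, 768)

The net effect of the patch: conditioning frames reach the model sharper, since resizing now uses Lanczos resampling and the blur and CRF-compression preprocessing no longer soften the image before it is encoded.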