Update app.py
app.py CHANGED
@@ -100,7 +100,6 @@ def teacache_wrapper_forward(self, hidden_states: torch.Tensor, **kwargs):
     # COMPUTE: Call the original, stored method, passing 'self' explicitly
     self.previous_hidden_states = hidden_states.clone()
     output = original_transformer_forward(self, hidden_states=hidden_states, **kwargs)
-
     # Handle both tuple and object return types from the original function
     if isinstance(output, tuple):
         output_tensor = output[0]
@@ -110,13 +109,9 @@ def teacache_wrapper_forward(self, hidden_states: torch.Tensor, **kwargs):
     self.previous_residual = output_tensor - hidden_states
     return output
 
-# 3. Apply the patch
 Transformer3DModel.forward = teacache_wrapper_forward
 print("✅ Transformer3DModel patched with robust TeaCache Wrapper.")
 
-# --- End TeaCache Integration ---
-
-
 MAX_SEED = np.iinfo(np.int32).max
 
 upscaler = UpscaleWithModel.from_pretrained("Kim2091/ClearRealityV1").to(torch.device("cuda:0"))
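For context, the two hunks above show only fragments of the TeaCache-style wrapper. A minimal sketch of the pattern they rely on is given below; the skip logic and its threshold are assumptions (they are not visible in this diff), and Transformer3DModel is assumed to be imported earlier in app.py, as the diff implies.

import torch

# 1. Keep a reference to the unpatched forward so the wrapper can still call it.
#    (Transformer3DModel is assumed to come from app.py's existing imports.)
original_transformer_forward = Transformer3DModel.forward

def teacache_wrapper_forward(self, hidden_states: torch.Tensor, **kwargs):
    # SKIP path (assumed, not shown in the diff): if the input barely changed since
    # the previous step, reuse the cached residual instead of re-running the model.
    if getattr(self, "previous_hidden_states", None) is not None:
        change = (hidden_states - self.previous_hidden_states).abs().mean()
        if change < 0.05:  # illustrative threshold, not taken from the diff
            # NOTE: callers expecting a tuple would need this wrapped accordingly.
            return hidden_states + self.previous_residual

    # COMPUTE path (shown in the diff): run the original forward and cache the residual.
    self.previous_hidden_states = hidden_states.clone()
    output = original_transformer_forward(self, hidden_states=hidden_states, **kwargs)
    output_tensor = output[0] if isinstance(output, tuple) else output
    self.previous_residual = output_tensor - hidden_states
    return output

# 2. Apply the patch, exactly as the diff does.
Transformer3DModel.forward = teacache_wrapper_forward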
@@ -159,6 +154,45 @@ print(f"Target inference device: {target_inference_device}")
 pipeline_instance.to(target_inference_device)
 if latent_upsampler_instance: latent_upsampler_instance.to(target_inference_device)
 
+from diffusers.models.attention_processor import AttnProcessor2_0
+
+from kernels import get_kernel
+
+fa3_kernel = get_kernel("kernels-community/flash-attn3")
+
+class FlashAttentionProcessor(AttnProcessor2_0):
+    def __call__(
+        self,
+        attn,
+        hidden_states,
+        encoder_hidden_states=None,
+        attention_mask=None,
+        **kwargs,
+    ):
+        query = attn.to_q(hidden_states)
+        encoder_hidden_states = encoder_hidden_states if encoder_hidden_states is not None else hidden_states
+        key = attn.to_k(encoder_hidden_states)
+        value = attn.to_v(encoder_hidden_states)
+        scale = attn.scale
+        query = query * scale
+        b, t, c = query.shape
+        h = attn.heads
+        d = c // h
+        q_reshaped = query.reshape(b, t, h, d).permute(0, 2, 1, 3)
+        k_reshaped = key.reshape(b, t, h, d).permute(0, 2, 1, 3)
+        v_reshaped = value.reshape(b, t, h, d).permute(0, 2, 1, 3)
+        out_reshaped = torch.empty_like(q_reshaped)
+        fa3_kernel.attention(q_reshaped, k_reshaped, v_reshaped, out_reshaped)
+        out = out_reshaped.permute(0, 2, 1, 3).reshape(b, t, c)
+        out = attn.to_out(out)
+        return out
+
+fa_processor = FlashAttentionProcessor()
+
+# Iterate through the pipeline's UNet and apply the custom processor
+for name, module in pipeline_instance.transformer.named_modules():
+    if isinstance(module, AttnProcessor2_0):
+        module.processor = fa_processor
 
 def upload_to_sftp(local_filepath):
     if not all([FTP_HOST, FTP_USER, FTP_PASS, FTP_DIR]):
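One caveat on the new registration loop: in current diffusers releases, AttnProcessor2_0 is a plain callable rather than an nn.Module, so named_modules() will not yield instances of it and the loop above may never assign fa_processor; likewise, Attention.to_out is usually a ModuleList, so attn.to_out(out) would typically need to be attn.to_out[0](out). A more conventional registration targets the Attention modules themselves. A minimal sketch follows, assuming pipeline_instance.transformer is built from diffusers Attention blocks (which may not hold for this particular model).

from diffusers.models.attention_processor import Attention

fa_processor = FlashAttentionProcessor()  # class defined in the diff above

# Assign the custom processor on every Attention block found in the transformer.
for name, module in pipeline_instance.transformer.named_modules():
    if isinstance(module, Attention):
        module.set_processor(fa_processor)

# Many diffusers models also expose a set_attn_processor(...) helper for the same purpose:
# pipeline_instance.transformer.set_attn_processor(fa_processor)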