NON_WORKING_matrix_game_2

Paused

App Files Files Community

Julian Bilcke committed on Aug 14

Commit

0ffe757

1 Parent(s): 5393526

let's try again

Browse files

Files changed (3) hide show

api_engine.py +30 -1
api_server.py +2 -2
api_utils.py +25 -5

api_engine.py CHANGED Viewed

@@ -113,8 +113,13 @@ class MatrixGameEngine:
     def _init_models(self):
         """Initialize Matrix-Game V2 models"""
         try:
             # Load configuration
             self.config = OmegaConf.load(self.config_path)
             # Initialize generator
             generator = WanDiffusionWrapper(
@@ -175,10 +180,14 @@ class MatrixGameEngine:
             logger.info("Models loaded successfully")
             # Preprocess initial images for all scenes
             for scene_name, frames in self.scenes.items():
                 if frames and len(frames) > 0:
                     # Prepare the first frame as initial latent
                     self._prepare_scene_latent(scene_name, frames[0])
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
@@ -264,6 +273,8 @@ class MatrixGameEngine:
             raise RuntimeError(error_msg)
         try:
             # Map scene name to mode
             mode_map = {
                 'universal': 'universal',
@@ -272,15 +283,19 @@ class MatrixGameEngine:
                 'templerun': 'templerun'
             }
             mode = mode_map.get(scene_name, 'universal')
             # Get cached latent or prepare new one
             if scene_name not in self.scene_latents:
                 scene_frames = self.scenes.get(scene_name, self.scenes.get('universal', []))
                 if scene_frames:
                     self._prepare_scene_latent(scene_name, scene_frames[0])
                 else:
                     error_msg = f"No initial frames available for scene: {scene_name}"
                     logger.error(error_msg)
                     raise ValueError(error_msg)
             scene_data = self.scene_latents.get(scene_name)
@@ -289,6 +304,8 @@ class MatrixGameEngine:
                 logger.error(error_msg)
                 raise ValueError(error_msg)
             # Prepare conditions
             if keyboard_condition is None:
                 keyboard_condition = [[0, 0, 0, 0, 0, 0]]
@@ -321,6 +338,10 @@ class MatrixGameEngine:
             # Generate frames with streaming pipeline
             with torch.no_grad():
                 # Set seed for reproducibility
                 set_seed(self.seed + self.frame_count)
@@ -334,6 +355,8 @@ class MatrixGameEngine:
                     mode=mode
                 )
                 # Decode first frame from latent
                 if outputs is not None and len(outputs) > 0:
                     # Extract first frame
@@ -353,7 +376,13 @@ class MatrixGameEngine:
         except Exception as e:
             error_msg = f"Error generating frame with Matrix-Game V2 model: {str(e)}"
-            logger.error(error_msg)
             raise RuntimeError(error_msg)
         # Add visualization of input controls

     def _init_models(self):
         """Initialize Matrix-Game V2 models"""
         try:
+            logger.info(f"Loading configuration from: {self.config_path}")
             # Load configuration
+            if not os.path.exists(self.config_path):
+                logger.error(f"Config file not found: {self.config_path}")
+                raise FileNotFoundError(f"Config file not found: {self.config_path}")
             self.config = OmegaConf.load(self.config_path)
+            logger.debug(f"Configuration loaded: {self.config}")
             # Initialize generator
             generator = WanDiffusionWrapper(
             logger.info("Models loaded successfully")
             # Preprocess initial images for all scenes
+            logger.info("Preprocessing initial images for scenes...")
             for scene_name, frames in self.scenes.items():
                 if frames and len(frames) > 0:
+                    logger.debug(f"Preparing latent for scene: {scene_name} ({len(frames)} frames)")
                     # Prepare the first frame as initial latent
                     self._prepare_scene_latent(scene_name, frames[0])
+                else:
+                    logger.warning(f"No frames found for scene: {scene_name}")
         except Exception as e:
             logger.error(f"Error loading models: {str(e)}")
             raise RuntimeError(error_msg)
         try:
+            logger.debug(f"Starting frame generation for scene: {scene_name}")
             # Map scene name to mode
             mode_map = {
                 'universal': 'universal',
                 'templerun': 'templerun'
             }
             mode = mode_map.get(scene_name, 'universal')
+            logger.debug(f"Using mode: {mode} for scene: {scene_name}")
             # Get cached latent or prepare new one
             if scene_name not in self.scene_latents:
+                logger.debug(f"Scene latents not cached for {scene_name}, preparing...")
                 scene_frames = self.scenes.get(scene_name, self.scenes.get('universal', []))
                 if scene_frames:
+                    logger.debug(f"Found {len(scene_frames)} frames for scene {scene_name}")
                     self._prepare_scene_latent(scene_name, scene_frames[0])
                 else:
                     error_msg = f"No initial frames available for scene: {scene_name}"
                     logger.error(error_msg)
+                    logger.error(f"Available scenes: {list(self.scenes.keys())}")
                     raise ValueError(error_msg)
             scene_data = self.scene_latents.get(scene_name)
                 logger.error(error_msg)
                 raise ValueError(error_msg)
+            logger.debug(f"Scene data prepared for {scene_name}")
             # Prepare conditions
             if keyboard_condition is None:
                 keyboard_condition = [[0, 0, 0, 0, 0, 0]]
             # Generate frames with streaming pipeline
             with torch.no_grad():
+                logger.debug(f"Starting inference with mode: {mode}")
+                logger.debug(f"Conditional dict keys: {list(conditional_dict.keys())}")
+                logger.debug(f"Noise shape: {sampled_noise.shape}")
                 # Set seed for reproducibility
                 set_seed(self.seed + self.frame_count)
                     mode=mode
                 )
+                logger.debug(f"Inference completed, outputs type: {type(outputs)}")
                 # Decode first frame from latent
                 if outputs is not None and len(outputs) > 0:
                     # Extract first frame
         except Exception as e:
             error_msg = f"Error generating frame with Matrix-Game V2 model: {str(e)}"
+            logger.error(error_msg, exc_info=True)
+            logger.error(f"Scene: {scene_name}, Mode: {mode if 'mode' in locals() else 'unknown'}")
+            logger.error(f"Keyboard condition: {keyboard_condition}")
+            logger.error(f"Mouse condition: {mouse_condition}")
+            logger.error(f"Frame count: {self.frame_count}")
+            logger.error(f"Device: {self.device}")
+            logger.error(f"Weight dtype: {self.weight_dtype}")
             raise RuntimeError(error_msg)
         # Add visualization of input controls

api_server.py CHANGED Viewed

@@ -41,8 +41,8 @@ class GameSession:
         self.created_at = time.time()
         self.last_activity = time.time()
-        # Game state
-        self.current_scene = "forest"  # Default scene
         self.is_streaming = False
         self.stream_task = None

         self.created_at = time.time()
         self.last_activity = time.time()
+        # Game state
+        self.current_scene = "universal"  # Default scene
         self.is_streaming = False
         self.stream_task = None

api_utils.py CHANGED Viewed

@@ -18,7 +18,7 @@ from typing import Dict, List, Tuple, Any, Optional, Union
 # Configure logging
 logging.basicConfig(
-    level=logging.INFO,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
@@ -178,22 +178,42 @@ def load_scene_frames(scene_name: str, frame_width: int, frame_height: int) -> L
         List[np.ndarray]: List of frames as numpy arrays
     """
     frames = []
-    scene_dir = f"./GameWorldScore/asset/init_image/{scene_name}"
-    if os.path.exists(scene_dir):
-        image_files = sorted([f for f in os.listdir(scene_dir) if f.endswith('.png') or f.endswith('.jpg')])
         for img_file in image_files:
             try:
                 img_path = os.path.join(scene_dir, img_file)
                 img = Image.open(img_path).convert("RGB")
                 img = img.resize((frame_width, frame_height))
                 frames.append(np.array(img))
             except Exception as e:
                 logger.error(f"Error loading image {img_file}: {str(e)}")
     # If no frames were loaded, create a default colored frame with text
     if not frames:
-        frame = np.ones((frame_height, frame_height, 3), dtype=np.uint8) * 100
         # Add scene name as text
         cv2.putText(frame, f"Scene: {scene_name}", (50, 180),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

 # Configure logging
 logging.basicConfig(
+    level=logging.DEBUG,
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
         List[np.ndarray]: List of frames as numpy arrays
     """
     frames = []
+    # Try multiple possible directories for scene images
+    scene_dirs = [
+        f"./GameWorldScore/asset/init_image/{scene_name}",
+        f"./demo_images/{scene_name}",
+        f"./demo_images/{scene_name.replace('_', '-')}",  # Handle gta_drive -> gta-drive
+    ]
+    scene_dir = None
+    logger.debug(f"Looking for scene images for '{scene_name}' in: {scene_dirs}")
+    for potential_dir in scene_dirs:
+        logger.debug(f"Checking directory: {potential_dir}")
+        if os.path.exists(potential_dir):
+            scene_dir = potential_dir
+            logger.debug(f"Found scene directory: {scene_dir}")
+            break
+    if not scene_dir:
+        logger.warning(f"No scene directory found for '{scene_name}'")
+    if scene_dir and os.path.exists(scene_dir):
+        image_files = sorted([f for f in os.listdir(scene_dir) if f.endswith('.png') or f.endswith('.jpg') or f.endswith('.webp')])
+        logger.debug(f"Found {len(image_files)} image files in {scene_dir}: {image_files}")
         for img_file in image_files:
             try:
                 img_path = os.path.join(scene_dir, img_file)
                 img = Image.open(img_path).convert("RGB")
                 img = img.resize((frame_width, frame_height))
                 frames.append(np.array(img))
+                logger.debug(f"Successfully loaded image: {img_file}")
             except Exception as e:
                 logger.error(f"Error loading image {img_file}: {str(e)}")
     # If no frames were loaded, create a default colored frame with text
     if not frames:
+        logger.warning(f"No frames loaded for scene '{scene_name}', creating default frame")
+        frame = np.ones((frame_height, frame_width, 3), dtype=np.uint8) * 100
         # Add scene name as text
         cv2.putText(frame, f"Scene: {scene_name}", (50, 180),
                    cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)