#!/usr/bin/env python3
"""
Storage cleanup script for Hugging Face Spaces.
Removes old/unused models and cache to prevent storage limit errors.
"""

import os
import shutil
from pathlib import Path


def cleanup_storage():
    """Remove unnecessary files to reduce storage usage."""
    # Define paths
    cache_dir = Path("/data/.cache/huggingface")
    models_dir = Path("/data/models")

    # 1. Clean up duplicate model downloads in cache
    if cache_dir.exists():
        # Remove old model snapshots, keeping only the two most recent.
        # Note: snapshots are collected across all cached models combined,
        # sorted by modification time (newest first).
        for subdir in ["models", "hub"]:
            target_dir = cache_dir / subdir
            if target_dir.exists():
                model_dirs = sorted(
                    target_dir.glob("**/snapshots/*"),
                    key=os.path.getmtime,
                    reverse=True,
                )
                for old_model in model_dirs[2:]:  # Keep 2 most recent, delete the rest
                    if old_model.is_dir():
                        try:
                            shutil.rmtree(old_model)
                            print(f"Cleaned up old model cache: {old_model}")
                        except Exception as e:
                            print(f"Error cleaning {old_model}: {e}")

    # 2. Clean up old fine-tuned models (keep only active ones)
    if models_dir.exists():
        finetuned_dir = models_dir / "finetuned"
        if finetuned_dir.exists():
            # This would require database access to know which models are active.
            # For now, just report the size.
            total_size = sum(
                f.stat().st_size for f in finetuned_dir.rglob("*") if f.is_file()
            )
            print(f"Fine-tuned models size: {total_size / (1024**3):.2f} GB")

    # 3. Report storage usage
    if Path("/data").exists():
        total_size = sum(
            f.stat().st_size for f in Path("/data").rglob("*") if f.is_file()
        )
        print(f"Total /data storage: {total_size / (1024**3):.2f} GB")

        # Breakdown by directory
        for subdir in [".cache", "models"]:
            dir_path = Path("/data") / subdir
            if dir_path.exists():
                dir_size = sum(
                    f.stat().st_size for f in dir_path.rglob("*") if f.is_file()
                )
                print(f"  {subdir}: {dir_size / (1024**3):.2f} GB")


if __name__ == "__main__":
    cleanup_storage()
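

# The step-2 note above points out that pruning fine-tuned models would require
# knowing which ones are still active (e.g. from the application's database).
# The helper below is a minimal sketch of that idea, assuming the caller can
# supply `active_ids` (the directory names of models still in use); that input
# and this function are hypothetical additions, and nothing here calls it.
def prune_inactive_finetuned(finetuned_dir, active_ids):
    """Delete fine-tuned model directories whose names are not in active_ids."""
    for model_dir in finetuned_dir.iterdir():
        if model_dir.is_dir() and model_dir.name not in active_ids:
            try:
                shutil.rmtree(model_dir)
                print(f"Removed inactive fine-tuned model: {model_dir}")
            except Exception as e:
                print(f"Error removing {model_dir}: {e}")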