Spaces:
Sleeping
Sleeping
Update hf_utils.py
Browse files- hf_utils.py +15 -16
hf_utils.py
CHANGED
|
@@ -17,29 +17,28 @@ def download_filtered_space_files(space_id: str, local_dir: str = "repo_files",
|
|
| 17 |
|
| 18 |
print(f"Downloading Space '{space_id}' and filtering for: {', '.join(file_extensions)}")
|
| 19 |
|
| 20 |
-
# Download the full snapshot to a temp directory
|
| 21 |
-
repo_path = snapshot_download(repo_id=space_id, repo_type="space")
|
| 22 |
-
|
| 23 |
# Clear out local_dir if it already exists
|
| 24 |
if os.path.exists(local_dir):
|
| 25 |
shutil.rmtree(local_dir)
|
| 26 |
|
| 27 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
copied_files = 0
|
| 29 |
-
|
| 30 |
-
# Walk through the snapshot and copy only files with desired extensions
|
| 31 |
-
for root, _, files in os.walk(repo_path):
|
| 32 |
for file in files:
|
| 33 |
if any(file.endswith(ext) for ext in file_extensions):
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
dest_file = os.path.join(local_dir, rel_path)
|
| 37 |
-
os.makedirs(os.path.dirname(dest_file), exist_ok=True)
|
| 38 |
-
|
| 39 |
-
# Debug: Show exactly which file is being downloaded
|
| 40 |
-
print(f"DEBUG: Downloading file: {rel_path}")
|
| 41 |
-
|
| 42 |
-
shutil.copy2(src_file, dest_file)
|
| 43 |
copied_files += 1
|
| 44 |
|
| 45 |
print(f"Downloaded {copied_files} filtered file(s) to: {local_dir}")
|
|
|
|
| 17 |
|
| 18 |
print(f"Downloading Space '{space_id}' and filtering for: {', '.join(file_extensions)}")
|
| 19 |
|
|
|
|
|
|
|
|
|
|
| 20 |
# Clear out local_dir if it already exists
|
| 21 |
if os.path.exists(local_dir):
|
| 22 |
shutil.rmtree(local_dir)
|
| 23 |
|
| 24 |
+
# Convert file extensions to allow_patterns format (e.g., ['.py', '.md'] -> ['*.py', '*.md'])
|
| 25 |
+
allow_patterns = [f"*{ext}" for ext in file_extensions]
|
| 26 |
+
|
| 27 |
+
# Download directly to local_dir with filtering during download
|
| 28 |
+
repo_path = snapshot_download(
|
| 29 |
+
repo_id=space_id,
|
| 30 |
+
repo_type="space",
|
| 31 |
+
local_dir=local_dir,
|
| 32 |
+
allow_patterns=allow_patterns
|
| 33 |
+
)
|
| 34 |
+
|
| 35 |
+
# Count downloaded files for feedback
|
| 36 |
copied_files = 0
|
| 37 |
+
for root, _, files in os.walk(local_dir):
|
|
|
|
|
|
|
| 38 |
for file in files:
|
| 39 |
if any(file.endswith(ext) for ext in file_extensions):
|
| 40 |
+
rel_path = os.path.relpath(os.path.join(root, file), local_dir)
|
| 41 |
+
print(f"DEBUG: Downloaded file: {rel_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
copied_files += 1
|
| 43 |
|
| 44 |
print(f"Downloaded {copied_files} filtered file(s) to: {local_dir}")
|