Spaces:
Sleeping
Sleeping
| from datasets import load_dataset | |
| import json | |
| import os | |
| from pathlib import Path | |
def test_medical_dataset():
    """Smoke-test that a slice of the medical questions dataset can be loaded.

    Requests the ``train[:100]`` split of ``medical_questions_pairs`` through
    ``load_dataset``, reports how many samples came back, and prints the first
    sample as indented JSON.

    Returns:
        bool: ``True`` when loading and printing both succeed; ``False`` when
        any exception is raised along the way. The error is printed rather
        than re-raised, so callers only ever see the boolean.
    """
    succeeded = False
    try:
        # Only a small slice is requested so the check stays quick.
        samples = load_dataset("medical_questions_pairs", split="train[:100]")
        sample_count = len(samples)
        print(f"Successfully loaded {sample_count} samples from medical_questions_pairs")

        # Dump the first record so its field layout is visible at a glance.
        print("\nSample structure:")
        first_record = samples[0]
        print(json.dumps(first_record, indent=2))

        succeeded = True
    except Exception as exc:
        # Best-effort check: report the failure and signal it via the result.
        print(f"Error loading dataset: {exc!s}")
    return succeeded
def verify_data_directory(data_dir="data/raw"):
    """Ensure the raw-data directory exists and list the JSON files in it.

    The directory is created (including missing parents) when it does not
    exist yet. Afterwards every ``*.json`` file directly inside it is printed
    by name, or a notice is printed when there are none.

    Args:
        data_dir: Directory to check, as a string or path-like object.
            Defaults to ``"data/raw"``, relative to the current working
            directory.

    Returns:
        None. All results are reported on standard output.
    """
    root = Path(data_dir)
    # Forward-slash form keeps the default messages reading "data/raw"
    # regardless of the platform's path separator.
    location = root.as_posix()

    if not root.exists():
        print(f"Creating data directory: {root}")
        root.mkdir(parents=True, exist_ok=True)

    # Glob order depends on the file system; sort so the listing is stable
    # from run to run.
    json_files = sorted(root.glob("*.json"))
    if not json_files:
        print(f"\nNo JSON files found in {location} directory")
        return

    print(f"\nFound {len(json_files)} JSON files in {location}:")
    for json_file in json_files:
        print(f"- {json_file.name}")
if __name__ == "__main__":
    # Run each check in order, announcing it first. Return values are not
    # inspected; the checks report their own results on standard output.
    checks = (
        ("Testing Hugging Face dataset loading...", test_medical_dataset),
        ("\nVerifying data directory structure...", verify_data_directory),
    )
    for banner, check in checks:
        print(banner)
        check()