import fnmatch
import os
import shutil
from typing import Iterable, List, Mapping, Tuple

from huggingface_hub import snapshot_download


def _list_snapshot_files(snapshot_dir: str) -> List[Tuple[str, str]]:
    """Return ``(directory, filename)`` for every file found under *snapshot_dir*."""
    return [
        (root, name)
        for root, _, names in os.walk(snapshot_dir)
        for name in names
    ]


def download_files(
    repo_id: str,
    target_base_dir: str,
    patterns: Mapping[str, Iterable[str]],
) -> None:
    """
    Download files matching patterns from a Hugging Face repository and
    copy them into a directory structure under ``target_base_dir``.

    Patterns are shell-style globs (``*``, ``?``, ``[seq]``) matched
    case-sensitively against each file's base name, so ``"config.json"``
    matches only files named exactly ``config.json`` and ``"*.pt"`` matches
    every file ending in ``.pt``, regardless of where the file sits inside
    the repository. Files that already exist at the destination are left
    untouched; this also means that when two repository files share a base
    name, only the first one encountered is copied.

    :param repo_id: Hugging Face repository ID.
    :param target_base_dir: Base directory where files should be stored.
    :param patterns: A mapping from subdirectory name to file patterns. The
        special key ``"root"`` stores files directly in ``target_base_dir``.
        Example: {"root": ["config.json", "*.pth"], "voices": ["*.pt"]}
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(target_base_dir, exist_ok=True)

    # Only fetch what was asked for instead of the whole repository.
    # snapshot_download matches patterns against the repo-relative path, so
    # each pattern is also offered with a "*/" prefix to keep base-name
    # matches inside nested folders.
    requested = [
        pattern
        for file_patterns in patterns.values()
        for pattern in file_patterns
    ]
    allow_patterns = requested + [f"*/{pattern}" for pattern in requested]
    snapshot_dir = snapshot_download(repo_id=repo_id, allow_patterns=allow_patterns)

    # Walk the snapshot once and reuse the listing for every pattern.
    snapshot_files = _list_snapshot_files(snapshot_dir)

    for subdir, file_patterns in patterns.items():
        # "root" is a sentinel meaning "directly in the base directory".
        target_dir = (
            target_base_dir
            if subdir == "root"
            else os.path.join(target_base_dir, subdir)
        )
        os.makedirs(target_dir, exist_ok=True)

        for file_pattern in file_patterns:
            for root, file in snapshot_files:
                # Real glob matching on the base name; a plain suffix test
                # would let "config.json" match "tokenizer_config.json".
                if not fnmatch.fnmatchcase(file, file_pattern):
                    continue

                source_path = os.path.join(root, file)
                target_file_path = os.path.join(target_dir, file)

                if os.path.exists(target_file_path):
                    print(f"File already exists, skipping: {target_file_path}")
                    continue

                # shutil.copy follows symlinks, so the real file content is
                # copied even if the snapshot entry is a link.
                shutil.copy(source_path, target_file_path)
                print(f"Downloaded and saved: {file} to {target_dir}")


def initialize_files() -> None:
    """Fetch the Kokoro-82M config, model weights and voice packs into
    ``pretrained_models/Kokoro``."""
    repo_id = "hexgrad/Kokoro-82M"
    target_base_dir = "pretrained_models/Kokoro"  # Base directory for files
    file_patterns = {
        "root": ["config.json", "*.pth"],  # Files for the root directory
        "voices": ["*.pt"],  # Wildcard for voice pack files
    }

    download_files(repo_id, target_base_dir, file_patterns)


if __name__ == "__main__":
    initialize_files()