#!/usr/bin/env python3
"""Download model weights listed in a config YAML.

Replicates the Dockerfile download behavior without Docker.

Usage:
    python download_models.py --config config.production.yaml

This script uses huggingface_hub.hf_hub_download to download the specified
.pth files to each model's DOWNLOAD_MODEL_DIR (or ./models by default).
"""
import argparse
import os
import time

import yaml
from huggingface_hub import hf_hub_download

# Directory used when a model entry does not set DOWNLOAD_MODEL_DIR.
DEFAULT_MODEL_DIR = "./models"
# Number of download attempts per file before giving up on it.
MAX_ATTEMPTS = 5
# Linear backoff: the wait after attempt N is N * this many seconds.
RETRY_DELAY_STEP_SECONDS = 5


def _download_with_retries(repo_id, filename, local_dir, token=None,
                           max_attempts=MAX_ATTEMPTS):
    """Download one file via hf_hub_download, retrying on failure.

    Args:
        repo_id: Value passed to hf_hub_download as ``repo_id``.
        filename: Value passed to hf_hub_download as ``filename``.
        local_dir: Value passed to hf_hub_download as ``local_dir``.
        token: Optional token passed to hf_hub_download as ``token``.
        max_attempts: Total number of attempts before giving up.

    Returns:
        The path returned by hf_hub_download, or None when every attempt
        failed (the failure is reported on stdout, not raised).
    """
    for attempt in range(1, max_attempts + 1):
        try:
            path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                local_dir=local_dir,
                token=token,
            )
        except Exception as e:
            # Deliberately broad: this is the retry boundary, and one failing
            # download must not abort the remaining models.
            print(f"Attempt {attempt} failed to download {filename} from {repo_id}: {e}")
            if attempt < max_attempts:
                delay = attempt * RETRY_DELAY_STEP_SECONDS
                print(f"Retrying in {delay} seconds...")
                time.sleep(delay)
            else:
                print(f"Failed after {max_attempts} attempts. Skipping {filename}.")
        else:
            print(f"Downloaded file to {path}")
            return path
    return None


def main():
    """Parse CLI arguments and download every model listed in the config.

    Reads the YAML file given by ``--config``, iterates over its ``MODELS``
    list, and downloads each entry's DOWNLOAD_MODEL_FILE_NAME from
    DOWNLOAD_MODEL_REPO_ID into DOWNLOAD_MODEL_DIR. Entries that lack a repo
    id or a file name are reported and skipped. Download failures are
    reported and skipped as well, so one bad entry does not stop the rest.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="config.production.yaml")
    parser.add_argument("--token", default=None, help="Hugging Face token (optional)")
    args = parser.parse_args()

    with open(args.config, "r", encoding="utf-8") as f:
        # safe_load returns None for an empty document; treat it as "no config".
        cfg = yaml.safe_load(f) or {}

    # Tolerate a non-mapping top level and an explicit `MODELS: null`.
    models = (cfg.get("MODELS") if isinstance(cfg, dict) else None) or []
    if not models:
        print("No models found in config. Nothing to download.")
        return

    for m in models:
        # A non-mapping entry (e.g. a bare string) carries no download info.
        entry = m if isinstance(m, dict) else {}
        repo_id = entry.get("DOWNLOAD_MODEL_REPO_ID")
        filename = entry.get("DOWNLOAD_MODEL_FILE_NAME")
        # `or` also covers an explicit null/empty DOWNLOAD_MODEL_DIR.
        local_dir = entry.get("DOWNLOAD_MODEL_DIR") or DEFAULT_MODEL_DIR

        if not repo_id or not filename:
            print(f"Skipping model with incomplete download info: {m}")
            continue

        os.makedirs(local_dir, exist_ok=True)
        print(f"Downloading {filename} from repo {repo_id} into {local_dir} ...")
        _download_with_retries(repo_id, filename, local_dir, token=args.token)


if __name__ == "__main__":
    main()