Spaces:

wzy013
/

hunyuanvideo-foley

Running

App Files Files Community

hunyuanvideo-foley / hunyuanvideo_foley /constants.py

wzy013

Add HunyuanVideo-Foley source code and dependencies

f424901 3 months ago

raw

history blame contribute delete

1.67 kB

	"""Constants used throughout the HunyuanVideo-Foley project."""

	from typing import Dict, List

	# ---------------------------------------------------------------------------
	# Model configuration: default audio/video parameters.
	# ---------------------------------------------------------------------------
	DEFAULT_AUDIO_SAMPLE_RATE = 48_000
	DEFAULT_AUDIO_CHANNELS = 2
	DEFAULT_VIDEO_FPS = 25

	# ---------------------------------------------------------------------------
	# Video processing: lower and upper bound on video duration, in seconds.
	# ---------------------------------------------------------------------------
	MIN_VIDEO_DURATION_SECONDS = 1.0
	MAX_VIDEO_DURATION_SECONDS = 15.0

	# ---------------------------------------------------------------------------
	# Audio processing.
	# ---------------------------------------------------------------------------
	# Frames per second in latent space.
	AUDIO_FRAME_RATE = 75
	AUDIO_VAE_LATENT_DIM = 128

	# Visual features: FPS value per visual-feature key.
	# NOTE(review): the keys look like feature-extractor names; confirm against
	# the code that reads this mapping.
	FPS_VISUAL: Dict[str, int] = {
	    "siglip2": 8,
	    "synchformer": 25,
	}

	# Model paths. The original note says these can be overridden by environment
	# variables; that override logic is not part of this section.
	DEFAULT_MODEL_PATH = "./pretrained_models/"
	DEFAULT_CONFIG_PATH = "configs/hunyuanvideo-foley-xxl.yaml"

	# Inference parameters: each default is listed next to its min/max limits.
	DEFAULT_GUIDANCE_SCALE = 4.5
	MIN_GUIDANCE_SCALE = 1.0
	MAX_GUIDANCE_SCALE = 10.0

	DEFAULT_NUM_INFERENCE_STEPS = 50
	MIN_INFERENCE_STEPS = 10
	MAX_INFERENCE_STEPS = 100

	# Text processing.
	# NOTE(review): the unit of MAX_TEXT_LENGTH (characters vs. tokens) is not
	# visible here; confirm against the code that enforces it.
	MAX_TEXT_LENGTH = 100
	DEFAULT_NEGATIVE_PROMPT = "noisy, harsh"

	# File extensions: lowercase suffixes, each including the leading dot.
	SUPPORTED_VIDEO_EXTENSIONS: List[str] = [
	    ".mp4",
	    ".avi",
	    ".mov",
	    ".mkv",
	    ".webm",
	]
	SUPPORTED_AUDIO_EXTENSIONS: List[str] = [
	    ".wav",
	    ".mp3",
	    ".flac",
	    ".aac",
	]

	# Quality settings: each quality level maps to a ["-b:a", <bitrate>] pair.
	# NOTE(review): these look like ffmpeg audio-bitrate arguments; confirm
	# against the code that builds the command line.
	AUDIO_QUALITY_SETTINGS: Dict[str, List[str]] = {
	    "high": ["-b:a", "192k"],
	    "medium": ["-b:a", "128k"],
	    "low": ["-b:a", "96k"],
	}

	# Error messages, looked up by a short identifier.
	ERROR_MESSAGES: Dict[str, str] = {
	    "model_not_loaded": "Model is not loaded. Please load the model first.",
	    # Plain string: the literal "{formats}" field is left in the message.
	    # NOTE(review): presumably filled in by the caller via str.format; confirm.
	    "invalid_video_format": "Unsupported video format. Supported formats: {formats}",
	    # f-string: the duration limit is interpolated once, when this module
	    # is executed, from MAX_VIDEO_DURATION_SECONDS.
	    "video_too_long": f"Video duration exceeds maximum of {MAX_VIDEO_DURATION_SECONDS} seconds",
	    "ffmpeg_not_found": "ffmpeg not found. Please install ffmpeg: https://ffmpeg.org/download.html",
	}