Spaces:
Running
Running
Add HF Dataset reference audio support
Browse files
- Download reference audio from Akatuki25/seed-vc-ref-audios
- Add download_ref_preset() function
- Support preset ref audio without binary files in Space repo
app.py
CHANGED
|
@@ -18,6 +18,7 @@ import torchaudio
|
|
| 18 |
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
| 19 |
from fastapi.responses import Response
|
| 20 |
from pydantic import BaseModel
|
|
|
|
| 21 |
|
| 22 |
# Seed-VC
|
| 23 |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'seed-vc'))
|
|
@@ -45,10 +46,15 @@ DEFAULT_OVERLAP_MS = 200
|
|
| 45 |
SESSION_EXPIRE_SEC = 600
|
| 46 |
|
| 47 |
# model_ref.md Section 3.1
|
| 48 |
-
|
|
|
|
|
|
|
| 49 |
REF_PRESETS = {
|
| 50 |
-
|
|
|
|
| 51 |
}
|
|
|
|
|
|
|
| 52 |
|
| 53 |
# =============================================================================
|
| 54 |
# Global Variables
|
|
@@ -169,6 +175,31 @@ def crossfade(prev_tail: Optional[np.ndarray], new_chunk: np.ndarray, fade_len:
|
|
| 169 |
tail = new_chunk[fade_len:]
|
| 170 |
return np.concatenate([mixed_head, tail])
|
| 171 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
def prepare_reference_audio(audio_path: str, state: SessionState):
|
| 173 |
"""
|
| 174 |
参照音声を準備 (model_ref.md Section 3)
|
|
@@ -295,7 +326,9 @@ async def create_session(body: SessionCreateRequest):
|
|
| 295 |
# 参照音声設定 (model_ref.md Section 3.2)
|
| 296 |
if not body.use_uploaded_ref:
|
| 297 |
preset_id = body.ref_preset_id or DEFAULT_REF_PRESET
|
| 298 |
-
|
|
|
|
|
|
|
| 299 |
prepare_reference_audio(wav_path, state)
|
| 300 |
|
| 301 |
SESSIONS[session_id] = state
|
|
|
|
| 18 |
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
| 19 |
from fastapi.responses import Response
|
| 20 |
from pydantic import BaseModel
|
| 21 |
+
from huggingface_hub import hf_hub_download
|
| 22 |
|
| 23 |
# Seed-VC
|
| 24 |
sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'seed-vc'))
|
|
|
|
| 46 |
SESSION_EXPIRE_SEC = 600
|
| 47 |
|
| 48 |
# model_ref.md Section 3.1
|
| 49 |
+
# Download reference audio from the Hugging Face Hub
|
| 50 |
+
# Repository: Akatuki25/seed-vc-ref-audios (dataset)
|
| 51 |
+
DEFAULT_REF_PRESET = "default_female"
|
| 52 |
REF_PRESETS = {
|
| 53 |
+
"default_female": ("Akatuki25/seed-vc-ref-audios", "default_female.wav"),
|
| 54 |
+
"default_male": ("Akatuki25/seed-vc-ref-audios", "default_male.wav"),
|
| 55 |
}
|
| 56 |
+
# Cache of already-downloaded reference audio (preset_id -> local file path)
|
| 57 |
+
downloaded_ref_cache = {}
|
| 58 |
|
| 59 |
# =============================================================================
|
| 60 |
# Global Variables
|
|
|
|
| 175 |
tail = new_chunk[fade_len:]
|
| 176 |
return np.concatenate([mixed_head, tail])
|
| 177 |
|
| 178 |
+
def download_ref_preset(preset_id: str) -> str:
    """Download a preset reference audio file from the Hugging Face Hub.

    The resolved local path is memoized in ``downloaded_ref_cache`` so a
    preset is only fetched through ``hf_hub_download`` once per process.

    Args:
        preset_id: Key into ``REF_PRESETS``; each entry maps to a
            ``(repo_id, filename)`` pair in a dataset repository.

    Returns:
        Local file path of the downloaded audio file.

    Raises:
        ValueError: If ``preset_id`` is neither cached nor a key of
            ``REF_PRESETS``.
    """
    # Fast path: reuse the path recorded by an earlier call.
    try:
        return downloaded_ref_cache[preset_id]
    except KeyError:
        pass

    if preset_id not in REF_PRESETS:
        raise ValueError(f"Unknown preset_id: {preset_id}")

    repo_id, filename = REF_PRESETS[preset_id]
    print(f"Downloading reference audio from {repo_id}/{filename}...")

    # NOTE(review): `cache_dir` is not defined in this function; presumably
    # a module-level global declared elsewhere in the file — confirm.
    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=filename,
        repo_type="dataset",
        cache_dir=cache_dir,
    )

    # Remember the path before reporting it, matching the original order.
    downloaded_ref_cache[preset_id] = local_path
    print(f"Downloaded to {local_path}")
    return local_path
|
| 202 |
+
|
| 203 |
def prepare_reference_audio(audio_path: str, state: SessionState):
|
| 204 |
"""
|
| 205 |
参照音声を準備 (model_ref.md Section 3)
|
|
|
|
| 326 |
# 参照音声設定 (model_ref.md Section 3.2)
|
| 327 |
if not body.use_uploaded_ref:
|
| 328 |
preset_id = body.ref_preset_id or DEFAULT_REF_PRESET
|
| 329 |
+
if preset_id is None:
|
| 330 |
+
raise HTTPException(status_code=400, detail="ref_preset_id or use_uploaded_ref=true required")
|
| 331 |
+
wav_path = download_ref_preset(preset_id)
|
| 332 |
prepare_reference_audio(wav_path, state)
|
| 333 |
|
| 334 |
SESSIONS[session_id] = state
|