Spaces:

Akatuki25
/

seed-vc-streaming

Running

Akatuki25 committed 21 days ago

Commit

ab1cada

1 Parent(s): e68e1b7

Add HF Dataset reference audio support

- Download reference audio from Akatuki25/seed-vc-ref-audios
- Add download_ref_preset() function
- Support preset ref audio without binary files in Space repo

Files changed (1) hide show

app.py +36 -3

app.py CHANGED Viewed

@@ -18,6 +18,7 @@ import torchaudio
 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.responses import Response
 from pydantic import BaseModel
 # Seed-VC
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'seed-vc'))
@@ -45,10 +46,15 @@ DEFAULT_OVERLAP_MS = 200
 SESSION_EXPIRE_SEC = 600
 # model_ref.md Section 3.1
-DEFAULT_REF_PRESET = None  # プリセットなし（use_uploaded_refを使用してください）
 REF_PRESETS = {
-    # "default_01": "source_original.wav",  # 参照音声が必要な場合はここに追加
 }
 # =============================================================================
 # Global Variables
@@ -169,6 +175,31 @@ def crossfade(prev_tail: Optional[np.ndarray], new_chunk: np.ndarray, fade_len:
     tail = new_chunk[fade_len:]
     return np.concatenate([mixed_head, tail])
 def prepare_reference_audio(audio_path: str, state: SessionState):
     """
     参照音声を準備 (model_ref.md Section 3)
@@ -295,7 +326,9 @@ async def create_session(body: SessionCreateRequest):
     # 参照音声設定 (model_ref.md Section 3.2)
     if not body.use_uploaded_ref:
         preset_id = body.ref_preset_id or DEFAULT_REF_PRESET
-        wav_path = REF_PRESETS.get(preset_id, REF_PRESETS[DEFAULT_REF_PRESET])
         prepare_reference_audio(wav_path, state)
     SESSIONS[session_id] = state

 from fastapi import FastAPI, UploadFile, File, Form, HTTPException
 from fastapi.responses import Response
 from pydantic import BaseModel
+from huggingface_hub import hf_hub_download
 # Seed-VC
 sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'seed-vc'))
 SESSION_EXPIRE_SEC = 600
 # model_ref.md Section 3.1
+# Download reference audio from the Hugging Face Hub
+# Repository: Akatuki25/seed-vc-ref-audios (dataset)
+DEFAULT_REF_PRESET = "default_female"
 REF_PRESETS = {
+    "default_female": ("Akatuki25/seed-vc-ref-audios", "default_female.wav"),
+    "default_male": ("Akatuki25/seed-vc-ref-audios", "default_male.wav"),
 }
+# Cache of reference audio that has already been downloaded
+downloaded_ref_cache = {}
 # =============================================================================
 # Global Variables
     tail = new_chunk[fade_len:]
     return np.concatenate([mixed_head, tail])
+def download_ref_preset(preset_id: str) -> str:
+    """
+    Hugging Face Hubから参照音声をダウンロード
+    Returns: ローカルファイルパス
+    """
+    if preset_id in downloaded_ref_cache:
+        return downloaded_ref_cache[preset_id]
+    if preset_id not in REF_PRESETS:
+        raise ValueError(f"Unknown preset_id: {preset_id}")
+    repo_id, filename = REF_PRESETS[preset_id]
+    print(f"Downloading reference audio from {repo_id}/{filename}...")
+    local_path = hf_hub_download(
+        repo_id=repo_id,
+        filename=filename,
+        repo_type="dataset",
+        cache_dir=cache_dir
+    )
+    downloaded_ref_cache[preset_id] = local_path
+    print(f"Downloaded to {local_path}")
+    return local_path
 def prepare_reference_audio(audio_path: str, state: SessionState):
     """
     参照音声を準備 (model_ref.md Section 3)
     # 参照音声設定 (model_ref.md Section 3.2)
     if not body.use_uploaded_ref:
         preset_id = body.ref_preset_id or DEFAULT_REF_PRESET
+        if preset_id is None:
+            raise HTTPException(status_code=400, detail="ref_preset_id or use_uploaded_ref=true required")
+        wav_path = download_ref_preset(preset_id)
         prepare_reference_audio(wav_path, state)
     SESSIONS[session_id] = state