Akatuki25 committed on
Commit
ab1cada
·
1 Parent(s): e68e1b7

Add HF Dataset reference audio support

Browse files

- Download reference audio from Akatuki25/seed-vc-ref-audios
- Add download_ref_preset() function
- Support preset ref audio without binary files in Space repo

Files changed (1) hide show
  1. app.py +36 -3
app.py CHANGED
@@ -18,6 +18,7 @@ import torchaudio
18
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
19
  from fastapi.responses import Response
20
  from pydantic import BaseModel
 
21
 
22
  # Seed-VC
23
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'seed-vc'))
@@ -45,10 +46,15 @@ DEFAULT_OVERLAP_MS = 200
45
  SESSION_EXPIRE_SEC = 600
46
 
47
  # model_ref.md Section 3.1
48
- DEFAULT_REF_PRESET = None # プリセットなし(use_uploaded_refを使用してください)
 
 
49
  REF_PRESETS = {
50
- # "default_01": "source_original.wav", # 参照音声が必要な場合はここに追加
 
51
  }
 
 
52
 
53
  # =============================================================================
54
  # Global Variables
@@ -169,6 +175,31 @@ def crossfade(prev_tail: Optional[np.ndarray], new_chunk: np.ndarray, fade_len:
169
  tail = new_chunk[fade_len:]
170
  return np.concatenate([mixed_head, tail])
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  def prepare_reference_audio(audio_path: str, state: SessionState):
173
  """
174
  参照音声を準備 (model_ref.md Section 3)
@@ -295,7 +326,9 @@ async def create_session(body: SessionCreateRequest):
295
  # 参照音声設定 (model_ref.md Section 3.2)
296
  if not body.use_uploaded_ref:
297
  preset_id = body.ref_preset_id or DEFAULT_REF_PRESET
298
- wav_path = REF_PRESETS.get(preset_id, REF_PRESETS[DEFAULT_REF_PRESET])
 
 
299
  prepare_reference_audio(wav_path, state)
300
 
301
  SESSIONS[session_id] = state
 
18
  from fastapi import FastAPI, UploadFile, File, Form, HTTPException
19
  from fastapi.responses import Response
20
  from pydantic import BaseModel
21
+ from huggingface_hub import hf_hub_download
22
 
23
  # Seed-VC
24
  sys.path.insert(0, os.path.join(os.path.dirname(__file__), 'seed-vc'))
 
46
  SESSION_EXPIRE_SEC = 600
47
 
48
  # model_ref.md Section 3.1
49
# Preset reference audio is downloaded from the Hugging Face Hub.
# Source repository: Akatuki25/seed-vc-ref-audios (dataset)
DEFAULT_REF_PRESET = "default_female"

# Maps preset id -> (Hub repo id, filename inside that repo).
REF_PRESETS = {
    "default_female": ("Akatuki25/seed-vc-ref-audios", "default_female.wav"),
    "default_male": ("Akatuki25/seed-vc-ref-audios", "default_male.wav"),
}

# Cache of already-downloaded reference audio: preset id -> local file path.
downloaded_ref_cache = {}
58
 
59
  # =============================================================================
60
  # Global Variables
 
175
  tail = new_chunk[fade_len:]
176
  return np.concatenate([mixed_head, tail])
177
 
178
def download_ref_preset(preset_id: str) -> str:
    """Return the local path of a preset reference audio file.

    The preset is resolved through ``REF_PRESETS`` to a
    ``(repo_id, filename)`` pair and fetched with ``hf_hub_download`` using
    ``repo_type="dataset"``. The resulting path is stored in
    ``downloaded_ref_cache`` so later calls for the same preset return it
    without calling ``hf_hub_download`` again.

    Args:
        preset_id: Key into ``REF_PRESETS``.

    Returns:
        The local file path returned by ``hf_hub_download``.

    Raises:
        ValueError: If ``preset_id`` is neither cached nor a key of
            ``REF_PRESETS``.
    """
    # Fast path: this preset has already been resolved.
    try:
        return downloaded_ref_cache[preset_id]
    except KeyError:
        pass

    if preset_id not in REF_PRESETS:
        raise ValueError(f"Unknown preset_id: {preset_id}")

    repo_id, filename = REF_PRESETS[preset_id]
    print(f"Downloading reference audio from {repo_id}/{filename}...")

    # NOTE(review): `cache_dir` is not defined inside this function;
    # presumably it is a module-level name elsewhere in the file - confirm.
    local_path = hf_hub_download(
        repo_id=repo_id, filename=filename, repo_type="dataset", cache_dir=cache_dir
    )

    downloaded_ref_cache[preset_id] = local_path
    print(f"Downloaded to {local_path}")
    return local_path
202
+
203
  def prepare_reference_audio(audio_path: str, state: SessionState):
204
  """
205
  参照音声を準備 (model_ref.md Section 3)
 
326
  # 参照音声設定 (model_ref.md Section 3.2)
327
  if not body.use_uploaded_ref:
328
  preset_id = body.ref_preset_id or DEFAULT_REF_PRESET
329
+ if preset_id is None:
330
+ raise HTTPException(status_code=400, detail="ref_preset_id or use_uploaded_ref=true required")
331
+ wav_path = download_ref_preset(preset_id)
332
  prepare_reference_audio(wav_path, state)
333
 
334
  SESSIONS[session_id] = state