Spaces:

zhendery
/

voxcpm

Running

App Files Community

zhendery committed 5 days ago

Commit

1102626

1 Parent(s): 2603711

feat: 接口修改为异步

Browse files

Files changed (2) hide show

Dockerfile +2 -2
api.py +95 -27

Dockerfile CHANGED Viewed

@@ -4,13 +4,13 @@ RUN ln -sf /share/zoneinfo/Asia/Shanghai /etc/localtime && \
     echo "Asia/Shanghai" > /etc/timezone
 RUN apt-get update && apt-get install -y \
-    curl wget unzip git git-lfs ffmpeg && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 RUN pip install voxcpm && pip cache purge
 WORKDIR /workspace
-COPY . .
 ENV API_TOKEN my_secret_token
 ENV VOICE_DOWNLOAD_URL http://localhost/voices.zip

     echo "Asia/Shanghai" > /etc/timezone
 RUN apt-get update && apt-get install -y \
+    unzip ffmpeg && \
     apt-get clean && rm -rf /var/lib/apt/lists/*
 RUN pip install voxcpm && pip cache purge
 WORKDIR /workspace
+COPY utils.py api.py ./
 ENV API_TOKEN my_secret_token
 ENV VOICE_DOWNLOAD_URL http://localhost/voices.zip

api.py CHANGED Viewed

@@ -8,10 +8,11 @@ import os
 import requests
 import zipfile
 from utils import *
-print_with_time("Loading VoxCPM model...")
-model = VoxCPM.from_pretrained("openbmb/VoxCPM-0.5B")
-print_with_time("VoxCPM model loaded.")
 security = HTTPBearer()
 app = FastAPI()
@@ -32,28 +33,91 @@ class GenerateRequest(BaseModel):
     do_normalize: bool = True
     denoise: bool = True
 @app.post("/generate")
-def generate_tts(request: GenerateRequest, token: str = Depends(verify_token)):
-    download_voices()
-    text = (request.text or "").strip()
-    if len(text) == 0:
-        raise ValueError("Please input text to synthesize.")
-    print_with_time(f"Generating audio for text: '{text[:60]}...'")
-    with open(f"/workspace/voices/{request.voice}.pmt", 'r', encoding='utf-8') as f:
-        wav = model.generate(
-            text=text,
-            prompt_wav_path=f"/workspace/voices/{request.voice}.wav",
-            prompt_text=f.read(),
-            cfg_value=request.cfg_value,
-            inference_timesteps=request.inference_timesteps,
-            normalize=request.do_normalize,
-            denoise=request.denoise
-        )
-        sf.write("output.wav", wav, 16000)
-        print_with_time(f"Audio generated, saving to output.wav")
-        return Response(content=open("output.wav", 'rb').read(), media_type="audio/wav")
 def download_voices():
@@ -118,22 +182,26 @@ def delete_voice(name: str, token: str = Depends(verify_token)):
 def get_voices(token: str = Depends(verify_token)):
     download_voices()
     # 获取所有 .pmt 文件
-    pmt_files = [f for f in os.listdir("/workspace/voices") if f.endswith(".pmt")]
     # 提取文件名（去掉 .pmt 后缀）
     voices = [f.split(".")[0] for f in pmt_files]
     # 确保对应的 .wav 文件也存在
     valid_voices = []
     for voice in voices:
-        if os.path.exists(f"/workspace/voices/{voice}.wav"):
             valid_voices.append(voice)
     return {"voices": valid_voices}
 # ↓↓↓↓↓↓↓↓↓无需验证↓↓↓↓↓↓↓↓
 @app.get("/")
 def health_check():
     return {"status": "health"}
-if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=7860, workers=1)

 import requests
 import zipfile
 from utils import *
+import uuid
+import queue
+import threading
+import asyncio
 security = HTTPBearer()
 app = FastAPI()
     do_normalize: bool = True
     denoise: bool = True
+# Queue-related variables
+task_queue = queue.Queue()
+output_dir = "./output"
+max_output_files = 10
+# Make sure the output directory exists
+os.makedirs(output_dir, exist_ok=True)
+# Processing functions
+def cleanup_old_files():
+    """Delete the oldest .wav files in output_dir so that at most max_output_files (10) remain."""
+    try:
+        files = [(f, os.path.getctime(os.path.join(output_dir, f))) for f in os.listdir(output_dir) if f.endswith('.wav')]
+        files.sort(key=lambda x: x[1])  # sort ascending by getctime, oldest first
+        # Remove the oldest files until only max_output_files remain
+        while len(files) > max_output_files:
+            oldest_file = files.pop(0)[0]
+            os.remove(os.path.join(output_dir, oldest_file))
+    except Exception as e:  # best-effort: failures are logged, not raised
+        print_with_time(f"Error cleaning up old files: {e}")
+async def process_queue():  # worker loop: loads the model once, then polls task_queue forever
+    print_with_time("Loading VoxCPM model...")
+    model = VoxCPM.from_pretrained("openbmb/VoxCPM-0.5B")
+    print_with_time("VoxCPM model loaded.")
+    while True:
+        try:
+            task_data = task_queue.get_nowait()  # non-blocking; raises queue.Empty when nothing is queued
+            request = task_data["request"]
+            text = (request.text or "").strip()
+            if len(text) == 0:
+                continue  # NOTE(review): empty text is dropped without calling task_done() or writing a file
+            if model is None:
+                raise RuntimeError("Failed to initialize model")
+            print_with_time(f"Generating audio for : '{text[:60]}...'")
+            with open(f"./voices/{request.voice}.pmt", 'r', encoding='utf-8') as f:  # .pmt content is passed as prompt_text
+                wav = model.generate(
+                    text=text,
+                    prompt_wav_path=f"./voices/{request.voice}.wav",
+                    prompt_text=f.read(),
+                    cfg_value=request.cfg_value,
+                    inference_timesteps=request.inference_timesteps,
+                    normalize=request.do_normalize,
+                    denoise=request.denoise
+                )
+                sf.write(os.path.join(output_dir, f"{task_data['task_id']}.wav"), wav, 16000)  # result is stored as <task_id>.wav
+                # Clean up old output files
+                cleanup_old_files()
+            task_queue.task_done()
+            await asyncio.sleep(0.6)
+        except queue.Empty:
+            await asyncio.sleep(0.6)  # idle poll interval
+        except Exception as e:  # NOTE(review): task_done() is not reached on this path either
+            print_with_time(f"Error processing queue item: {e}")
+            await asyncio.sleep(0.6)
 @app.post("/generate")
+async def generate_tts_async(request: GenerateRequest, token: str = Depends(verify_token)):
+    task_id = str(uuid.uuid4())  # random id the client later passes to GET /tts/{task_id}
+    # Add the task to the queue
+    task_data = {"task_id": task_id, "request": request}
+    task_queue.put(task_data)
+    return {"task_id": task_id}  # returns immediately; synthesis happens in process_queue
+@app.get("/tts/{task_id}")
+async def get_generate_result(task_id: str, token: str = Depends(verify_token)):
+    filepath = os.path.join(output_dir, f"{task_id}.wav")  # same <task_id>.wav naming that process_queue writes
+    if not os.path.exists(filepath):
+        raise HTTPException(status_code=404, detail="Result file not found")  # not generated yet, never generated, or already removed by cleanup_old_files
+    try:
+        with open(filepath, 'rb') as f:
+            content = f.read()
+        return Response(content=content, media_type="audio/wav")
+    except Exception as e:  # e.g. the file was removed between the exists() check and open()
+        raise HTTPException(status_code=500, detail=f"Failed to read result file: {str(e)}")
 def download_voices():
 def get_voices(token: str = Depends(verify_token)):
     download_voices()
     # 获取所有 .pmt 文件
+    pmt_files = [f for f in os.listdir("./voices") if f.endswith(".pmt")]
     # 提取文件名（去掉 .pmt 后缀）
     voices = [f.split(".")[0] for f in pmt_files]
     # 确保对应的 .wav 文件也存在
     valid_voices = []
     for voice in voices:
+        if os.path.exists(f"./voices/{voice}.wav"):
             valid_voices.append(voice)
     return {"voices": valid_voices}
 # ↓↓↓↓↓↓↓↓↓无需验证↓↓↓↓↓↓↓↓
 @app.get("/")
+@app.get("/health")  # second route for the same handler; no verify_token dependency
 def health_check():
     return {"status": "health"}
+def start_api_server():  # blocking uvicorn entry point; started from a separate thread at module level
     import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=7860)
+threading.Thread(target=start_api_server, daemon=True).start()  # HTTP server runs in a daemon thread
+asyncio.run(process_queue())  # main thread stays in the queue worker loop (process_queue loops forever)