cwchang's picture
feat: 配置 Hugging Face Spaces 部署
e067d0c
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from faster_whisper import WhisperModel
from opencc import OpenCC
import os
import tempfile
from werkzeug.utils import secure_filename
app = Flask(__name__)
CORS(app)
# 配置
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'mp3', 'wav', 'ogg', 'flac', 'm4a', 'webm'}
MAX_FILE_SIZE = 100 * 1024 * 1024 # 100MB
os.makedirs(UPLOAD_FOLDER, exist_ok=True)
# 全局模型變數
model = None
# OpenCC 轉換器(簡體轉繁體台灣)
cc_s2tw = OpenCC('s2tw') # 簡體到繁體(台灣)
cc_s2twp = OpenCC('s2twp') # 簡體到繁體(台灣)含常用詞彙轉換
def convert_to_traditional(text, use_phrases=True):
"""將簡體中文轉換為繁體中文(台灣)"""
if use_phrases:
return cc_s2twp.convert(text)
return cc_s2tw.convert(text)
def allowed_file(filename):
return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS
def load_model():
"""延迟加载模型"""
global model
if model is None:
print("正在加载 Whisper 模型(CPU 模式)...")
# 明確指定使用 CPU,compute_type="int8" 適合 CPU 運算
model = WhisperModel(
"dropbox-dash/faster-whisper-large-v3-turbo",
device="cpu",
compute_type="int8",
cpu_threads=4, # 限制 CPU 線程數,避免佔用過多資源
num_workers=1 # 減少並行工作數
)
print("模型加载完成!")
return model
@app.route('/')
def index():
return render_template('index.html')
@app.route('/api/transcribe', methods=['POST'])
def transcribe():
try:
# 檢查是否有檔案
if 'audio' not in request.files:
return jsonify({'error': '沒有上傳檔案'}), 400
file = request.files['audio']
if file.filename == '':
return jsonify({'error': '沒有選擇檔案'}), 400
if not allowed_file(file.filename):
return jsonify({'error': f'不支援的檔案格式。支援的格式: {", ".join(ALLOWED_EXTENSIONS)}'}), 400
# 獲取參數
language = request.form.get('language', None)
beam_size = int(request.form.get('beam_size', 5))
to_traditional = request.form.get('to_traditional', 'true').lower() == 'true'
# 儲存臨時檔案
filename = secure_filename(file.filename)
filepath = os.path.join(UPLOAD_FOLDER, filename)
file.save(filepath)
try:
# 載入模型並轉錄
whisper_model = load_model()
# 使用 initial_prompt 引導輸出繁體中文
initial_prompt = "以下是普通話的轉錄內容。" if language == 'zh' else None
segments, info = whisper_model.transcribe(
filepath,
language=language if language else None,
beam_size=beam_size,
vad_filter=True,
initial_prompt=initial_prompt
)
# 收集結果
results = []
full_text = ""
for segment in segments:
text = segment.text.strip()
# 如果是中文且啟用繁體轉換
if to_traditional and info.language == 'zh':
text = convert_to_traditional(text)
segment_data = {
'start': round(segment.start, 2),
'end': round(segment.end, 2),
'text': text
}
results.append(segment_data)
full_text += text + " "
# 刪除臨時檔案
os.remove(filepath)
return jsonify({
'success': True,
'language': info.language,
'duration': round(info.duration, 2),
'full_text': full_text.strip(),
'segments': results
})
except Exception as e:
# 清理臨時檔案
if os.path.exists(filepath):
os.remove(filepath)
raise e
except Exception as e:
return jsonify({'error': f'轉錄失敗: {str(e)}'}), 500
@app.route('/api/health', methods=['GET'])
def health():
return jsonify({'status': 'ok', 'model_loaded': model is not None})
if __name__ == '__main__':
# 從環境變數讀取端口,預設為 7860(Hugging Face Spaces)
port = int(os.environ.get('PORT', 7860))
print("=" * 50)
print("Whisper 語音轉文字服務")
print("=" * 50)
print(f"服務將在 http://0.0.0.0:{port} 啟動")
print("首次轉錄時會自動下載模型,請耐心等待...")
print("=" * 50)
app.run(debug=False, host='0.0.0.0', port=port)