# Hugging Face Space: chienweichang/faster_whisper_ui (status: Sleeping)
# File size: 4,796 bytes

from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from faster_whisper import WhisperModel
from opencc import OpenCC
import os
import tempfile
from werkzeug.utils import secure_filename

app = Flask(__name__)
CORS(app)

# Configuration
UPLOAD_FOLDER = 'uploads'
ALLOWED_EXTENSIONS = {'mp3', 'wav', 'ogg', 'flac', 'm4a', 'webm'}
MAX_FILE_SIZE = 100 * 1024 * 1024  # 100 MB

# Enforce the upload limit. Previously MAX_FILE_SIZE was declared but never
# applied, so request bodies of any size were accepted. With this setting
# Flask raises RequestEntityTooLarge when a larger body is read.
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE

os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Global model handle; stays None until load_model() creates it on first use.
model = None

# OpenCC converters (Simplified Chinese -> Traditional Chinese, Taiwan)
cc_s2tw = OpenCC('s2tw')  # Simplified -> Traditional (Taiwan)
cc_s2twp = OpenCC('s2twp')  # Simplified -> Traditional (Taiwan), also converts common phrases


def convert_to_traditional(text, use_phrases=True):
    """Convert Simplified Chinese text to Traditional Chinese (Taiwan).

    Args:
        text: The text to convert.
        use_phrases: When true, use the ``s2twp`` converter (which also
            converts common phrases); otherwise use the ``s2tw`` converter.

    Returns:
        The converted text.
    """
    converter = cc_s2twp if use_phrases else cc_s2tw
    return converter.convert(text)

def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is rejected.
    """
    _, dot, extension = filename.rpartition('.')
    return dot == '.' and extension.lower() in ALLOWED_EXTENSIONS

def load_model():
    """Return the shared Whisper model, creating it on the first call.

    The instance is stored in the module-level ``model`` variable, so later
    calls return the same object without constructing it again.
    """
    global model
    if model is not None:
        return model

    print("正在加载 Whisper 模型（CPU 模式）...")
    # Explicitly run on the CPU; the int8 compute type is the one chosen for
    # CPU inference.
    settings = {
        "device": "cpu",
        "compute_type": "int8",
        "cpu_threads": 4,  # cap CPU threads to avoid hogging resources
        "num_workers": 1,  # keep the number of parallel workers low
    }
    model = WhisperModel("dropbox-dash/faster-whisper-large-v3-turbo", **settings)
    print("模型加载完成！")
    return model

@app.route('/')
def index():
    """Serve the page rendered from the ``index.html`` template."""
    page = render_template('index.html')
    return page

@app.route('/api/transcribe', methods=['POST'])
def transcribe():
    """Transcribe an uploaded audio file and return the result as JSON.

    Expects a multipart form POST with these fields:
        audio: The audio file; its extension must pass allowed_file().
        language: Optional language code. Missing or empty is passed to the
            model as None.
        beam_size: Optional positive integer, default 5.
        to_traditional: 'true' (default, case-insensitive) to run Chinese
            output through convert_to_traditional().

    Returns:
        On success, JSON with ``success``, ``language``, ``duration``,
        ``full_text`` and ``segments`` (each with ``start``, ``end``, ``text``).
        On invalid input, JSON with ``error`` and status 400.
        On any other failure, JSON with ``error`` and status 500.
    """
    try:
        # Make sure a file was uploaded.
        if 'audio' not in request.files:
            return jsonify({'error': '沒有上傳檔案'}), 400

        upload = request.files['audio']

        if upload.filename == '':
            return jsonify({'error': '沒有選擇檔案'}), 400

        if not allowed_file(upload.filename):
            # Sorted so the message is stable (set iteration order is not).
            supported = ", ".join(sorted(ALLOWED_EXTENSIONS))
            return jsonify({'error': f'不支援的檔案格式。支援的格式: {supported}'}), 400

        # Read request parameters.
        language = request.form.get('language') or None
        to_traditional = request.form.get('to_traditional', 'true').lower() == 'true'

        # A malformed beam_size is a client error, not a server failure.
        try:
            beam_size = int(request.form.get('beam_size', 5))
        except (TypeError, ValueError):
            return jsonify({'error': 'beam_size 必須是正整數'}), 400
        if beam_size < 1:
            return jsonify({'error': 'beam_size 必須是正整數'}), 400

        # Save the upload under a unique temporary name. Using the client's
        # file name let concurrent or same-named uploads overwrite and delete
        # each other's files. The extension was validated above, so it is safe
        # to reuse as the suffix.
        suffix = '.' + upload.filename.rsplit('.', 1)[1].lower()
        fd, filepath = tempfile.mkstemp(suffix=suffix, dir=UPLOAD_FOLDER)
        os.close(fd)

        try:
            upload.save(filepath)

            # Load the model and transcribe.
            whisper_model = load_model()

            # Use initial_prompt to steer the output towards Traditional Chinese.
            initial_prompt = "以下是普通話的轉錄內容。" if language == 'zh' else None

            segments, info = whisper_model.transcribe(
                filepath,
                language=language,
                beam_size=beam_size,
                vad_filter=True,
                initial_prompt=initial_prompt
            )

            # Only convert when the transcription language is Chinese.
            convert = to_traditional and info.language == 'zh'

            # Collect the results.
            # NOTE: faster-whisper documents `segments` as a lazy generator, so
            # the audio file must stay on disk until this loop has finished.
            results = []
            for segment in segments:
                text = segment.text.strip()
                if convert:
                    text = convert_to_traditional(text)
                results.append({
                    'start': round(segment.start, 2),
                    'end': round(segment.end, 2),
                    'text': text
                })

            return jsonify({
                'success': True,
                'language': info.language,
                'duration': round(info.duration, 2),
                'full_text': " ".join(item['text'] for item in results).strip(),
                'segments': results
            })
        finally:
            # Remove the temporary file on success and on failure alike.
            if os.path.exists(filepath):
                os.remove(filepath)

    except Exception as e:
        return jsonify({'error': f'轉錄失敗: {str(e)}'}), 500

@app.route('/api/health', methods=['GET'])
def health():
    """Report service status and whether the Whisper model has been loaded."""
    payload = {
        'status': 'ok',
        'model_loaded': model is not None,
    }
    return jsonify(payload)

if __name__ == '__main__':
    # Read the port from the environment; the default is 7860 (Hugging Face Spaces).
    port = int(os.environ.get('PORT', 7860))

    # Startup banner, printed one line at a time.
    divider = "=" * 50
    banner_lines = (
        divider,
        "Whisper 語音轉文字服務",
        divider,
        f"服務將在 http://0.0.0.0:{port} 啟動",
        "首次轉錄時會自動下載模型，請耐心等待...",
        divider,
    )
    for banner_line in banner_lines:
        print(banner_line)

    app.run(debug=False, host='0.0.0.0', port=port)