File size: 4,796 Bytes
8c3fde1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e067d0c
 
 
 
 
 
 
 
 
8c3fde1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e067d0c
 
 
8c3fde1
 
 
e067d0c
8c3fde1
 
e067d0c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from flask import Flask, request, jsonify, render_template
from flask_cors import CORS
from faster_whisper import WhisperModel
from opencc import OpenCC
import os
import tempfile
from werkzeug.utils import secure_filename

app = Flask(__name__)
CORS(app)

# Configuration
UPLOAD_FOLDER = 'uploads'  # directory uploads are written to before transcription
ALLOWED_EXTENSIONS = {'mp3', 'wav', 'ogg', 'flac', 'm4a', 'webm'}
MAX_FILE_SIZE = 100 * 1024 * 1024  # 100MB

# Enforce the declared upload limit: without this the constant above is never
# consulted and request bodies of any size are accepted. Flask rejects larger
# bodies with 413 Request Entity Too Large when the request data is read.
app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE

os.makedirs(UPLOAD_FOLDER, exist_ok=True)

# Global model handle; stays None until load_model() creates the model
model = None

# OpenCC converters (Simplified Chinese -> Traditional Chinese, Taiwan)
cc_s2tw = OpenCC('s2tw')  # Simplified to Traditional (Taiwan)
cc_s2twp = OpenCC('s2twp')  # Simplified to Traditional (Taiwan) with common phrase conversion


def convert_to_traditional(text, use_phrases=True):
    """Convert Simplified Chinese text to Traditional Chinese (Taiwan).

    Args:
        text: The text to convert.
        use_phrases: When true, use the ``s2twp`` converter, which also
            converts common phrases; otherwise use the plain ``s2tw``
            converter.

    Returns:
        The converted text produced by the selected OpenCC converter.
    """
    converter = cc_s2twp if use_phrases else cc_s2tw
    return converter.convert(text)

def allowed_file(filename):
    """Return True when *filename* has an extension listed in ALLOWED_EXTENSIONS.

    The extension is the text after the last dot, compared case-insensitively.
    A name without any dot is always rejected.
    """
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[-1].lower()
    return extension in ALLOWED_EXTENSIONS

def load_model():
    """Return the shared Whisper model, creating it on the first call.

    The model is stored in the module-level ``model`` variable so later calls
    reuse the same instance instead of loading it again.
    """
    global model
    if model is not None:
        return model

    print("正在加载 Whisper 模型(CPU 模式)...")
    # Run explicitly on the CPU; the int8 compute type is chosen for CPU use.
    settings = {
        "device": "cpu",
        "compute_type": "int8",
        "cpu_threads": 4,  # cap CPU threads to avoid hogging the machine
        "num_workers": 1,  # keep the number of parallel workers low
    }
    model = WhisperModel("dropbox-dash/faster-whisper-large-v3-turbo", **settings)
    print("模型加载完成!")
    return model

@app.route('/')
def index():
    """Render and return the ``index.html`` template for the root URL."""
    page = render_template('index.html')
    return page

@app.route('/api/transcribe', methods=['POST'])
def transcribe():
    """Transcribe an uploaded audio file and return the result as JSON.

    Expected multipart form fields:
        audio: The audio file; its extension must be in ALLOWED_EXTENSIONS.
        language: Optional language code passed to the model. Empty or
            missing means the model is called with ``language=None``.
        beam_size: Optional positive integer, default 5.
        to_traditional: ``'true'`` (default) to convert Chinese output to
            Traditional Chinese (Taiwan); any other value disables it.

    Returns:
        On success, a JSON object with ``success``, ``language``,
        ``duration``, ``full_text`` and ``segments`` (each segment has
        ``start``, ``end`` and ``text``).
        On failure, a JSON object with an ``error`` message and status
        400 (bad input), 413 (declared body larger than MAX_FILE_SIZE) or
        500 (unexpected failure while handling the request).
    """
    filepath = None
    try:
        # Reject oversized uploads up front, before the body is parsed.
        declared_size = request.content_length
        if declared_size is not None and declared_size > MAX_FILE_SIZE:
            limit_mb = MAX_FILE_SIZE // (1024 * 1024)
            return jsonify({'error': f'檔案過大,上限為 {limit_mb}MB'}), 413

        # Make sure a file was actually uploaded
        if 'audio' not in request.files:
            return jsonify({'error': '沒有上傳檔案'}), 400

        file = request.files['audio']

        if file.filename == '':
            return jsonify({'error': '沒有選擇檔案'}), 400

        if not allowed_file(file.filename):
            # sorted() keeps the message stable; set iteration order is not.
            supported = ", ".join(sorted(ALLOWED_EXTENSIONS))
            return jsonify({'error': f'不支援的檔案格式。支援的格式: {supported}'}), 400

        # Read request parameters
        language = request.form.get('language') or None
        try:
            beam_size = int(request.form.get('beam_size', 5))
        except ValueError:
            beam_size = 0
        if beam_size < 1:
            # A malformed parameter is a client error, not a server failure.
            return jsonify({'error': 'beam_size 必須是正整數'}), 400
        to_traditional = request.form.get('to_traditional', 'true').lower() == 'true'

        # Save the upload under a unique temporary name. Using the client's
        # filename would let concurrent uploads with the same name overwrite
        # (and delete) each other's file. The extension has already been
        # validated by allowed_file(), so it is safe to use as the suffix.
        extension = file.filename.rsplit('.', 1)[1].lower()
        fd, filepath = tempfile.mkstemp(suffix=f'.{extension}', dir=UPLOAD_FOLDER)
        os.close(fd)
        file.save(filepath)

        # Load the model and transcribe
        whisper_model = load_model()

        # Use initial_prompt to steer the output towards Traditional Chinese
        initial_prompt = "以下是普通話的轉錄內容。" if language == 'zh' else None

        segments, info = whisper_model.transcribe(
            filepath,
            language=language,
            beam_size=beam_size,
            vad_filter=True,
            initial_prompt=initial_prompt
        )

        # Only convert when the transcription language is Chinese and the
        # caller asked for Traditional output; the check is loop-invariant.
        convert = to_traditional and info.language == 'zh'

        # Collect results. NOTE: per the faster-whisper docs, `segments` is a
        # lazy generator, so the audio file must still exist while this loop
        # runs; it is removed only in the `finally` clause below.
        results = []
        for segment in segments:
            text = segment.text.strip()
            if convert:
                text = convert_to_traditional(text)
            results.append({
                'start': round(segment.start, 2),
                'end': round(segment.end, 2),
                'text': text
            })

        full_text = " ".join(item['text'] for item in results).strip()

        return jsonify({
            'success': True,
            'language': info.language,
            'duration': round(info.duration, 2),
            'full_text': full_text,
            'segments': results
        })

    except Exception as e:
        # Top-level boundary: log the traceback, then report a JSON error.
        app.logger.exception('Transcription request failed')
        return jsonify({'error': f'轉錄失敗: {str(e)}'}), 500

    finally:
        # Remove the temporary file on every exit path
        if filepath is not None and os.path.exists(filepath):
            os.remove(filepath)

@app.route('/api/health', methods=['GET'])
def health():
    """Return a JSON status payload that also reports whether the model is loaded."""
    payload = {
        'status': 'ok',
        'model_loaded': model is not None,
    }
    return jsonify(payload)

if __name__ == '__main__':
    # Read the port from the environment; default to 7860 (Hugging Face Spaces)
    port = int(os.environ.get('PORT', 7860))

    # Startup banner, printed one line at a time
    divider = "=" * 50
    banner_lines = (
        divider,
        "Whisper 語音轉文字服務",
        divider,
        f"服務將在 http://0.0.0.0:{port} 啟動",
        "首次轉錄時會自動下載模型,請耐心等待...",
        divider,
    )
    for line in banner_lines:
        print(line)

    app.run(host='0.0.0.0', port=port, debug=False)