Spaces:

chienweichang
/

faster_whisper_ui

Sleeping

App Files Files Community

faster_whisper_ui / app.py

cwchang

feat: 配置 Hugging Face Spaces 部署

e067d0c 12 days ago

raw

history blame contribute delete

4.8 kB

	from flask import Flask, request, jsonify, render_template
	from flask_cors import CORS
	from faster_whisper import WhisperModel
	from opencc import OpenCC
	import os
	import tempfile
	from werkzeug.utils import secure_filename

	app = Flask(__name__)
	CORS(app)

	# 配置
	UPLOAD_FOLDER = 'uploads'
	ALLOWED_EXTENSIONS = {'mp3', 'wav', 'ogg', 'flac', 'm4a', 'webm'}
	MAX_FILE_SIZE = 100 * 1024 * 1024 # 100MB

	os.makedirs(UPLOAD_FOLDER, exist_ok=True)

	# 全局模型變數
	model = None

	# OpenCC 轉換器（簡體轉繁體台灣）
	cc_s2tw = OpenCC('s2tw') # 簡體到繁體（台灣）
	cc_s2twp = OpenCC('s2twp') # 簡體到繁體（台灣）含常用詞彙轉換


	def convert_to_traditional(text, use_phrases=True):
	"""將簡體中文轉換為繁體中文（台灣）"""
	if use_phrases:
	return cc_s2twp.convert(text)
	return cc_s2tw.convert(text)

	def allowed_file(filename):
	return '.' in filename and filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS

	def load_model():
	"""延迟加载模型"""
	global model
	if model is None:
	print("正在加载 Whisper 模型（CPU 模式）...")
	# 明確指定使用 CPU，compute_type="int8" 適合 CPU 運算
	model = WhisperModel(
	"dropbox-dash/faster-whisper-large-v3-turbo",
	device="cpu",
	compute_type="int8",
	cpu_threads=4, # 限制 CPU 線程數，避免佔用過多資源
	num_workers=1 # 減少並行工作數
	)
	print("模型加载完成！")
	return model

	@app.route('/')
	def index():
	return render_template('index.html')

	@app.route('/api/transcribe', methods=['POST'])
	def transcribe():
	try:
	# 檢查是否有檔案
	if 'audio' not in request.files:
	return jsonify({'error': '沒有上傳檔案'}), 400

	file = request.files['audio']

	if file.filename == '':
	return jsonify({'error': '沒有選擇檔案'}), 400

	if not allowed_file(file.filename):
	return jsonify({'error': f'不支援的檔案格式。支援的格式: {", ".join(ALLOWED_EXTENSIONS)}'}), 400

	# 獲取參數
	language = request.form.get('language', None)
	beam_size = int(request.form.get('beam_size', 5))
	to_traditional = request.form.get('to_traditional', 'true').lower() == 'true'

	# 儲存臨時檔案
	filename = secure_filename(file.filename)
	filepath = os.path.join(UPLOAD_FOLDER, filename)
	file.save(filepath)

	try:
	# 載入模型並轉錄
	whisper_model = load_model()

	# 使用 initial_prompt 引導輸出繁體中文
	initial_prompt = "以下是普通話的轉錄內容。" if language == 'zh' else None

	segments, info = whisper_model.transcribe(
	filepath,
	language=language if language else None,
	beam_size=beam_size,
	vad_filter=True,
	initial_prompt=initial_prompt
	)

	# 收集結果
	results = []
	full_text = ""

	for segment in segments:
	text = segment.text.strip()

	# 如果是中文且啟用繁體轉換
	if to_traditional and info.language == 'zh':
	text = convert_to_traditional(text)

	segment_data = {
	'start': round(segment.start, 2),
	'end': round(segment.end, 2),
	'text': text
	}
	results.append(segment_data)
	full_text += text + " "

	# 刪除臨時檔案
	os.remove(filepath)

	return jsonify({
	'success': True,
	'language': info.language,
	'duration': round(info.duration, 2),
	'full_text': full_text.strip(),
	'segments': results
	})

	except Exception as e:
	# 清理臨時檔案
	if os.path.exists(filepath):
	os.remove(filepath)
	raise e

	except Exception as e:
	return jsonify({'error': f'轉錄失敗: {str(e)}'}), 500

	@app.route('/api/health', methods=['GET'])
	def health():
	return jsonify({'status': 'ok', 'model_loaded': model is not None})

	if __name__ == '__main__':
	# 從環境變數讀取端口，預設為 7860（Hugging Face Spaces）
	port = int(os.environ.get('PORT', 7860))

	print("=" * 50)
	print("Whisper 語音轉文字服務")
	print("=" * 50)
	print(f"服務將在 http://0.0.0.0:{port} 啟動")
	print("首次轉錄時會自動下載模型，請耐心等待...")
	print("=" * 50)
	app.run(debug=False, host='0.0.0.0', port=port)