Spaces:
Sleeping
Sleeping
Commit
·
1b5f64b
1
Parent(s):
8b58140
Fix: is_english_text -> is_korean_text (30% threshold)
Browse files
app.py
CHANGED
|
@@ -69,6 +69,34 @@ import functools
|
|
| 69 |
import time # Added for potential retries
|
| 70 |
|
| 71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
def get_client_ip():
|
| 73 |
"""Get the client's IP address, handling proxies and load balancers."""
|
| 74 |
# Check for forwarded headers first (common with reverse proxies)
|
|
@@ -555,9 +583,9 @@ def generate_tts():
|
|
| 555 |
if not text or len(text) > 1000:
|
| 556 |
return jsonify({"error": "Invalid or too long text"}), 400
|
| 557 |
|
| 558 |
-
# Check if text
|
| 559 |
-
if not
|
| 560 |
-
return jsonify({"error": "
|
| 561 |
|
| 562 |
# Check if sentence has already been consumed
|
| 563 |
if is_sentence_consumed(text):
|
|
|
|
| 69 |
import time # Added for potential retries
|
| 70 |
|
| 71 |
|
| 72 |
+
def is_korean_text(text: str, threshold: float = 0.3) -> bool:
    """Return whether enough of the letters in *text* are Hangul.

    Only alphabetic characters (as reported by ``str.isalpha``) take part in
    the ratio; digits, whitespace, and punctuation are ignored entirely.

    Args:
        text: The string to inspect.
        threshold: Minimum fraction of letters that must be Hangul for the
            text to be accepted (default 0.3, i.e. 30%).

    Returns:
        True if ``hangul_letters / all_letters >= threshold``. Also True when
        *text* is empty or contains no letters at all (for example only
        digits or punctuation), because there is nothing to judge. A text
        with even a single non-Hangul letter and no Hangul returns False.
    """
    # Inclusive code-point ranges of the Unicode Hangul blocks.
    hangul_ranges = (
        ('\u1100', '\u11FF'),  # Hangul Jamo
        ('\u3130', '\u318F'),  # Hangul Compatibility Jamo
        ('\uA960', '\uA97F'),  # Hangul Jamo Extended-A
        ('\uAC00', '\uD7AF'),  # Hangul Syllables
        ('\uD7B0', '\uD7FF'),  # Hangul Jamo Extended-B
        ('\uFFA0', '\uFFDC'),  # Halfwidth Hangul letters
    )

    if not text:
        return True

    letters = [char for char in text if char.isalpha()]

    # Nothing alphabetic to judge (numbers, punctuation, ...): allow it.
    if not letters:
        return True

    korean_count = sum(
        1
        for char in letters
        if any(low <= char <= high for low, high in hangul_ranges)
    )
    return korean_count / len(letters) >= threshold
|
| 98 |
+
|
| 99 |
+
|
| 100 |
def get_client_ip():
|
| 101 |
"""Get the client's IP address, handling proxies and load balancers."""
|
| 102 |
# Check for forwarded headers first (common with reverse proxies)
|
|
|
|
| 583 |
if not text or len(text) > 1000:
|
| 584 |
return jsonify({"error": "Invalid or too long text"}), 400
|
| 585 |
|
| 586 |
+
# Check if text contains Korean (at least 30% Korean characters)
|
| 587 |
+
if not is_korean_text(text):
|
| 588 |
+
return jsonify({"error": "한국어 텍스트를 입력해주세요. 최소 30% 이상의 한국어가 포함되어야 합니다."}), 400
|
| 589 |
|
| 590 |
# Check if sentence has already been consumed
|
| 591 |
if is_sentence_consumed(text):
|