blackhole1218 committed on
Commit
1b5f64b
·
1 Parent(s): 8b58140

Fix: is_english_text -> is_korean_text (30% threshold)

Browse files
Files changed (1) hide show
  1. app.py +31 -3
app.py CHANGED
@@ -69,6 +69,34 @@ import functools
69
  import time # Added for potential retries
70
 
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  def get_client_ip():
73
  """Get the client's IP address, handling proxies and load balancers."""
74
  # Check for forwarded headers first (common with reverse proxies)
@@ -555,9 +583,9 @@ def generate_tts():
555
  if not text or len(text) > 1000:
556
  return jsonify({"error": "Invalid or too long text"}), 400
557
 
558
- # Check if text is in English
559
- if not is_english_text(text):
560
- return jsonify({"error": "Only English language text is supported for now. Please provide text in English. A multilingual Arena is coming soon!"}), 400
561
 
562
  # Check if sentence has already been consumed
563
  if is_sentence_consumed(text):
 
69
  import time # Added for potential retries
70
 
71
 
72
def is_korean_text(text: str, threshold: float = 0.3) -> bool:
    """Return whether a sufficient share of the letters in *text* are Hangul.

    Only alphabetic characters (as reported by ``str.isalpha``) take part in
    the ratio; digits, whitespace and punctuation are ignored entirely.

    Args:
        text: The text to inspect.
        threshold: Minimum fraction of letters that must be Hangul for the
            text to count as Korean (default 0.3, i.e. 30%).

    Returns:
        True when the Hangul share of all letters is at least *threshold*.
        Also True when *text* is empty or contains no letters at all (for
        example only digits or punctuation), since there is nothing to judge.
    """
    if not text:
        return True

    # Inclusive code point ranges of every Unicode block that holds Hangul
    # letters. Unassigned code points inside a range never reach the range
    # test because str.isalpha() is False for them.
    hangul_ranges = (
        ("\u1100", "\u11ff"),  # Hangul Jamo
        ("\u3130", "\u318f"),  # Hangul Compatibility Jamo
        ("\ua960", "\ua97f"),  # Hangul Jamo Extended-A
        ("\uac00", "\ud7af"),  # Hangul Syllables
        ("\ud7b0", "\ud7ff"),  # Hangul Jamo Extended-B
        ("\uffa0", "\uffdc"),  # Halfwidth Hangul variants
    )

    letter_count = 0
    korean_count = 0
    for char in text:
        # Non-letters neither help nor hurt the ratio.
        if not char.isalpha():
            continue
        letter_count += 1
        if any(low <= char <= high for low, high in hangul_ranges):
            korean_count += 1

    # No letters at all (numbers, punctuation, ...): allow the text.
    if letter_count == 0:
        return True

    return korean_count / letter_count >= threshold
98
+
99
+
100
  def get_client_ip():
101
  """Get the client's IP address, handling proxies and load balancers."""
102
  # Check for forwarded headers first (common with reverse proxies)
 
583
  if not text or len(text) > 1000:
584
  return jsonify({"error": "Invalid or too long text"}), 400
585
 
586
+ # Check if text contains Korean (at least 30% Korean characters)
587
+ if not is_korean_text(text):
588
+ return jsonify({"error": "한국어 텍스트를 입력해주세요. 최소 30% 이상의 한국어가 포함되어야 합니다."}), 400
589
 
590
  # Check if sentence has already been consumed
591
  if is_sentence_consumed(text):