Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import torch | |
| import numpy as np | |
| import re | |
| from turkish.deasciifier import Deasciifier | |
# Load the tokenizer and the sequence-classification model once at import
# time, then move the model to the GPU when one is available (CPU otherwise).
_MODEL_ID = "TURKCELL/bert-offensive-lang-detection-tr"

tokenizer = AutoTokenizer.from_pretrained(_MODEL_ID)
model = AutoModelForSequenceClassification.from_pretrained(_MODEL_ID)

_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
model.to(device)
def deasciifier(text):
    """Run *text* through ``Deasciifier`` and return its Turkish conversion.

    Args:
        text: The string handed to the ``Deasciifier`` constructor.

    Returns:
        Whatever ``Deasciifier.convert_to_turkish()`` produces for *text*
        (presumably the text with Turkish diacritics restored -- see the
        ``turkish-deasciifier`` package for the exact rules).
    """
    converter = Deasciifier(text)
    converted = converter.convert_to_turkish()
    return converted
def remove_circumflex(text):
    """Replace circumflexed vowels in *text* with their plain counterparts.

    Only ``â î û ô`` and their upper-case forms are rewritten (to
    ``a i u o`` / ``A I U O``); every other character is left as is.

    Args:
        text: The string to normalise.

    Returns:
        A new string of the same length with the circumflexes dropped.
    """
    # One translation table covering both cases; positions in the two
    # strings pair up (â -> a, î -> i, ...).
    plain_vowels = str.maketrans('âîûôÂÎÛÔ', 'aiuoAIUO')
    return text.translate(plain_vowels)
def turkish_lower(text):
    """Lower-case *text* using Turkish casing rules for the special letters.

    ``I`` becomes dotless ``ı`` and ``İ`` becomes ``i``; ``Ç Ş Ğ Ü Ö`` map to
    their lower-case forms. All remaining characters go through the
    regular ``str.lower()``.

    Args:
        text: The string to lower-case.

    Returns:
        The lower-cased string.
    """
    special_cases = {
        'I': 'ı',
        'İ': 'i',
        'Ç': 'ç',
        'Ş': 'ş',
        'Ğ': 'ğ',
        'Ü': 'ü',
        'Ö': 'ö',
    }

    def lower_char(ch):
        # Lowering is done one character at a time on purpose: the result of
        # the table lookup (or the untouched character) is lowered in
        # isolation, independent of its neighbours.
        replacement = special_cases[ch] if ch in special_cases else ch
        return replacement.lower()

    return ''.join(map(lower_char, text))
# Patterns used by clean_text; each match is replaced by a single space.
_USERNAME_RE = re.compile(r"@\S*")
_HASHTAG_RE = re.compile(r'#\S+')
_URL_RE = re.compile(r"http\S+|www\S+|https\S+", flags=re.MULTILINE)
# NOTE(review): `[^\w\s]` is tried first and already matches ':' and ';', so
# the emoticon alternatives after it never get a chance to match. Kept
# verbatim so the preprocessing output stays exactly as before.
_PUNCT_AND_EMOTICON_RE = re.compile(r'[^\w\s]|(:\)|:\(|:D|:P|:o|:O|;\))')
_EMOJI_RE = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # emoticons
    "\U0001F300-\U0001F5FF"  # symbols & pictographs
    "\U0001F680-\U0001F6FF"  # transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # flags (iOS)
    "\U00002702-\U000027B0"
    "\U000024C2-\U0001F251"
    "]+",
    flags=re.UNICODE,
)
_WHITESPACE_RE = re.compile(r'\s+')


def clean_text(text):
    """Normalise raw text before it is tokenized.

    Steps, in order: drop circumflexes, apply Turkish lower-casing, run the
    deasciifier, blank out @usernames, #hashtags and URLs, blank out
    punctuation and emoji, then collapse whitespace runs and trim the ends.

    Args:
        text: The raw input string.

    Returns:
        The cleaned, single-spaced string.
    """
    cleaned = deasciifier(turkish_lower(remove_circumflex(text)))

    # Order matters: usernames, hashtags and URLs must go before the
    # punctuation pass, which would otherwise strip the '@', '#' and ':'
    # markers these patterns rely on.
    substitutions = (
        _USERNAME_RE,
        _HASHTAG_RE,
        _URL_RE,
        _PUNCT_AND_EMOTICON_RE,
        _EMOJI_RE,
    )
    for pattern in substitutions:
        cleaned = pattern.sub(' ', cleaned)

    return _WHITESPACE_RE.sub(' ', cleaned).strip()
def is_offensive(sentence):
    """Classify *sentence* as ``'offensive'`` or ``'non-offensive'``.

    The sentence is normalised with ``clean_text``, tokenized (truncated to
    256 tokens), moved to the module-level ``device`` and fed to the
    module-level ``model``. The label with the highest logit wins.

    Args:
        sentence: Raw input text.

    Returns:
        ``'non-offensive'`` when class 0 has the highest logit,
        ``'offensive'`` when class 1 does.
    """
    labels = {0: 'non-offensive', 1: 'offensive'}

    normalized = clean_text(sentence)
    encoded = tokenizer(
        normalized,
        padding=True,
        truncation=True,
        max_length=256,
        return_tensors='pt',
    )
    encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Pure inference: disable autograd so no computation graph is built and
    # kept alive for every request.
    with torch.no_grad():
        output = model(**encoded)

    predicted = np.argmax(output.logits.cpu().numpy(), axis=1)
    # Convert the numpy integer to a plain int before the label lookup.
    return labels[int(predicted[0])]
# Gradio UI: one two-line textbox in, the predicted label out as plain text.
_sentence_box = gr.Textbox(lines=2, placeholder="Enter sentence here...")

iface = gr.Interface(
    fn=is_offensive,
    inputs=_sentence_box,
    outputs="text",
    title="Offensive Language Detection",
    description="Offensive language detection for Turkish",
)

# Start the web server for the interface.
iface.launch()