"""Multilingual summarization & title tests (LLM heavy by default).

Set VOXSUM_SKIP_LLM_TESTS=1 to skip these tests (mocked LLM in conftest).
Optionally set VOXSUM_GGUF_MODEL to force a specific GGUF model.
"""
|
|
from __future__ import annotations |
|
|
|
|
|
import os |
|
|
import sys |
|
|
import pytest |
|
|
from pathlib import Path |
|
|
|
|
|
# Opt-out switch: when VOXSUM_SKIP_LLM_TESTS is exactly "1", skip the whole
# module at collection time instead of running the LLM-backed tests.
if os.getenv("VOXSUM_SKIP_LLM_TESTS") == "1":
    pytest.skip("LLM tests skipped (unset VOXSUM_SKIP_LLM_TESTS to run)", allow_module_level=True)

# Directory two levels above this file (presumably the repository root).
# It is put at the front of sys.path so the `src.*` imports that follow
# resolve no matter where pytest is launched from.
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
    sys.path.insert(0, str(ROOT))
|
|
|
|
|
from src.summarization import summarize_transcript, generate_title |
|
|
from src.utils import available_gguf_llms |
|
|
|
|
|
|
|
|
def _select_model():
    """Pick the GGUF model name used by the LLM tests in this module.

    Resolution order:

    1. The value of the ``VOXSUM_GGUF_MODEL`` environment variable, when it
       is set and present in ``available_gguf_llms``. A value that is not
       present there is ignored and selection falls through.
    2. The first name from a fixed preference list of Gemma models that is
       present in ``available_gguf_llms``.
    3. The first entry produced by iterating ``available_gguf_llms``.

    Returns:
        A model name taken from ``available_gguf_llms``.

    Raises:
        RuntimeError: If ``available_gguf_llms`` is empty, so there is
            nothing to select.
    """
    env_choice = os.getenv("VOXSUM_GGUF_MODEL")
    if env_choice and env_choice in available_gguf_llms:
        return env_choice

    preferred = ("Gemma-3-270M", "Gemma-3-3N-E2B", "Gemma-3-3N-E4B", "Gemma-3-1B")
    for candidate in preferred:
        if candidate in available_gguf_llms:
            return candidate

    # Last resort: whatever the registry yields first. Use a default so an
    # empty registry produces a readable error instead of a bare
    # StopIteration with no message.
    fallback = next(iter(available_gguf_llms), None)
    if fallback is None:
        raise RuntimeError(
            "No GGUF models available: available_gguf_llms is empty"
        )
    return fallback
|
|
|
|
|
|
|
|
|
|
|
# Sample transcripts keyed by language name. Both entries carry the same
# short talk about AI and its impact on society, once in English and once
# in French; the tests below iterate over every entry.
TEST_TRANSCRIPTS = {
    "english": """
Hello everyone, today we're going to discuss artificial intelligence and its impact on modern society.
AI has become increasingly important in our daily lives, from voice assistants like Siri and Alexa,
to recommendation systems on Netflix and YouTube. The technology is advancing rapidly, with machine
learning algorithms becoming more sophisticated every day. However, we must also consider the ethical
implications of AI development, including privacy concerns, job displacement, and the potential for bias
in automated decision-making systems. It's crucial that we develop AI responsibly to ensure it benefits
all of humanity rather than just a select few.
""",
    "french": """
Bonjour à tous, aujourd'hui nous allons discuter de l'intelligence artificielle et de son impact sur la société moderne.
L'IA est devenue de plus en plus importante dans notre vie quotidienne, des assistants vocaux comme Siri et Alexa,
aux systèmes de recommandation sur Netflix et YouTube. La technologie progresse rapidement, avec des algorithmes
d'apprentissage automatique devenant plus sophistiqués chaque jour. Cependant, nous devons également considérer
les implications éthiques du développement de l'IA, y compris les préoccupations de confidentialité, le déplacement
d'emplois, et le potentiel de biais dans les systèmes de prise de décision automatisée. Il est crucial que nous
développions l'IA de manière responsable pour assurer qu'elle bénéficie à toute l'humanité plutôt qu'à une élite.
""",
}
|
|
|
|
|
|
|
|
def test_multilingual_summarization():
    """Check that every sample transcript yields a non-blank summary.

    For each entry in ``TEST_TRANSCRIPTS`` the pieces produced by
    ``summarize_transcript`` are concatenated, and the result must contain
    something other than whitespace.
    """
    model_name = _select_model()
    for language, transcript in TEST_TRANSCRIPTS.items():
        # summarize_transcript is consumed as an iterable of text pieces;
        # join it directly rather than materializing a throwaway list.
        summary = "".join(
            summarize_transcript(transcript, model_name, "Summarize this transcript")
        )
        # strip() so a whitespace-only output does not count as a summary.
        assert summary.strip(), f"Empty summary for {language}"
|
|
|
|
|
|
|
|
def test_language_consistency():
    """Check that each sample transcript yields a title and a summary.

    For every entry in ``TEST_TRANSCRIPTS`` this generates a title and a
    summary with the same model and asserts that both are non-blank and
    that the title is shorter than 120 characters. Despite the name, no
    language detection is performed here.
    """
    model_name = _select_model()
    for language, transcript in TEST_TRANSCRIPTS.items():
        title = generate_title(transcript, model_name)
        summary = "".join(
            summarize_transcript(transcript, model_name, "Summarize this transcript")
        )
        # Separate assertions with messages so a failure names the language
        # and which of the two outputs was missing.
        assert title and title.strip(), f"Empty title for {language}"
        assert summary.strip(), f"Empty summary for {language}"
        assert len(title) < 120, f"Title too long for {language}: {len(title)} characters"
|
|
|