Spaces:

OpenEvals
/

open_benchmark_index

Running

App Files Files Community

Linker1907 committed on Oct 16

Commit

8628943

1 Parent(s): aef0678

init

Browse files

Files changed (2) hide show

app.py +302 -0
tasks_index.json +0 -0

app.py ADDED Viewed

	@@ -0,0 +1,302 @@

+"""
+Gradio dashboard to explore Lighteval tasks.
+Scans `src/lighteval/tasks/tasks` and `src/lighteval/tasks/multilingual/tasks`
+for module-level docstrings with this format (each `key:` must sit alone on
+its own line, with its value on the following line or lines):
+name: <task display name>
+dataset: <dataset id(s)>
+abstract: <free text>
+languages: <comma/newline separated language codes or names>
+tags: <comma/newline separated tags>
+paper: <url>
+This file stays outside the lighteval src tree, per request.
+"""
+import ast
+import json
+import os
+import re
+from collections import Counter
+from dataclasses import asdict, dataclass
+from typing import Dict, List, Optional, Tuple
+import gradio as gr
# Repository root: the parent of the directory that contains this file.
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
# Both scanned task directories live under src/lighteval/tasks.
_TASKS_ROOT = os.path.join(REPO_ROOT, "src", "lighteval", "tasks")
TASK_DIRS = [
    os.path.join(_TASKS_ROOT, "tasks"),
    os.path.join(_TASKS_ROOT, "multilingual", "tasks"),
]
# JSON file that build_and_cache_index() writes the task index to.
CACHE_PATH = os.path.join(REPO_ROOT, "benchmark_finder", "tasks_index.json")
@dataclass
class TaskDoc:
    """Metadata for one task module, taken from its module-level docstring."""

    # Path of the task file as returned by discovery.
    file_path: str
    # The same path expressed relative to REPO_ROOT; used for search and as a
    # fallback display name.
    module: str
    # Free-text description from the "abstract" section.
    abstract: str
    # Lower-cased entries of the "languages" section.
    languages: List[str]
    # Lower-cased entries of the "tags" section.
    tags: List[str]
    # Content of the "paper" section, or None when the section is empty.
    paper: Optional[str]
    # Content of the "dataset" section (may list several ids separated by
    # commas), or None when the section is empty.
    dataset: Optional[str]
    # Display name from the "name" section, or None when the section is empty.
    name: Optional[str] = None
def read_file_text(path: str) -> Optional[str]:
    """Return the UTF-8 text of ``path``, or ``None`` when it cannot be read.

    Args:
        path: File to read.

    Returns:
        The decoded file contents, or ``None`` if the file is missing,
        unreadable, is a directory, or is not valid UTF-8.
    """
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()
    except (OSError, ValueError):
        # OSError covers missing/unreadable paths; ValueError covers
        # UnicodeDecodeError and paths containing NUL bytes. Anything else is
        # a programming error and is allowed to surface.
        return None
def parse_module_docstring(text: str) -> Optional[str]:
    """Extract the module-level docstring from Python source text.

    The source is parsed with ``ast`` first. If it cannot be parsed, a
    leading triple-quoted string is extracted with a regular expression
    instead, and cleaned the same way ``ast.get_docstring`` cleans its result
    so both paths return consistently dedented text.

    Args:
        text: Source code of a Python module.

    Returns:
        The cleaned docstring, or ``None`` when the module has none.
    """
    import inspect  # local import: only needed on the fallback path

    try:
        return ast.get_docstring(ast.parse(text))
    except (SyntaxError, ValueError, RecursionError):
        # Fallback: naive regex for a triple-quoted string at the top.
        match = re.match(r"^\s*([\'\"])\1\1([\s\S]*?)\1\1\1", text)
        if match is None:
            return None
        return inspect.cleandoc(match.group(2)).strip()
def parse_sections(doc: str) -> Dict[str, str]:
    """Split a task docstring into its named sections.

    A section starts at a line that consists solely of one of the known keys
    followed by a colon (case-insensitive, surrounding whitespace ignored),
    e.g. ``abstract:``. Every following line belongs to that section until the
    next header. Lines before the first header are ignored.

    Args:
        doc: The docstring text to split.

    Returns:
        A dict with the keys ``name``, ``dataset``, ``abstract``,
        ``languages``, ``tags`` and ``paper``. Each value is the section text
        with surrounding whitespace removed and interior blank lines
        (paragraph breaks) kept; missing sections map to ``""``.
    """
    keys = ("name", "dataset", "abstract", "languages", "tags", "paper")
    collected: Dict[str, List[str]] = {key: [] for key in keys}
    current_key: Optional[str] = None
    for raw_line in doc.splitlines():
        line = raw_line.rstrip()
        header = line.strip().lower()
        if header.endswith(":") and header[:-1] in collected:
            current_key = header[:-1]
            continue
        if current_key is not None:
            collected[current_key].append(line)
    # Strip once at the end: stripping after every appended line would drop
    # the blank lines that separate paragraphs.
    return {key: "\n".join(lines).strip() for key, lines in collected.items()}
def split_list_field(value: str) -> List[str]:
    """Split a comma- and/or newline-separated field into trimmed entries.

    Empty entries are dropped; an empty ``value`` yields an empty list.
    """
    if not value:
        return []
    trimmed = (piece.strip() for piece in re.split(r"[\n,]", value))
    return [piece for piece in trimmed if piece]
def discover_task_files(task_dirs: Optional[List[str]] = None) -> List[str]:
    """Find the Python files that may define tasks.

    For each base directory this collects every ``*.py`` file located directly
    in it, plus every ``main.py`` found in any directory below it.

    Args:
        task_dirs: Base directories to scan. Defaults to the module-level
            ``TASK_DIRS``. Entries that are not existing directories are
            skipped.

    Returns:
        The sorted list of unique file paths.
    """
    bases = TASK_DIRS if task_dirs is None else task_dirs
    found = set()
    for base in bases:
        if not os.path.isdir(base):
            continue
        for dirpath, _dirnames, filenames in os.walk(base):
            if dirpath == base:
                # Top level: every Python file counts.
                wanted = [name for name in filenames if name.endswith(".py")]
            else:
                # Below the top level only a package's main.py counts.
                wanted = ["main.py"] if "main.py" in filenames else []
            found.update(os.path.join(dirpath, name) for name in wanted)
    return sorted(found)
def index_tasks() -> Tuple[List[TaskDoc], List[str], List[str]]:
    """Build the task index from the discovered task files.

    Files that cannot be read, or that have no module docstring, are skipped.

    Returns:
        A tuple ``(tasks, languages, tags)``: the parsed tasks, the languages
        ordered by descending task count (ties broken alphabetically), and the
        alphabetically sorted tags.
    """
    indexed: List[TaskDoc] = []
    lang_freq: Counter = Counter()
    all_tags: set = set()
    for path in discover_task_files():
        source = read_file_text(path)
        docstring = parse_module_docstring(source) if source else None
        if not docstring:
            continue
        fields = parse_sections(docstring)
        task_langs = [item.lower() for item in split_list_field(fields.get("languages", ""))]
        task_tags = [item.lower() for item in split_list_field(fields.get("tags", ""))]
        lang_freq.update(task_langs)
        all_tags.update(task_tags)
        indexed.append(
            TaskDoc(
                file_path=path,
                module=os.path.relpath(path, REPO_ROOT),
                abstract=fields.get("abstract", "").strip(),
                languages=task_langs,
                tags=task_tags,
                # Empty sections are stored as None rather than "".
                paper=fields.get("paper", "").strip() or None,
                dataset=fields.get("dataset", "").strip() or None,
                name=fields.get("name", "").strip() or None,
            )
        )
    by_popularity = sorted(lang_freq.items(), key=lambda pair: (-pair[1], pair[0]))
    return indexed, [lang for lang, _count in by_popularity], sorted(all_tags)
def save_index(path: str, tasks: List["TaskDoc"], langs: List[str], tags: List[str]) -> None:
    """Write the task index to ``path`` as indented UTF-8 JSON.

    The file holds three keys: ``tasks`` (one dict per task), ``languages``
    and ``tags``. Non-ASCII characters are written as-is.
    """
    payload = {
        "tasks": [asdict(task) for task in tasks],
        "languages": list(langs),
        "tags": list(tags),
    }
    with open(path, "w", encoding="utf-8") as out_file:
        json.dump(payload, out_file, ensure_ascii=False, indent=2)
def load_index(path: str) -> Optional[Tuple[List["TaskDoc"], List[str], List[str]]]:
    """Load a task index previously written by ``save_index``.

    Args:
        path: Location of the JSON cache file.

    Returns:
        ``(tasks, languages, tags)``, or ``None`` when there is no usable
        cache: the file does not exist or does not contain valid JSON.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (FileNotFoundError, json.JSONDecodeError):
        # Opening directly avoids the exists()-then-open() race; a corrupt
        # cache is treated like a missing one so the caller can rebuild it.
        return None
    tasks = [TaskDoc(**entry) for entry in data.get("tasks", [])]
    langs = list(data.get("languages", []))
    tags = list(data.get("tags", []))
    return tasks, langs, tags
def build_and_cache_index() -> Tuple[List[TaskDoc], List[str], List[str]]:
    """Index all tasks, write the result to ``CACHE_PATH`` and return it.

    The directory that holds the cache file is created when missing.
    """
    index = index_tasks()
    cache_dir = os.path.dirname(CACHE_PATH)
    os.makedirs(cache_dir, exist_ok=True)
    save_index(CACHE_PATH, *index)
    return index
# The index is rebuilt from the task sources and written to CACHE_PATH every
# time this module is imported; reading the cache back through load_index()
# is currently disabled.
ALL_TASKS, ALL_LANGS, ALL_TAGS = build_and_cache_index()
# ALL_LANGS is ordered by how many tasks use each language, so these are the
# five most common ones.
TOP_LANGS = ALL_LANGS[:5]
def filter_tasks(languages: List[str], tags: List[str], search: str) -> List[TaskDoc]:
    """Return the tasks in ``ALL_TASKS`` that satisfy every active filter.

    A task passes when it has at least one of the selected languages (if any
    are selected), at least one of the selected tags (if any are selected),
    and the search text (if non-blank) occurs, case-insensitively, in its
    module path, abstract or tags.
    """
    wanted_langs = {entry.lower() for entry in (languages or [])}
    wanted_tags = {entry.lower() for entry in (tags or [])}
    needle = (search or "").strip().lower()

    def matches(td: TaskDoc) -> bool:
        if wanted_langs and wanted_langs.isdisjoint(td.languages):
            return False
        if wanted_tags and wanted_tags.isdisjoint(td.tags):
            return False
        if not needle:
            return True
        # Search module path or abstract or tags
        haystack = " ".join([td.module, td.abstract, ", ".join(td.tags)]).lower()
        return needle in haystack

    return [td for td in ALL_TASKS if matches(td)]
def render_cards(tasks: List["TaskDoc"]) -> str:
    """Render tasks as a responsive grid of HTML cards.

    Each card shows the task name, dataset links, tag and language chips, the
    abstract and a paper link. All text taken from the task metadata is
    HTML-escaped so characters such as ``<``, ``&`` or ``"`` cannot break the
    generated markup.

    Args:
        tasks: Tasks to render; each needs the attributes ``module``,
            ``name``, ``abstract``, ``languages``, ``tags``, ``paper`` and
            ``dataset``.

    Returns:
        A ``<style>`` block followed by a ``<div class="cards-grid">`` that
        contains one card per task.
    """
    import html  # local import so the block does not depend on a file-level change

    def esc(value: object) -> str:
        # quote=True also escapes quotes, which matters inside href="...".
        return html.escape(str(value), quote=True)

    items: List[str] = []
    for t in tasks:
        # The module path is produced with the OS separator, so normalise
        # backslashes before splitting.
        parts = t.module.replace("\\", "/").split("/")
        base_no_ext = parts[-1].rsplit(".", 1)[0]
        # For "<task>/main.py" the directory name is the meaningful one.
        fallback_name = parts[-2] if base_no_ext == "main" and len(parts) >= 2 else base_no_ext
        task_name = esc(t.name or fallback_name)
        paper_html = f'<a href="{esc(t.paper)}" target="_blank">paper</a>' if t.paper else ""
        tags_html = " ".join([f'<span class="chip">{esc(tag)}</span>' for tag in t.tags]) if t.tags else ""
        langs_html = " ".join([f'<span class="chip chip-lang">{esc(lang)}</span>' for lang in t.languages]) if t.languages else ""
        abstract_html = esc(t.abstract) if t.abstract else "-"
        dataset_links = []
        if t.dataset:
            for ds in [d.strip() for d in t.dataset.split(",") if d.strip()]:
                safe_ds = esc(ds)
                dataset_links.append(f'<a class="dataset" href="https://huggingface.co/datasets/{safe_ds}" target="_blank">{safe_ds}</a>')
        dataset_html = " ".join(dataset_links) if dataset_links else ""
        items.append(
            f"""
            <div class=\"card\">\n  <div class=\"title\"><span class=\"title-text\">{task_name}</span> {dataset_html}</div>\n  <div class=\"chips\">{tags_html} {langs_html}</div>\n  <div class=\"abstract\">{abstract_html}</div>\n  <div class=\"links\">{paper_html}</div>\n</div>
            """
        )
    style = """
    <style>
      .cards-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 14px; }
      .card { border: 1px solid #dbeafe; border-radius: 12px; padding: 12px 14px; background: #ffffff; transition: box-shadow 120ms ease, border-color 120ms ease; }
      .card:hover { box-shadow: 0 6px 20px rgba(2, 132, 199, 0.08); border-color: #60a5fa; }
      .title-text { font-weight: 600; font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell; color: #1d4ed8; }
      .dataset { margin-left: 8px; font-size: 12px; color: #0ea5e9; background: #ecfeff; padding: 2px 6px; border-radius: 6px; text-decoration: none; }
      .chips { margin: 8px 0 0 0; }
      .chip { display: inline-block; margin-right: 6px; margin-bottom: 4px; padding: 2px 8px; border-radius: 9999px; background: #eff6ff; font-size: 12px; color: #1d4ed8; }
      .chip-lang { background: #eef2ff; color: #3730a3; }
      .abstract { color: #334155; font-size: 14px; line-height: 1.35; margin-top: 6px; }
      .links a { font-size: 12px; color: #2563eb; text-decoration: none; }
    </style>
    """
    return style + "<div class=\"cards-grid\">" + "\n".join(items) + "</div>"
def on_filter(languages: List[str], tags: List[str], search: str):
    """Event handler: return the card HTML for the current filter values."""
    return render_cards(filter_tasks(languages, tags, search))
def on_toggle_language_choices(show_all: bool, selected_langs: List[str], tags: List[str], search: str):
    """Event handler: switch the language choices between all and top languages.

    Selected languages that are not among the new choices are dropped, and
    the cards are re-rendered for the remaining selection.

    Returns:
        A component update carrying the new choices and selection, and the
        refreshed card HTML.
    """
    visible_choices = ALL_LANGS if show_all else TOP_LANGS
    still_selected = [lang for lang in (selected_langs or []) if lang in visible_choices]
    cards_html = render_cards(filter_tasks(still_selected, tags, search))
    return gr.update(choices=visible_choices, value=still_selected), cards_html
def on_toggle_tags_visibility(show: bool, selected_tags: List[str], languages: List[str], search: str):
    """Event handler: show or hide the tag checkboxes.

    Only visibility changes; the current tag selection is kept and stays
    active in the filtering.

    Returns:
        A component update carrying the visibility and the unchanged
        selection, and the refreshed card HTML.
    """
    kept_tags: List[str] = selected_tags or []
    cards_html = render_cards(filter_tasks(languages, kept_tags, search))
    return gr.update(visible=show, value=kept_tags), cards_html
with gr.Blocks(title="Lighteval Tasks Explorer") as demo:
    gr.Markdown("""
    ### lighteval tasks explorer
    Explore tasks by language and tag. Use the search to quickly narrow down.
    """)

    # Search box sits above the filters.
    search_tb = gr.Textbox(label="Search", placeholder="Search in module path, tags, abstract…")

    # Language filter: only the top languages are offered until expanded.
    gr.Markdown("**Languages**")
    show_all_langs = gr.Checkbox(label="Show all languages", value=False)
    lang_dd = gr.CheckboxGroup(choices=TOP_LANGS)

    # Tag filter: hidden until the user asks for it.
    gr.Markdown("**Tags**")
    show_tags_filters = gr.Checkbox(label="Show tag checkboxes", value=False)
    tag_dd = gr.CheckboxGroup(choices=ALL_TAGS, visible=False)

    cards = gr.HTML()

    # Expand/collapse the list of language choices.
    show_all_langs.change(
        on_toggle_language_choices,
        inputs=[show_all_langs, lang_dd, tag_dd, search_tb],
        outputs=[lang_dd, cards],
    )
    # Show/hide the tag checkboxes; the handler keeps the current selection.
    show_tags_filters.change(
        on_toggle_tags_visibility,
        inputs=[show_tags_filters, tag_dd, lang_dd, search_tb],
        outputs=[tag_dd, cards],
    )
    # Re-filter whenever any of the three filter inputs changes.
    for _component in (lang_dd, tag_dd, search_tb):
        _component.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])

    # Initial load: display all tasks
    cards.value = render_cards(ALL_TASKS)

if __name__ == "__main__":
    # Run with `python benchmark_finder/app.py`
    demo.launch()

tasks_index.json ADDED Viewed

The diff for this file is too large to render. See raw diff