Spaces:
Running
Running
Commit
·
8628943
1
Parent(s):
aef0678
init
Browse files- app.py +302 -0
- tasks_index.json +0 -0
app.py
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Gradio dashboard to explore Lighteval tasks.
|
| 3 |
+
|
| 4 |
+
Scans `src/lighteval/tasks/tasks` and `src/lighteval/tasks/multilingual/tasks`
|
| 5 |
+
for module-level docstrings with this format (each key sits alone on its own
line and its value follows on the next line(s)):

name:
<task display name>

dataset:
<dataset id(s)>

abstract:
<free text>

languages:
<comma/newline separated language codes or names>

tags:
<comma/newline separated tags>

paper:
<url>
|
| 13 |
+
|
| 14 |
+
This file stays outside the lighteval src tree, per request.
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import ast
import html
import json
import os
import re
from collections import Counter
from dataclasses import asdict, dataclass
from typing import Dict, List, Optional, Tuple

import gradio as gr
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
# Parent of the directory that contains this file.
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))
# Directories scanned for task modules (see discover_task_files).
TASK_DIRS = [
    os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "tasks"),
    os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "multilingual", "tasks"),
]
# JSON file the task index is written to by build_and_cache_index.
CACHE_PATH = os.path.join(REPO_ROOT, "benchmark_finder", "tasks_index.json")
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
@dataclass
class TaskDoc:
    """Metadata parsed from the module-level docstring of one task file."""

    # Path of the task file, as returned by discover_task_files().
    file_path: str
    # Path of the task file relative to REPO_ROOT.
    module: str
    # Free-text description from the "abstract" section ("" when absent).
    abstract: str
    # Lower-cased entries of the "languages" section.
    languages: List[str]
    # Lower-cased entries of the "tags" section.
    tags: List[str]
    # Content of the "paper" section, or None when it is empty.
    paper: Optional[str]
    # Content of the "dataset" section, or None when it is empty.
    dataset: Optional[str]
    # Content of the "name" section, or None when it is empty.
    name: Optional[str] = None
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def read_file_text(path: str) -> Optional[str]:
    """Return the UTF-8 decoded content of ``path``, or ``None`` if unreadable.

    Args:
        path: Location of the file to read.

    Returns:
        The file content, or ``None`` when the file cannot be opened (missing,
        a directory, permission denied, ...) or is not valid UTF-8.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except (OSError, ValueError):
        # OSError covers every filesystem failure; ValueError covers
        # UnicodeDecodeError and paths with embedded NUL bytes. Anything else
        # is a programming error and should surface instead of being hidden.
        return None
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
def parse_module_docstring(text: str) -> Optional[str]:
    """Extract the module-level docstring from Python source ``text``.

    The source is parsed with ``ast`` first. If that raises, a regex looks for
    a triple-quoted string at the very start of the text instead.

    Returns:
        The docstring, or ``None`` when none is found.
    """
    try:
        docstring = ast.get_docstring(ast.parse(text))
    except Exception:
        # Fallback: naive match of a leading '''...''' / """...""" literal.
        match = re.match(r"^\s*([\'\"])\1\1([\s\S]*?)\1\1\1", text)
        if match is None:
            return None
        return match.group(2).strip()
    return docstring
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def parse_sections(doc: str) -> Dict[str, str]:
    """Split a task docstring into its known sections.

    A section starts at a line that consists only of one of the keys ``name``,
    ``dataset``, ``abstract``, ``languages``, ``tags`` or ``paper`` followed by
    ``:`` (case-insensitive, surrounding whitespace ignored). Every following
    non-blank line belongs to that section until the next header. Blank lines
    are dropped and text before the first header is ignored.

    Returns:
        A dict with all six keys; sections that never appear map to ``""``.
    """
    keys = ("name", "dataset", "abstract", "languages", "tags", "paper")
    headers = {f"{key}:" for key in keys}
    collected: Dict[str, List[str]] = {key: [] for key in keys}
    active: Optional[str] = None
    for raw_line in doc.splitlines():
        text = raw_line.rstrip()
        if text.strip().lower() in headers:
            active = text[:-1].strip().lower()
        elif active is not None and text:
            collected[active].append(text)
    # Only the outer whitespace of each section is trimmed; indentation of
    # continuation lines is kept.
    return {key: "\n".join(lines).strip() for key, lines in collected.items()}
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def split_list_field(value: str) -> List[str]:
    """Split a comma- and/or newline-separated field into trimmed tokens.

    Empty tokens are discarded; an empty ``value`` yields an empty list.
    """
    if not value:
        return []
    stripped = (piece.strip() for piece in re.split(r"[\n,]", value))
    return [token for token in stripped if token]
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def discover_task_files(task_dirs: Optional[List[str]] = None) -> List[str]:
    """Collect the Python files that may carry a task docstring.

    For every existing base directory this gathers the ``*.py`` files directly
    inside it, plus every ``main.py`` found in any nested subdirectory.

    Args:
        task_dirs: Base directories to scan. Defaults to ``TASK_DIRS``.

    Returns:
        The de-duplicated file paths in sorted order.
    """
    bases = TASK_DIRS if task_dirs is None else task_dirs
    found: set = set()
    for base in bases:
        if not os.path.isdir(base):
            continue
        # Top-level python files in the directory. The isfile check keeps out
        # directories whose name happens to end in ".py".
        for name in os.listdir(base):
            candidate = os.path.join(base, name)
            if name.endswith(".py") and os.path.isfile(candidate):
                found.add(candidate)
        # Also include subdirectory main.py files.
        for dirpath, _dirnames, filenames in os.walk(base):
            if dirpath != base and "main.py" in filenames:
                found.add(os.path.join(dirpath, "main.py"))
    return sorted(found)
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
def index_tasks() -> Tuple[List[TaskDoc], List[str], List[str]]:
    """Scan the task files and build the in-memory index.

    Files that cannot be read, or that have no module docstring, are skipped.

    Returns:
        A tuple ``(docs, languages, tags)`` where ``docs`` holds one
        ``TaskDoc`` per indexed file, ``languages`` lists every language
        ordered by descending number of occurrences (ties alphabetically) and
        ``tags`` lists every tag alphabetically.
    """
    docs: List[TaskDoc] = []
    language_counts: Counter = Counter()
    tag_set: set = set()
    for path in discover_task_files():
        text = read_file_text(path)
        if not text:
            continue
        doc = parse_module_docstring(text)
        if not doc:
            continue
        sections = parse_sections(doc)
        langs = [lang.lower() for lang in split_list_field(sections.get("languages", ""))]
        tgs = [t.lower() for t in split_list_field(sections.get("tags", ""))]
        language_counts.update(langs)
        tag_set.update(tgs)
        # Always store the module path with "/" separators: consumers split it
        # on "/" and os.path.relpath yields "\\" on Windows.
        module = os.path.relpath(path, REPO_ROOT).replace(os.sep, "/")
        docs.append(
            TaskDoc(
                file_path=path,
                module=module,
                abstract=sections.get("abstract", "").strip(),
                languages=langs,
                tags=tgs,
                paper=sections.get("paper", "").strip() or None,
                dataset=sections.get("dataset", "").strip() or None,
                name=sections.get("name", "").strip() or None,
            )
        )
    languages_sorted = [
        lang for lang, _ in sorted(language_counts.items(), key=lambda kv: (-kv[1], kv[0]))
    ]
    tags_sorted = sorted(tag_set)
    return docs, languages_sorted, tags_sorted
|
| 151 |
+
|
| 152 |
+
|
| 153 |
+
def save_index(path: str, tasks: List[TaskDoc], langs: List[str], tags: List[str]) -> None:
    """Write the task index to ``path`` as indented UTF-8 JSON.

    The document has three keys: ``tasks`` (one dict per ``TaskDoc``),
    ``languages`` and ``tags``.
    """
    payload = dict(
        tasks=list(map(asdict, tasks)),
        languages=list(langs),
        tags=list(tags),
    )
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def load_index(path: str) -> Optional[Tuple[List[TaskDoc], List[str], List[str]]]:
    """Read a task index previously written by ``save_index``.

    Returns:
        ``(tasks, languages, tags)``, or ``None`` when ``path`` does not exist.
        Keys missing from the JSON document default to empty lists.
    """
    if os.path.exists(path):
        with open(path, "r", encoding="utf-8") as handle:
            payload = json.load(handle)
        return (
            [TaskDoc(**entry) for entry in payload.get("tasks", [])],
            list(payload.get("languages", [])),
            list(payload.get("tags", [])),
        )
    return None
|
| 172 |
+
|
| 173 |
+
|
| 174 |
+
def build_and_cache_index() -> Tuple[List[TaskDoc], List[str], List[str]]:
    """Build the task index, write it to ``CACHE_PATH`` and return it.

    The directory of ``CACHE_PATH`` is created when it does not exist yet.
    """
    tasks, langs, tags = index_tasks()
    cache_dir = os.path.dirname(CACHE_PATH)
    os.makedirs(cache_dir, exist_ok=True)
    save_index(CACHE_PATH, tasks, langs, tags)
    return tasks, langs, tags
|
| 179 |
+
|
| 180 |
+
|
| 181 |
+
# NOTE: reading a previously saved index is currently disabled (the
# commented-out lines below), so the index is rebuilt and the cache file is
# rewritten every time this module is imported.
# _loaded = load_index(CACHE_PATH)
# if _loaded is None:
ALL_TASKS, ALL_LANGS, ALL_TAGS = build_and_cache_index()
# else:
#     ALL_TASKS, ALL_LANGS, ALL_TAGS = _loaded
# First five entries of ALL_LANGS; offered as language choices until
# "Show all languages" is ticked.
TOP_LANGS = ALL_LANGS[:5]
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
def filter_tasks(languages: List[str], tags: List[str], search: str) -> List[TaskDoc]:
    """Return the tasks from ``ALL_TASKS`` matching every active filter.

    Args:
        languages: Selected languages; a task matches when it has at least one
            of them. Empty or ``None`` disables the filter.
        tags: Selected tags; a task matches when it has at least one of them.
            Empty or ``None`` disables the filter.
        search: Case-insensitive substring looked up in the task's display
            name, dataset id, module path, abstract and tags. Blank or
            ``None`` disables the filter.

    Returns:
        The matching tasks, in ``ALL_TASKS`` order.
    """
    selected_langs = {lang.lower() for lang in (languages or [])}
    selected_tags = {t.lower() for t in (tags or [])}
    search_lc = (search or "").strip().lower()
    out: List[TaskDoc] = []
    for td in ALL_TASKS:
        if selected_langs and selected_langs.isdisjoint(td.languages):
            continue
        if selected_tags and selected_tags.isdisjoint(td.tags):
            continue
        if search_lc:
            # Include the name and dataset: both are displayed on the card, so
            # users expect to be able to search for them.
            hay = " ".join(
                [td.name or "", td.dataset or "", td.module, td.abstract, ", ".join(td.tags)]
            ).lower()
            if search_lc not in hay:
                continue
        out.append(td)
    return out
|
| 206 |
+
|
| 207 |
+
|
| 208 |
+
def render_cards(tasks: List["TaskDoc"]) -> str:
    """Render ``tasks`` as a responsive grid of HTML cards.

    Each card shows the task title (its ``name``, or a name derived from the
    module path), dataset links, tag and language chips, the abstract and a
    paper link. All text taken from the task metadata is HTML-escaped before
    being interpolated, so characters such as ``<``, ``&`` or quotes in a
    docstring can neither break the markup nor inject any.

    Args:
        tasks: The tasks to display.

    Returns:
        A ``<style>`` block followed by the ``cards-grid`` container.
    """
    esc = html.escape
    items: List[str] = []
    for t in tasks:
        # Accept both separators so Windows-style paths still yield a title.
        parts = re.split(r"[\\/]", t.module)
        base_no_ext = parts[-1].rsplit(".", 1)[0]
        # ".../<task>/main.py" is titled after its directory, not "main".
        fallback_name = parts[-2] if base_no_ext == "main" and len(parts) >= 2 else base_no_ext
        task_name = esc(t.name or fallback_name)
        paper_html = f'<a href="{esc(t.paper)}" target="_blank">paper</a>' if t.paper else ""
        tags_html = " ".join(f'<span class="chip">{esc(tag)}</span>' for tag in t.tags)
        langs_html = " ".join(
            f'<span class="chip chip-lang">{esc(lang)}</span>' for lang in t.languages
        )
        abstract_html = esc(t.abstract or "-")
        dataset_ids = [d.strip() for d in (t.dataset or "").split(",") if d.strip()]
        dataset_html = " ".join(
            f'<a class="dataset" href="https://huggingface.co/datasets/{esc(ds)}" '
            f'target="_blank">{esc(ds)}</a>'
            for ds in dataset_ids
        )
        items.append(
            '\n<div class="card">\n'
            f' <div class="title"><span class="title-text">{task_name}</span> {dataset_html}</div>\n'
            f' <div class="chips">{tags_html} {langs_html}</div>\n'
            f' <div class="abstract">{abstract_html}</div>\n'
            f' <div class="links">{paper_html}</div>\n'
            '</div>\n'
        )
    style = """
<style>
.cards-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 14px; }
.card { border: 1px solid #dbeafe; border-radius: 12px; padding: 12px 14px; background: #ffffff; transition: box-shadow 120ms ease, border-color 120ms ease; }
.card:hover { box-shadow: 0 6px 20px rgba(2, 132, 199, 0.08); border-color: #60a5fa; }
.title-text { font-weight: 600; font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell; color: #1d4ed8; }
.dataset { margin-left: 8px; font-size: 12px; color: #0ea5e9; background: #ecfeff; padding: 2px 6px; border-radius: 6px; text-decoration: none; }
.chips { margin: 8px 0 0 0; }
.chip { display: inline-block; margin-right: 6px; margin-bottom: 4px; padding: 2px 8px; border-radius: 9999px; background: #eff6ff; font-size: 12px; color: #1d4ed8; }
.chip-lang { background: #eef2ff; color: #3730a3; }
.abstract { color: #334155; font-size: 14px; line-height: 1.35; margin-top: 6px; }
.links a { font-size: 12px; color: #2563eb; text-decoration: none; }
</style>
"""
    return style + '<div class="cards-grid">' + "\n".join(items) + "</div>"
|
| 245 |
+
|
| 246 |
+
|
| 247 |
+
def on_filter(languages: List[str], tags: List[str], search: str):
    """Event handler: return the card HTML for the current filter values."""
    return render_cards(filter_tasks(languages, tags, search))
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def on_toggle_language_choices(show_all: bool, selected_langs: List[str], tags: List[str], search: str):
    """Event handler: switch the language choices between top and full list.

    Selected languages that are not part of the new choice list are dropped
    before filtering.

    Returns:
        A pair ``(update for the language checkbox group, card HTML)``.
    """
    if show_all:
        available = ALL_LANGS
    else:
        available = TOP_LANGS
    still_selected = [lang for lang in (selected_langs or []) if lang in available]
    matching = filter_tasks(still_selected, tags, search)
    group_update = gr.update(choices=available, value=still_selected)
    return group_update, render_cards(matching)
|
| 257 |
+
|
| 258 |
+
|
| 259 |
+
def on_toggle_tags_visibility(show: bool, selected_tags: List[str], languages: List[str], search: str):
    """Event handler: show or hide the tag checkbox group.

    Only visibility changes; the selected tags are kept as the group's value
    and still take part in filtering.

    Returns:
        A pair ``(update for the tag checkbox group, card HTML)``.
    """
    current_tags: List[str] = selected_tags if selected_tags else []
    matching = filter_tasks(languages, current_tags, search)
    group_update = gr.update(visible=show, value=current_tags)
    return group_update, render_cards(matching)
|
| 264 |
+
|
| 265 |
+
|
| 266 |
+
# Dashboard layout and event wiring. Components appear on the page in the
# order they are created below.
with gr.Blocks(title="Lighteval Tasks Explorer") as demo:
    gr.Markdown("""
    ### lighteval tasks explorer
    Explore tasks by language and tag. Use the search to quickly narrow down.
    """)

    # Search on top
    search_tb = gr.Textbox(label="Search", placeholder="Search in module path, tags, abstract…")

    # Filters on two separate stacked levels
    gr.Markdown("**Languages**")
    show_all_langs = gr.Checkbox(label="Show all languages", value=False)
    lang_dd = gr.CheckboxGroup(choices=TOP_LANGS)

    gr.Markdown("**Tags**")
    show_tags_filters = gr.Checkbox(label="Show tag checkboxes", value=False)
    tag_dd = gr.CheckboxGroup(choices=ALL_TAGS, visible=False)
    cards = gr.HTML()

    # Toggle to expand/collapse language choices
    show_all_langs.change(on_toggle_language_choices, inputs=[show_all_langs, lang_dd, tag_dd, search_tb], outputs=[lang_dd, cards])

    # Toggle to show/hide tag filters (selected tags are preserved and keep
    # filtering while the checkboxes are hidden)
    show_tags_filters.change(on_toggle_tags_visibility, inputs=[show_tags_filters, tag_dd, lang_dd, search_tb], outputs=[tag_dd, cards])

    # Live filtering as user interacts
    lang_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
    tag_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
    search_tb.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])

    # Initial load: display all tasks
    cards.value = render_cards(ALL_TASKS)


if __name__ == "__main__":
    # Run with `python benchmark_finder/app.py`
    demo.launch()
|
tasks_index.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|