Spaces:
Running
Running
Commit
·
290947d
1
Parent(s):
b63b8ed
init
Browse files
app.py
CHANGED
|
@@ -30,7 +30,8 @@ TASK_DIRS = [
|
|
| 30 |
os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "tasks"),
|
| 31 |
os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "multilingual", "tasks"),
|
| 32 |
]
|
| 33 |
-
|
|
|
|
| 34 |
|
| 35 |
|
| 36 |
@dataclass
|
|
@@ -156,6 +157,7 @@ def save_index(path: str, tasks: List[TaskDoc], langs: List[str], tags: List[str
|
|
| 156 |
"languages": list(langs),
|
| 157 |
"tags": list(tags),
|
| 158 |
}
|
|
|
|
| 159 |
with open(path, "w", encoding="utf-8") as f:
|
| 160 |
json.dump(data, f, ensure_ascii=False, indent=2)
|
| 161 |
|
|
@@ -173,7 +175,6 @@ def load_index(path: str) -> Optional[Tuple[List[TaskDoc], List[str], List[str]]
|
|
| 173 |
|
| 174 |
def build_and_cache_index() -> Tuple[List[TaskDoc], List[str], List[str]]:
|
| 175 |
tasks, langs, tags = index_tasks()
|
| 176 |
-
os.makedirs(os.path.dirname(CACHE_PATH), exist_ok=True)
|
| 177 |
save_index(CACHE_PATH, tasks, langs, tags)
|
| 178 |
return tasks, langs, tags
|
| 179 |
|
|
@@ -186,7 +187,7 @@ else:
|
|
| 186 |
print("Loading index from cache...")
|
| 187 |
ALL_TASKS, ALL_LANGS, ALL_TAGS = _loaded
|
| 188 |
print(f"Loaded {len(ALL_TASKS)} tasks from cache")
|
| 189 |
-
TOP_LANGS = ALL_LANGS[:
|
| 190 |
|
| 191 |
|
| 192 |
def filter_tasks(languages: List[str], tags: List[str], search: str) -> List[TaskDoc]:
|
|
@@ -200,8 +201,8 @@ def filter_tasks(languages: List[str], tags: List[str], search: str) -> List[Tas
|
|
| 200 |
if selected_tags and not any(t in td.tags for t in selected_tags):
|
| 201 |
continue
|
| 202 |
if search_lc:
|
| 203 |
-
# Search module path
|
| 204 |
-
hay = " ".join([td.module, td.abstract, ", ".join(td.tags)]).lower()
|
| 205 |
if search_lc not in hay:
|
| 206 |
continue
|
| 207 |
out.append(td)
|
|
@@ -212,36 +213,101 @@ def render_cards(tasks: List[TaskDoc]) -> str:
|
|
| 212 |
# Responsive grid of pretty cards; show all details without clicks
|
| 213 |
items: List[str] = []
|
| 214 |
for t in tasks:
|
| 215 |
-
parts = t.module.split("/")
|
| 216 |
base_no_ext = parts[-1].rsplit(".", 1)[0]
|
| 217 |
fallback_name = parts[-2] if base_no_ext == "main" and len(parts) >= 2 else base_no_ext
|
| 218 |
-
task_name = t.name or fallback_name
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
dataset_links = []
|
| 224 |
if t.dataset:
|
| 225 |
for ds in [d.strip() for d in t.dataset.split(",") if d.strip()]:
|
| 226 |
-
dataset_links.append(f'<a class="dataset" href="https://huggingface.co/datasets/{ds}" target="_blank">{ds}</a>')
|
| 227 |
dataset_html = " ".join(dataset_links) if dataset_links else ""
|
| 228 |
items.append(
|
| 229 |
f"""
|
| 230 |
-
<
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
"""
|
| 232 |
)
|
|
|
|
| 233 |
style = """
|
| 234 |
<style>
|
| 235 |
-
|
| 236 |
-
.
|
| 237 |
-
|
| 238 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 239 |
.dataset { margin-left: 8px; font-size: 12px; color: #0ea5e9; background: #ecfeff; padding: 2px 6px; border-radius: 6px; text-decoration: none; }
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
| 243 |
-
|
| 244 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 245 |
</style>
|
| 246 |
"""
|
| 247 |
return style + "<div class=\"cards-grid\">" + "\n".join(items) + "</div>"
|
|
@@ -263,38 +329,54 @@ def on_toggle_tags_visibility(show: bool, selected_tags: List[str], languages: L
|
|
| 263 |
# Only toggle visibility; preserve current tag selections and keep them active in filtering
|
| 264 |
tags_value: List[str] = selected_tags or []
|
| 265 |
tasks = filter_tasks(languages, tags_value, search)
|
|
|
|
| 266 |
return gr.update(visible=show, value=tags_value), render_cards(tasks)
|
| 267 |
|
| 268 |
|
| 269 |
-
with gr.Blocks(title="Lighteval Tasks Explorer") as demo:
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
|
| 273 |
-
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
# Filters on two separate stacked levels
|
| 279 |
-
gr.Markdown("**Languages**")
|
| 280 |
-
show_all_langs = gr.Checkbox(label="Show all languages", value=False)
|
| 281 |
-
lang_dd = gr.CheckboxGroup(choices=TOP_LANGS)
|
| 282 |
-
|
| 283 |
-
gr.Markdown("**Tags**")
|
| 284 |
-
show_tags_filters = gr.Checkbox(label="Show tag checkboxes", value=False)
|
| 285 |
-
tag_dd = gr.CheckboxGroup(choices=ALL_TAGS, visible=False)
|
| 286 |
-
cards = gr.HTML()
|
| 287 |
|
| 288 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 289 |
show_all_langs.change(on_toggle_language_choices, inputs=[show_all_langs, lang_dd, tag_dd, search_tb], outputs=[lang_dd, cards])
|
| 290 |
-
|
| 291 |
-
# Toggle to show/hide tag filters (clears selections when hiding)
|
| 292 |
show_tags_filters.change(on_toggle_tags_visibility, inputs=[show_tags_filters, tag_dd, lang_dd, search_tb], outputs=[tag_dd, cards])
|
| 293 |
|
| 294 |
-
# Live filtering
|
|
|
|
|
|
|
| 295 |
lang_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
|
| 296 |
tag_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
|
| 297 |
-
search_tb.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
|
| 298 |
|
| 299 |
# Initial load: display all tasks
|
| 300 |
cards.value = render_cards(ALL_TASKS)
|
|
|
|
| 30 |
os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "tasks"),
|
| 31 |
os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "multilingual", "tasks"),
|
| 32 |
]
|
| 33 |
+
# place cache inside repo root to avoid dirname('') issue
|
| 34 |
+
CACHE_PATH = os.path.join(REPO_ROOT, "tasks_index.json")
|
| 35 |
|
| 36 |
|
| 37 |
@dataclass
|
|
|
|
| 157 |
"languages": list(langs),
|
| 158 |
"tags": list(tags),
|
| 159 |
}
|
| 160 |
+
os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
|
| 161 |
with open(path, "w", encoding="utf-8") as f:
|
| 162 |
json.dump(data, f, ensure_ascii=False, indent=2)
|
| 163 |
|
|
|
|
| 175 |
|
| 176 |
def build_and_cache_index() -> Tuple[List[TaskDoc], List[str], List[str]]:
|
| 177 |
tasks, langs, tags = index_tasks()
|
|
|
|
| 178 |
save_index(CACHE_PATH, tasks, langs, tags)
|
| 179 |
return tasks, langs, tags
|
| 180 |
|
|
|
|
| 187 |
print("Loading index from cache...")
|
| 188 |
ALL_TASKS, ALL_LANGS, ALL_TAGS = _loaded
|
| 189 |
print(f"Loaded {len(ALL_TASKS)} tasks from cache")
|
| 190 |
+
TOP_LANGS = ALL_LANGS[:8] # show more by default
|
| 191 |
|
| 192 |
|
| 193 |
def filter_tasks(languages: List[str], tags: List[str], search: str) -> List[TaskDoc]:
|
|
|
|
| 201 |
if selected_tags and not any(t in td.tags for t in selected_tags):
|
| 202 |
continue
|
| 203 |
if search_lc:
|
| 204 |
+
# Search module path, abstract, tags, and dataset names
|
| 205 |
+
hay = " ".join([td.module, td.abstract, ", ".join(td.tags), (td.dataset or "")]).lower()
|
| 206 |
if search_lc not in hay:
|
| 207 |
continue
|
| 208 |
out.append(td)
|
|
|
|
| 213 |
# Responsive grid of pretty cards; show all details without clicks
|
| 214 |
items: List[str] = []
|
| 215 |
for t in tasks:
|
| 216 |
+
parts = t.module.replace("\\", "/").split("/")
|
| 217 |
base_no_ext = parts[-1].rsplit(".", 1)[0]
|
| 218 |
fallback_name = parts[-2] if base_no_ext == "main" and len(parts) >= 2 else base_no_ext
|
| 219 |
+
task_name = (t.name or fallback_name).replace("_", " ").title()
|
| 220 |
+
mod_path = t.module.replace("\\", "/")
|
| 221 |
+
source_html = f'<a href="https://github.com/huggingface/lighteval/blob/main/{mod_path}" target="_blank" rel="noopener">source</a>'
|
| 222 |
+
paper_html = f'<a href="{t.paper}" target="_blank" rel="noopener">paper</a>' if t.paper else ""
|
| 223 |
+
tags_html = " ".join([f'<span class="chip" title="tag: {tag}">{tag}</span>' for tag in t.tags]) if t.tags else ""
|
| 224 |
+
langs_html = " ".join([f'<span class="chip chip-lang" title="language: {lang}">{lang}</span>' for lang in t.languages]) if t.languages else ""
|
| 225 |
+
abstract_html = (t.abstract or "-").replace("\n", "<br/>")
|
| 226 |
+
sep_html = ' <span class="sep">|</span> ' if paper_html else ""
|
| 227 |
+
links_html = f"{source_html}{sep_html}{paper_html}"
|
| 228 |
dataset_links = []
|
| 229 |
if t.dataset:
|
| 230 |
for ds in [d.strip() for d in t.dataset.split(",") if d.strip()]:
|
| 231 |
+
dataset_links.append(f'<a class="dataset" href="https://huggingface.co/datasets/{ds}" target="_blank" rel="noopener">{ds}</a>')
|
| 232 |
dataset_html = " ".join(dataset_links) if dataset_links else ""
|
| 233 |
items.append(
|
| 234 |
f"""
|
| 235 |
+
<article class="card" tabindex="0" aria-label="Task {task_name}">
|
| 236 |
+
<div class="title"><span class="title-text">{task_name}</span> <span class="dataset-inline">{dataset_html}</span></div>
|
| 237 |
+
<div class="chips">{tags_html} {langs_html}</div>
|
| 238 |
+
<div class="abstract">{abstract_html}</div>
|
| 239 |
+
<div class="links">{links_html}</div>
|
| 240 |
+
</article>
|
| 241 |
"""
|
| 242 |
)
|
| 243 |
+
# CSS includes light and dark mode support
|
| 244 |
style = """
|
| 245 |
<style>
|
| 246 |
+
/* layout */
|
| 247 |
+
.cards-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 16px; margin-top: 10px; }
|
| 248 |
+
|
| 249 |
+
/* card base */
|
| 250 |
+
.card { border-radius: 12px; padding: 14px; transition: box-shadow 160ms ease, transform 120ms ease, border-color 120ms ease; outline: none; }
|
| 251 |
+
.card:hover, .card:focus { transform: translateY(-4px); box-shadow: 0 10px 30px rgba(2,6,23,0.08); }
|
| 252 |
+
|
| 253 |
+
.title { display:flex; align-items:center; gap:8px; flex-wrap:wrap; }
|
| 254 |
+
.title-text { font-weight: 600; font-size: 16px; font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; }
|
| 255 |
+
.dataset-inline { font-size: 12px; }
|
| 256 |
+
|
| 257 |
+
.chips { margin: 8px 0 6px 0; display:flex; gap:8px; flex-wrap:wrap; }
|
| 258 |
+
.chip { display:inline-block; padding:4px 8px; border-radius:999px; font-size:12px; background: rgba(99,102,241,0.08); color: #3730a3; }
|
| 259 |
+
.chip-lang { background: rgba(14,165,233,0.08); color: #0369a1; }
|
| 260 |
+
|
| 261 |
+
.abstract { color: #475569; font-size: 13.5px; line-height: 1.35; margin-top: 6px; min-height: 48px; }
|
| 262 |
+
.links { margin-top: 10px; font-size:12px; }
|
| 263 |
+
.links a { text-decoration: none; font-weight: 600; }
|
| 264 |
+
|
| 265 |
+
.sep { color: #94a3b8; margin: 0 8px; }
|
| 266 |
+
|
| 267 |
.dataset { margin-left: 8px; font-size: 12px; color: #0ea5e9; background: #ecfeff; padding: 2px 6px; border-radius: 6px; text-decoration: none; }
|
| 268 |
+
|
| 269 |
+
/* Light mode */
|
| 270 |
+
:root {
|
| 271 |
+
--bg: #f8fafc;
|
| 272 |
+
--card-bg: #ffffff;
|
| 273 |
+
--card-border: #e6f2ff;
|
| 274 |
+
--title-color: #1e3a8a;
|
| 275 |
+
--text-color: #0f172a;
|
| 276 |
+
--muted: #475569;
|
| 277 |
+
--link: #2563eb;
|
| 278 |
+
}
|
| 279 |
+
|
| 280 |
+
/* Dark mode overrides */
|
| 281 |
+
@media (prefers-color-scheme: dark) {
|
| 282 |
+
:root {
|
| 283 |
+
--bg: #0b1220;
|
| 284 |
+
--card-bg: #071022;
|
| 285 |
+
--card-border: #0f2a44;
|
| 286 |
+
--title-color: #93c5fd;
|
| 287 |
+
--text-color: #e6eef8;
|
| 288 |
+
--muted: #cbd5e1;
|
| 289 |
+
--link: #6ea8ff;
|
| 290 |
+
}
|
| 291 |
+
}
|
| 292 |
+
|
| 293 |
+
/* apply */
|
| 294 |
+
body { background: var(--bg); color: var(--text-color); }
|
| 295 |
+
.card { background: var(--card-bg); border: 1px solid var(--card-border); color: var(--text-color); }
|
| 296 |
+
.title-text { color: var(--title-color); }
|
| 297 |
+
.abstract { color: var(--muted); }
|
| 298 |
+
.links a { color: var(--link); }
|
| 299 |
+
.chip { background: rgba(255,255,255,0.03); }
|
| 300 |
+
/* tweak chips for dark mode for better contrast */
|
| 301 |
+
@media (prefers-color-scheme: dark) {
|
| 302 |
+
.chip { background: rgba(255,255,255,0.04); color: var(--text-color); border: 1px solid rgba(255,255,255,0.02); }
|
| 303 |
+
.chip-lang { background: rgba(255,255,255,0.02); }
|
| 304 |
+
}
|
| 305 |
+
|
| 306 |
+
/* small screens adjustments */
|
| 307 |
+
@media (max-width: 520px) {
|
| 308 |
+
.cards-grid { gap: 10px; }
|
| 309 |
+
.title-text { font-size: 15px; }
|
| 310 |
+
}
|
| 311 |
</style>
|
| 312 |
"""
|
| 313 |
return style + "<div class=\"cards-grid\">" + "\n".join(items) + "</div>"
|
|
|
|
| 329 |
# Only toggle visibility; preserve current tag selections and keep them active in filtering
|
| 330 |
tags_value: List[str] = selected_tags or []
|
| 331 |
tasks = filter_tasks(languages, tags_value, search)
|
| 332 |
+
# keep selections when showing; when hiding we keep value but component hidden (so filter still uses them)
|
| 333 |
return gr.update(visible=show, value=tags_value), render_cards(tasks)
|
| 334 |
|
| 335 |
|
| 336 |
+
with gr.Blocks(title="Lighteval Tasks Explorer", css=None) as demo:
|
| 337 |
+
# Header / hero
|
| 338 |
+
with gr.Row():
|
| 339 |
+
gr.Markdown(
|
| 340 |
+
"""
|
| 341 |
+
<h2 style="margin:6px 0 2px 0;">Lighteval Tasks Explorer</h2>
|
| 342 |
+
<p style="margin:0 0 12px 0; color:var(--muted);">Browse tasks by language, tags and search the task descriptions.</p>
|
| 343 |
+
"""
|
| 344 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
+
# Controls and results in two columns (left: controls, right: cards)
|
| 347 |
+
with gr.Row(equal_height=False):
|
| 348 |
+
with gr.Column(scale=2):
|
| 349 |
+
# Free-text search box (editable; no debounce is configured here)
|
| 350 |
+
search_tb = gr.Textbox(label="Search", placeholder="Search in module path, tags, abstract…", value="", interactive=True)
|
| 351 |
+
# NOTE: interactive=True only makes the textbox editable; it does not add debouncing. Filtering re-runs whenever the textbox's .change event fires.
|
| 352 |
+
# Filters
|
| 353 |
+
with gr.Group():
|
| 354 |
+
gr.Markdown("**Languages**")
|
| 355 |
+
show_all_langs = gr.Checkbox(label="Show all languages", value=False)
|
| 356 |
+
lang_dd = gr.CheckboxGroup(choices=TOP_LANGS, value=[]) # default none selected
|
| 357 |
+
with gr.Group():
|
| 358 |
+
gr.Markdown("**Benchmark type**")
|
| 359 |
+
show_tags_filters = gr.Checkbox(label="Show tag checkboxes", value=False)
|
| 360 |
+
tag_dd = gr.CheckboxGroup(choices=ALL_TAGS, value=[], visible=False)
|
| 361 |
+
# small hint
|
| 362 |
+
gr.Markdown("Tip: use the filters and search together. Results update live.")
|
| 363 |
+
|
| 364 |
+
with gr.Column(scale=5):
|
| 365 |
+
cards = gr.HTML()
|
| 366 |
+
# put an initially visible loading placeholder
|
| 367 |
+
cards.value = "<div style='padding:18px'>Loading tasks…</div>"
|
| 368 |
+
|
| 369 |
+
# Wire interactions
|
| 370 |
+
# Toggle expand/collapse language choices
|
| 371 |
show_all_langs.change(on_toggle_language_choices, inputs=[show_all_langs, lang_dd, tag_dd, search_tb], outputs=[lang_dd, cards])
|
| 372 |
+
# Toggle tag filter visibility (keeps values)
|
|
|
|
| 373 |
show_tags_filters.change(on_toggle_tags_visibility, inputs=[show_tags_filters, tag_dd, lang_dd, search_tb], outputs=[tag_dd, cards])
|
| 374 |
|
| 375 |
+
# Live filtering: wire change events on controls to update cards.
|
| 376 |
+
# Textbox: re-filter on every .change event; no debounce is configured in this code.
|
| 377 |
+
search_tb.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
|
| 378 |
lang_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
|
| 379 |
tag_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
|
|
|
|
| 380 |
|
| 381 |
# Initial load: display all tasks
|
| 382 |
cards.value = render_cards(ALL_TASKS)
|