Linker1907 committed on
Commit
8628943
·
1 Parent(s): aef0678
Files changed (2) hide show
  1. app.py +302 -0
  2. tasks_index.json +0 -0
app.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Gradio dashboard to explore Lighteval tasks.
3
+
4
+ Scans `src/lighteval/tasks/tasks` and `src/lighteval/tasks/multilingual/tasks`
5
+ for module-level docstrings with this format:
6
+
7
+ name: <task display name>
8
+ dataset: <dataset id(s)>
9
+ abstract: <free text>
10
+ languages: <comma/newline separated language codes or names>
11
+ tags: <comma/newline separated tags>
12
+ paper: <url>
13
+
14
+ This file stays outside the lighteval src tree, per request.
15
+ """
16
+
17
+ import ast
18
+ import json
19
+ import os
20
+ import re
21
+ from collections import Counter
22
+ from dataclasses import asdict, dataclass
23
+ from typing import Dict, List, Optional, Tuple
24
+
25
+ import gradio as gr
26
+
27
+
28
# Repository root: the parent of the directory that contains this file.
REPO_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), ".."))

# Directories scanned for task modules (monolingual first, then multilingual).
_TASKS_PACKAGE_DIR = os.path.join(REPO_ROOT, "src", "lighteval", "tasks")
TASK_DIRS = [
    os.path.join(_TASKS_PACKAGE_DIR, "tasks"),
    os.path.join(_TASKS_PACKAGE_DIR, "multilingual", "tasks"),
]

# Location of the JSON file the task index is written to.
CACHE_PATH = os.path.join(REPO_ROOT, "benchmark_finder", "tasks_index.json")
34
+
35
+
36
@dataclass
class TaskDoc:
    """Metadata for one task module, as parsed from its module docstring."""

    # Path of the task file as produced by file discovery.
    file_path: str
    # `file_path` expressed relative to the repository root.
    module: str
    # Free-text description taken from the `abstract` section.
    abstract: str
    # Lower-cased entries of the `languages` section.
    languages: List[str]
    # Lower-cased entries of the `tags` section.
    tags: List[str]
    # Content of the `paper` section, or None when it is empty.
    paper: Optional[str]
    # Content of the `dataset` section (may list several ids), or None.
    dataset: Optional[str]
    # Display name from the `name` section; None when the section is empty.
    name: Optional[str] = None
46
+
47
+
48
def read_file_text(path: str) -> Optional[str]:
    """Return the UTF-8 decoded content of `path`, or None if it cannot be read.

    Only I/O failures (`OSError`: missing file, directory, permissions, ...) and
    value errors (`ValueError`: undecodable bytes, invalid path string) are
    treated as "unreadable". Any other exception is a programming error and is
    allowed to propagate instead of being silently swallowed.
    """
    try:
        with open(path, "r", encoding="utf-8") as f:
            return f.read()
    except (OSError, ValueError):
        # UnicodeDecodeError is a ValueError subclass, so non-UTF-8 files land here.
        return None
54
+
55
+
56
def parse_module_docstring(text: str) -> Optional[str]:
    """Extract the module-level docstring from Python source `text`.

    The source is parsed with `ast` first. When it cannot be parsed (for
    example because of a syntax error further down the file), a regex fallback
    looks for a triple-quoted string at the very top of the text.

    Returns the docstring, or None when there is none.
    """
    try:
        return ast.get_docstring(ast.parse(text))
    except (SyntaxError, ValueError):
        # Fallback: a leading triple-quoted string, optionally carrying an
        # r/u string prefix. The quote character is captured so that the
        # closing delimiter must match the opening one.
        m = re.match(r"^\s*[rRuU]?([\'\"])\1\1([\s\S]*?)\1\1\1", text)
        return m.group(2).strip() if m else None
64
+
65
+
66
def parse_sections(doc: str) -> Dict[str, str]:
    """Split a task docstring into its known sections.

    Recognised keys are name, dataset, abstract, languages, tags and paper
    (matched case-insensitively). Two layouts are accepted:

    * the key alone on a line (`abstract:`) with the value on following lines;
    * the key and the start of its value on one line (`paper: <url>`).

    Lines that follow a key belong to that key until the next key appears.
    Text before the first key is ignored. Blank lines inside a section are
    dropped, so a multi-paragraph value comes back as newline-joined lines.

    Returns a dict holding every known key; sections that are absent map to "".
    """
    out: Dict[str, str] = {
        key: "" for key in ("name", "dataset", "abstract", "languages", "tags", "paper")
    }
    current_key: Optional[str] = None
    for raw_line in doc.splitlines():
        line = raw_line.rstrip()
        # Only the text before the first ':' can be a key, which keeps URLs
        # such as "https://..." from being mistaken for section headers.
        head, colon, rest = line.partition(":")
        candidate = head.strip().lower()
        if colon and candidate in out:
            current_key = candidate
            line = rest.strip()
            if not line:
                # Header on its own line: the value starts on the next line.
                continue
        if current_key is None:
            continue
        existing = out[current_key]
        # Stripping after every append is what discards blank lines.
        out[current_key] = (f"{existing}\n{line}" if existing else line).strip()
    return out
80
+
81
+
82
def split_list_field(value: str) -> List[str]:
    """Split a comma- and/or newline-separated field into trimmed, non-empty tokens."""
    if not value:
        return []
    trimmed = (piece.strip() for piece in re.split(r"[\n,]", value))
    return [token for token in trimmed if token]
94
+
95
+
96
def discover_task_files() -> List[str]:
    """Return the sorted, de-duplicated list of task files under `TASK_DIRS`.

    For every existing directory in `TASK_DIRS` this collects:
    * each entry directly inside it whose name ends with ".py";
    * every `main.py` found in any of its subdirectories (at any depth).
    """
    found = set()
    for base in TASK_DIRS:
        if not os.path.isdir(base):
            continue
        found.update(
            os.path.join(base, entry) for entry in os.listdir(base) if entry.endswith(".py")
        )
        for dirpath, _dirnames, filenames in os.walk(base):
            # The top-level directory itself was handled by the listing above.
            if dirpath != base and "main.py" in filenames:
                found.add(os.path.join(dirpath, "main.py"))
    return sorted(found)
120
+
121
+
122
def index_tasks() -> Tuple[List[TaskDoc], List[str], List[str]]:
    """Build the task index from the discovered task files.

    Files that cannot be read, or that have no module docstring, are skipped.

    Returns a tuple `(tasks, languages, tags)` where `languages` is ordered by
    how many times each language was listed (most frequent first, ties broken
    alphabetically) and `tags` is sorted alphabetically.
    """
    indexed: List[TaskDoc] = []
    lang_freq: Counter = Counter()
    seen_tags: set = set()

    for path in discover_task_files():
        source = read_file_text(path)
        if not source:
            continue
        docstring = parse_module_docstring(source)
        if not docstring:
            continue

        fields = parse_sections(docstring)
        task_langs = [entry.lower() for entry in split_list_field(fields.get("languages", ""))]
        task_tags = [entry.lower() for entry in split_list_field(fields.get("tags", ""))]
        lang_freq.update(task_langs)
        seen_tags.update(task_tags)

        indexed.append(
            TaskDoc(
                file_path=path,
                module=os.path.relpath(path, REPO_ROOT),
                abstract=fields.get("abstract", "").strip(),
                languages=task_langs,
                tags=task_tags,
                paper=fields.get("paper", "").strip() or None,
                dataset=fields.get("dataset", "").strip() or None,
                name=fields.get("name", "").strip() or None,
            )
        )

    ranked_langs = sorted(lang_freq, key=lambda lang: (-lang_freq[lang], lang))
    return indexed, ranked_langs, sorted(seen_tags)
151
+
152
+
153
def save_index(path: str, tasks: List[TaskDoc], langs: List[str], tags: List[str]) -> None:
    """Write the index to `path` as indented UTF-8 JSON.

    The document has three keys: "tasks" (one dict per task), "languages"
    and "tags".
    """
    payload = {
        "tasks": list(map(asdict, tasks)),
        "languages": list(langs),
        "tags": list(tags),
    }
    with open(path, "w", encoding="utf-8") as handle:
        json.dump(payload, handle, ensure_ascii=False, indent=2)
161
+
162
+
163
def load_index(path: str) -> Optional[Tuple[List[TaskDoc], List[str], List[str]]]:
    """Load a previously saved index from `path`.

    Returns `(tasks, languages, tags)`, or None when the cache is unusable:
    the file is missing or unreadable, is not valid JSON, is not a JSON
    object, or holds entries that no longer match the `TaskDoc` fields.
    A bad cache is reported as a miss so the caller can rebuild it rather
    than crash.
    """
    try:
        # Open directly rather than checking for existence first; this avoids
        # a race between the check and the open.
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except (OSError, ValueError):
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        return None
    if not isinstance(data, dict):
        return None
    try:
        tasks = [TaskDoc(**t) for t in data.get("tasks", [])]
        langs = list(data.get("languages", []))
        tags = list(data.get("tags", []))
    except TypeError:
        # Raised for unexpected or missing TaskDoc fields and for non-iterable values.
        return None
    return tasks, langs, tags
172
+
173
+
174
def build_and_cache_index() -> Tuple[List[TaskDoc], List[str], List[str]]:
    """Build the task index, write it to `CACHE_PATH`, and return it.

    The directory that holds the cache file is created when missing.
    """
    tasks, langs, tags = index_tasks()
    cache_dir = os.path.dirname(CACHE_PATH)
    os.makedirs(cache_dir, exist_ok=True)
    save_index(CACHE_PATH, tasks, langs, tags)
    return tasks, langs, tags
179
+
180
+
181
# Rebuild the index on every start when a task source directory is available,
# so the dashboard always reflects the current docstrings. When none of the
# task directories exist, fall back to the cached index instead of building
# an empty one and writing it over the cache.
if any(os.path.isdir(task_dir) for task_dir in TASK_DIRS):
    ALL_TASKS, ALL_LANGS, ALL_TAGS = build_and_cache_index()
else:
    ALL_TASKS, ALL_LANGS, ALL_TAGS = load_index(CACHE_PATH) or ([], [], [])

# Languages offered by default in the filter; ALL_LANGS is already ordered by
# how often each language occurs, so this is the five most common ones.
TOP_LANGS = ALL_LANGS[:5]
187
+
188
+
189
def filter_tasks(languages: List[str], tags: List[str], search: str) -> List[TaskDoc]:
    """Return the tasks from `ALL_TASKS` that satisfy every active filter.

    * `languages`: keep tasks listing at least one of them (ignored when empty).
    * `tags`: keep tasks carrying at least one of them (ignored when empty).
    * `search`: case-insensitive substring looked up in the module path, the
      abstract and the tags (ignored when blank).
    """
    wanted_langs = {lang.lower() for lang in (languages or [])}
    wanted_tags = {tag.lower() for tag in (tags or [])}
    needle = (search or "").strip().lower()

    def matches(td: TaskDoc) -> bool:
        if wanted_langs and wanted_langs.isdisjoint(td.languages):
            return False
        if wanted_tags and wanted_tags.isdisjoint(td.tags):
            return False
        if not needle:
            return True
        haystack = " ".join([td.module, td.abstract, ", ".join(td.tags)]).lower()
        return needle in haystack

    return [td for td in ALL_TASKS if matches(td)]
206
+
207
+
208
def render_cards(tasks: List[TaskDoc]) -> str:
    """Render `tasks` as a responsive grid of HTML cards, with its stylesheet.

    Each card shows the task name, dataset links, tag and language chips, the
    abstract ("-" when empty) and a paper link when one is set. The name falls
    back to the module file name, or to the parent directory name for
    `main.py` modules.

    All text taken from the task metadata is HTML-escaped, so characters such
    as `<`, `>` and `&` are shown literally and cannot break the markup.

    Returns the `<style>` block followed by the `cards-grid` container.
    """
    from html import escape  # local import keeps this block self-contained

    items: List[str] = []
    for t in tasks:
        # `module` comes from os.path.relpath, which uses backslashes on
        # Windows; normalise the separator before splitting.
        parts = t.module.replace("\\", "/").split("/")
        base_no_ext = parts[-1].rsplit(".", 1)[0]
        fallback_name = parts[-2] if base_no_ext == "main" and len(parts) >= 2 else base_no_ext
        task_name = escape(t.name or fallback_name)

        paper_html = (
            f'<a href="{escape(t.paper, quote=True)}" target="_blank">paper</a>' if t.paper else ""
        )
        tags_html = " ".join(f'<span class="chip">{escape(tag)}</span>' for tag in t.tags)
        langs_html = " ".join(
            f'<span class="chip chip-lang">{escape(lang)}</span>' for lang in t.languages
        )
        abstract_html = escape(t.abstract) if t.abstract else "-"

        # The dataset field may list several comma-separated ids; link each one.
        dataset_ids = [d.strip() for d in t.dataset.split(",") if d.strip()] if t.dataset else []
        dataset_html = " ".join(
            f'<a class="dataset" href="https://huggingface.co/datasets/{escape(ds, quote=True)}" '
            f'target="_blank">{escape(ds)}</a>'
            for ds in dataset_ids
        )

        items.append(
            f"""
<div class="card">
  <div class="title"><span class="title-text">{task_name}</span> {dataset_html}</div>
  <div class="chips">{tags_html} {langs_html}</div>
  <div class="abstract">{abstract_html}</div>
  <div class="links">{paper_html}</div>
</div>
"""
        )

    style = """
<style>
.cards-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 14px; }
.card { border: 1px solid #dbeafe; border-radius: 12px; padding: 12px 14px; background: #ffffff; transition: box-shadow 120ms ease, border-color 120ms ease; }
.card:hover { box-shadow: 0 6px 20px rgba(2, 132, 199, 0.08); border-color: #60a5fa; }
.title-text { font-weight: 600; font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell; color: #1d4ed8; }
.dataset { margin-left: 8px; font-size: 12px; color: #0ea5e9; background: #ecfeff; padding: 2px 6px; border-radius: 6px; text-decoration: none; }
.chips { margin: 8px 0 0 0; }
.chip { display: inline-block; margin-right: 6px; margin-bottom: 4px; padding: 2px 8px; border-radius: 9999px; background: #eff6ff; font-size: 12px; color: #1d4ed8; }
.chip-lang { background: #eef2ff; color: #3730a3; }
.abstract { color: #334155; font-size: 14px; line-height: 1.35; margin-top: 6px; }
.links a { font-size: 12px; color: #2563eb; text-decoration: none; }
</style>
"""
    return style + '<div class="cards-grid">' + "\n".join(items) + "</div>"
245
+
246
+
247
def on_filter(languages: List[str], tags: List[str], search: str):
    """Event handler: return the cards HTML for the current filter values."""
    return render_cards(filter_tasks(languages, tags, search))
250
+
251
+
252
def on_toggle_language_choices(show_all: bool, selected_langs: List[str], tags: List[str], search: str):
    """Event handler: switch the language choices between the top and full list.

    Selected languages that are not among the new choices are dropped, and the
    cards are re-filtered with the remaining selection.

    Returns an update for the language checkbox group and the cards HTML.
    """
    if show_all:
        choices = ALL_LANGS
    else:
        choices = TOP_LANGS
    still_selected = [lang for lang in (selected_langs or []) if lang in choices]
    matching = filter_tasks(still_selected, tags, search)
    return gr.update(choices=choices, value=still_selected), render_cards(matching)
257
+
258
+
259
def on_toggle_tags_visibility(show: bool, selected_tags: List[str], languages: List[str], search: str):
    """Event handler: show or hide the tag checkbox group.

    Only the visibility changes. The current tag selection is kept as the
    component value and still takes part in filtering.

    Returns an update for the tag checkbox group and the cards HTML.
    """
    active_tags: List[str] = selected_tags or []
    matching = filter_tasks(languages, active_tags, search)
    return gr.update(visible=show, value=active_tags), render_cards(matching)
264
+
265
+
266
# UI definition: a search box, language and tag filters, and the cards grid.
with gr.Blocks(title="Lighteval Tasks Explorer") as demo:
    gr.Markdown(
        """
        ### lighteval tasks explorer
        Explore tasks by language and tag. Use the search to quickly narrow down.
        """
    )

    # Search on top
    search_tb = gr.Textbox(label="Search", placeholder="Search in module path, tags, abstract…")

    # Filters on two separate stacked levels
    gr.Markdown("**Languages**")
    show_all_langs = gr.Checkbox(label="Show all languages", value=False)
    lang_dd = gr.CheckboxGroup(choices=TOP_LANGS)

    gr.Markdown("**Tags**")
    show_tags_filters = gr.Checkbox(label="Show tag checkboxes", value=False)
    tag_dd = gr.CheckboxGroup(choices=ALL_TAGS, visible=False)

    # Initial load: display all tasks. The value is passed at construction
    # time rather than assigned to the component afterwards.
    cards = gr.HTML(value=render_cards(ALL_TASKS))

    # Toggle to expand/collapse language choices
    show_all_langs.change(
        on_toggle_language_choices,
        inputs=[show_all_langs, lang_dd, tag_dd, search_tb],
        outputs=[lang_dd, cards],
    )

    # Toggle to show/hide tag filters. Selections are preserved when hiding
    # and keep filtering the cards.
    show_tags_filters.change(
        on_toggle_tags_visibility,
        inputs=[show_tags_filters, tag_dd, lang_dd, search_tb],
        outputs=[tag_dd, cards],
    )

    # Live filtering as user interacts
    lang_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
    tag_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
    search_tb.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])


if __name__ == "__main__":
    # Run with `python benchmark_finder/app.py`
    demo.launch()
tasks_index.json ADDED
The diff for this file is too large to render. See raw diff