Spaces:

OpenEvals
/

open_benchmark_index

Running

App Files Files Community

Linker1907 committed on Oct 16

Commit

290947d

1 Parent(s): b63b8ed

init

Browse files

Files changed (1) hide show

app.py +127 -45

app.py CHANGED Viewed

@@ -30,7 +30,8 @@ TASK_DIRS = [
     os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "tasks"),
     os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "multilingual", "tasks"),
 ]
-CACHE_PATH = "tasks_index.json"
 @dataclass
@@ -156,6 +157,7 @@ def save_index(path: str, tasks: List[TaskDoc], langs: List[str], tags: List[str
         "languages": list(langs),
         "tags": list(tags),
     }
     with open(path, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=2)
@@ -173,7 +175,6 @@ def load_index(path: str) -> Optional[Tuple[List[TaskDoc], List[str], List[str]]
 def build_and_cache_index() -> Tuple[List[TaskDoc], List[str], List[str]]:
     tasks, langs, tags = index_tasks()
-    os.makedirs(os.path.dirname(CACHE_PATH), exist_ok=True)
     save_index(CACHE_PATH, tasks, langs, tags)
     return tasks, langs, tags
@@ -186,7 +187,7 @@ else:
     print("Loading index from cache...")
     ALL_TASKS, ALL_LANGS, ALL_TAGS = _loaded
 print(f"Loaded {len(ALL_TASKS)} tasks from cache")
-TOP_LANGS = ALL_LANGS[:5]
 def filter_tasks(languages: List[str], tags: List[str], search: str) -> List[TaskDoc]:
@@ -200,8 +201,8 @@ def filter_tasks(languages: List[str], tags: List[str], search: str) -> List[Tas
         if selected_tags and not any(t in td.tags for t in selected_tags):
             continue
         if search_lc:
-            # Search module path or abstract or tags
-            hay = " ".join([td.module, td.abstract, ", ".join(td.tags)]).lower()
             if search_lc not in hay:
                 continue
         out.append(td)
@@ -212,36 +213,101 @@ def render_cards(tasks: List[TaskDoc]) -> str:
     # Responsive grid of pretty cards; show all details without clicks
     items: List[str] = []
     for t in tasks:
-        parts = t.module.split("/")
         base_no_ext = parts[-1].rsplit(".", 1)[0]
         fallback_name = parts[-2] if base_no_ext == "main" and len(parts) >= 2 else base_no_ext
-        task_name = t.name or fallback_name
-        paper_html = f'<a href="{t.paper}" target="_blank">paper</a>' if t.paper else ""
-        tags_html = " ".join([f'<span class="chip">{tag}</span>' for tag in t.tags]) if t.tags else ""
-        langs_html = " ".join([f'<span class="chip chip-lang">{lang}</span>' for lang in t.languages]) if t.languages else ""
-        abstract_html = (t.abstract or "-")
         dataset_links = []
         if t.dataset:
             for ds in [d.strip() for d in t.dataset.split(",") if d.strip()]:
-                dataset_links.append(f'<a class="dataset" href="https://huggingface.co/datasets/{ds}" target="_blank">{ds}</a>')
         dataset_html = " ".join(dataset_links) if dataset_links else ""
         items.append(
             f"""
-            <div class=\"card\">\n  <div class=\"title\"><span class=\"title-text\">{task_name}</span> {dataset_html}</div>\n  <div class=\"chips\">{tags_html} {langs_html}</div>\n  <div class=\"abstract\">{abstract_html}</div>\n  <div class=\"links\">{paper_html}</div>\n</div>
             """
         )
     style = """
     <style>
-      .cards-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 14px; }
-      .card { border: 1px solid #dbeafe; border-radius: 12px; padding: 12px 14px; background: #ffffff; transition: box-shadow 120ms ease, border-color 120ms ease; }
-      .card:hover { box-shadow: 0 6px 20px rgba(2, 132, 199, 0.08); border-color: #60a5fa; }
-      .title-text { font-weight: 600; font-family: ui-sans-serif, system-ui, -apple-system, Segoe UI, Roboto, Ubuntu, Cantarell; color: #1d4ed8; }
       .dataset { margin-left: 8px; font-size: 12px; color: #0ea5e9; background: #ecfeff; padding: 2px 6px; border-radius: 6px; text-decoration: none; }
-      .chips { margin: 8px 0 0 0; }
-      .chip { display: inline-block; margin-right: 6px; margin-bottom: 4px; padding: 2px 8px; border-radius: 9999px; background: #eff6ff; font-size: 12px; color: #1d4ed8; }
-      .chip-lang { background: #eef2ff; color: #3730a3; }
-      .abstract { color: #334155; font-size: 14px; line-height: 1.35; margin-top: 6px; }
-      .links a { font-size: 12px; color: #2563eb; text-decoration: none; }
     </style>
     """
     return style + "<div class=\"cards-grid\">" + "\n".join(items) + "</div>"
@@ -263,38 +329,54 @@ def on_toggle_tags_visibility(show: bool, selected_tags: List[str], languages: L
     # Only toggle visibility; preserve current tag selections and keep them active in filtering
     tags_value: List[str] = selected_tags or []
     tasks = filter_tasks(languages, tags_value, search)
     return gr.update(visible=show, value=tags_value), render_cards(tasks)
-with gr.Blocks(title="Lighteval Tasks Explorer") as demo:
-    gr.Markdown("""
-    ### lighteval tasks explorer
-    Explore tasks by language and tag. Use the search to quickly narrow down.
-    """)
-    # Search on top
-    search_tb = gr.Textbox(label="Search", placeholder="Search in module path, tags, abstract…")
-    # Filters on two separate stacked levels
-    gr.Markdown("**Languages**")
-    show_all_langs = gr.Checkbox(label="Show all languages", value=False)
-    lang_dd = gr.CheckboxGroup(choices=TOP_LANGS)
-    gr.Markdown("**Tags**")
-    show_tags_filters = gr.Checkbox(label="Show tag checkboxes", value=False)
-    tag_dd = gr.CheckboxGroup(choices=ALL_TAGS, visible=False)
-    cards = gr.HTML()
-    # Toggle to expand/collapse language choices
     show_all_langs.change(on_toggle_language_choices, inputs=[show_all_langs, lang_dd, tag_dd, search_tb], outputs=[lang_dd, cards])
-    # Toggle to show/hide tag filters (clears selections when hiding)
     show_tags_filters.change(on_toggle_tags_visibility, inputs=[show_tags_filters, tag_dd, lang_dd, search_tb], outputs=[tag_dd, cards])
-    # Live filtering as user interacts
     lang_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
     tag_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
-    search_tb.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
     # Initial load: display all tasks
     cards.value = render_cards(ALL_TASKS)

     os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "tasks"),
     os.path.join(REPO_ROOT, "src", "lighteval", "tasks", "multilingual", "tasks"),
 ]
+# place cache inside repo root to avoid dirname('') issue
+CACHE_PATH = os.path.join(REPO_ROOT, "tasks_index.json")
 @dataclass
         "languages": list(langs),
         "tags": list(tags),
     }
+    os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
     with open(path, "w", encoding="utf-8") as f:
         json.dump(data, f, ensure_ascii=False, indent=2)
 def build_and_cache_index() -> Tuple[List[TaskDoc], List[str], List[str]]:
     tasks, langs, tags = index_tasks()
     save_index(CACHE_PATH, tasks, langs, tags)
     return tasks, langs, tags
     print("Loading index from cache...")
     ALL_TASKS, ALL_LANGS, ALL_TAGS = _loaded
 print(f"Loaded {len(ALL_TASKS)} tasks from cache")
+TOP_LANGS = ALL_LANGS[:8]  # show more by default
 def filter_tasks(languages: List[str], tags: List[str], search: str) -> List[TaskDoc]:
         if selected_tags and not any(t in td.tags for t in selected_tags):
             continue
         if search_lc:
+            # Search module path, abstract, tags, and dataset names
+            hay = " ".join([td.module, td.abstract, ", ".join(td.tags), (td.dataset or "")]).lower()
             if search_lc not in hay:
                 continue
         out.append(td)
     # Responsive grid of pretty cards; show all details without clicks
     items: List[str] = []
     for t in tasks:
+        parts = t.module.replace("\\", "/").split("/")
         base_no_ext = parts[-1].rsplit(".", 1)[0]
         fallback_name = parts[-2] if base_no_ext == "main" and len(parts) >= 2 else base_no_ext
+        task_name = (t.name or fallback_name).replace("_", " ").title()
+        mod_path = t.module.replace("\\", "/")
+        source_html = f'<a href="https://github.com/huggingface/lighteval/blob/main/{mod_path}" target="_blank" rel="noopener">source</a>'
+        paper_html = f'<a href="{t.paper}" target="_blank" rel="noopener">paper</a>' if t.paper else ""
+        tags_html = " ".join([f'<span class="chip" title="tag: {tag}">{tag}</span>' for tag in t.tags]) if t.tags else ""
+        langs_html = " ".join([f'<span class="chip chip-lang" title="language: {lang}">{lang}</span>' for lang in t.languages]) if t.languages else ""
+        abstract_html = (t.abstract or "-").replace("\n", "<br/>")
+        sep_html = ' <span class="sep">|</span> ' if paper_html else ""
+        links_html = f"{source_html}{sep_html}{paper_html}"
         dataset_links = []
         if t.dataset:
             for ds in [d.strip() for d in t.dataset.split(",") if d.strip()]:
+                dataset_links.append(f'<a class="dataset" href="https://huggingface.co/datasets/{ds}" target="_blank" rel="noopener">{ds}</a>')
         dataset_html = " ".join(dataset_links) if dataset_links else ""
         items.append(
             f"""
+            <article class="card" tabindex="0" aria-label="Task {task_name}">
+              <div class="title"><span class="title-text">{task_name}</span> <span class="dataset-inline">{dataset_html}</span></div>
+              <div class="chips">{tags_html} {langs_html}</div>
+              <div class="abstract">{abstract_html}</div>
+              <div class="links">{links_html}</div>
+            </article>
             """
         )
+    # CSS includes light and dark mode support
     style = """
     <style>
+      /* layout */
+      .cards-grid { display: grid; grid-template-columns: repeat(auto-fill, minmax(280px, 1fr)); gap: 16px; margin-top: 10px; }
+      /* card base */
+      .card { border-radius: 12px; padding: 14px; transition: box-shadow 160ms ease, transform 120ms ease, border-color 120ms ease; outline: none; }
+      .card:hover, .card:focus { transform: translateY(-4px); box-shadow: 0 10px 30px rgba(2,6,23,0.08); }
+      .title { display:flex; align-items:center; gap:8px; flex-wrap:wrap; }
+      .title-text { font-weight: 600; font-size: 16px; font-family: ui-sans-serif, system-ui, -apple-system, "Segoe UI", Roboto, "Helvetica Neue", Arial; }
+      .dataset-inline { font-size: 12px; }
+      .chips { margin: 8px 0 6px 0; display:flex; gap:8px; flex-wrap:wrap; }
+      .chip { display:inline-block; padding:4px 8px; border-radius:999px; font-size:12px; background: rgba(99,102,241,0.08); color: #3730a3; }
+      .chip-lang { background: rgba(14,165,233,0.08); color: #0369a1; }
+      .abstract { color: #475569; font-size: 13.5px; line-height: 1.35; margin-top: 6px; min-height: 48px; }
+      .links { margin-top: 10px; font-size:12px; }
+      .links a { text-decoration: none; font-weight: 600; }
+      .sep { color: #94a3b8; margin: 0 8px; }
       .dataset { margin-left: 8px; font-size: 12px; color: #0ea5e9; background: #ecfeff; padding: 2px 6px; border-radius: 6px; text-decoration: none; }
+      /* Light mode */
+      :root {
+        --bg: #f8fafc;
+        --card-bg: #ffffff;
+        --card-border: #e6f2ff;
+        --title-color: #1e3a8a;
+        --text-color: #0f172a;
+        --muted: #475569;
+        --link: #2563eb;
+      }
+      /* Dark mode overrides */
+      @media (prefers-color-scheme: dark) {
+        :root {
+          --bg: #0b1220;
+          --card-bg: #071022;
+          --card-border: #0f2a44;
+          --title-color: #93c5fd;
+          --text-color: #e6eef8;
+          --muted: #cbd5e1;
+          --link: #6ea8ff;
+        }
+      }
+      /* apply */
+      body { background: var(--bg); color: var(--text-color); }
+      .card { background: var(--card-bg); border: 1px solid var(--card-border); color: var(--text-color); }
+      .title-text { color: var(--title-color); }
+      .abstract { color: var(--muted); }
+      .links a { color: var(--link); }
+      .chip { background: rgba(255,255,255,0.03); }
+      /* tweak chips for dark mode for better contrast */
+      @media (prefers-color-scheme: dark) {
+        .chip { background: rgba(255,255,255,0.04); color: var(--text-color); border: 1px solid rgba(255,255,255,0.02); }
+        .chip-lang { background: rgba(255,255,255,0.02); }
+      }
+      /* small screens adjustments */
+      @media (max-width: 520px) {
+        .cards-grid { gap: 10px; }
+        .title-text { font-size: 15px; }
+      }
     </style>
     """
     return style + "<div class=\"cards-grid\">" + "\n".join(items) + "</div>"
     # Only toggle visibility; preserve current tag selections and keep them active in filtering
     tags_value: List[str] = selected_tags or []
     tasks = filter_tasks(languages, tags_value, search)
+    # keep selections when showing; when hiding we keep value but component hidden (so filter still uses them)
     return gr.update(visible=show, value=tags_value), render_cards(tasks)
+with gr.Blocks(title="Lighteval Tasks Explorer", css=None) as demo:
+    # Header / hero
+    with gr.Row():
+        gr.Markdown(
+            """
+            <h2 style="margin:6px 0 2px 0;">Lighteval Tasks Explorer</h2>
+            <p style="margin:0 0 12px 0; color:var(--muted);">Browse tasks by language, tags and search the task descriptions.</p>
+            """
+        )
+    # Controls and results in two columns (left: controls, right: cards)
+    with gr.Row(equal_height=False):
+        with gr.Column(scale=2):
+            # Free-text search box (no debounce is configured on it)
+            search_tb = gr.Textbox(label="Search", placeholder="Search in module path, tags, abstract…", value="", interactive=True)
+            # NOTE: no debounce is applied; the .change listener registered further down re-filters whenever the value changes
+            # Filters
+            with gr.Group():
+                gr.Markdown("**Languages**")
+                show_all_langs = gr.Checkbox(label="Show all languages", value=False)
+                lang_dd = gr.CheckboxGroup(choices=TOP_LANGS, value=[])  # default none selected
+            with gr.Group():
+                gr.Markdown("**Benchmark type**")
+                show_tags_filters = gr.Checkbox(label="Show tag checkboxes", value=False)
+                tag_dd = gr.CheckboxGroup(choices=ALL_TAGS, value=[], visible=False)
+            # small hint
+            gr.Markdown("Tip: use the filters and search together. Results update live.")
+        with gr.Column(scale=5):
+            cards = gr.HTML()
+            # put an initially visible loading placeholder
+            cards.value = "<div style='padding:18px'>Loading tasks…</div>"
+    # Wire interactions
+    # Toggle expand/collapse language choices
     show_all_langs.change(on_toggle_language_choices, inputs=[show_all_langs, lang_dd, tag_dd, search_tb], outputs=[lang_dd, cards])
+    # Toggle tag filter visibility (keeps values)
     show_tags_filters.change(on_toggle_tags_visibility, inputs=[show_tags_filters, tag_dd, lang_dd, search_tb], outputs=[tag_dd, cards])
+    # Live filtering: wire change events on controls to update cards.
+    # Textbox: re-filter on every change event; no debounce argument is passed here.
+    search_tb.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
     lang_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
     tag_dd.change(on_filter, inputs=[lang_dd, tag_dd, search_tb], outputs=[cards])
     # Initial load: display all tasks
     cards.value = render_cards(ALL_TASKS)