Spaces:
Running
Running
Suqi Sun
committed on
Commit
·
911c9b4
1
Parent(s):
e07beed
Upload more metrics and fix some issues in app.py
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +20 -17
- eval-results/medqa/0/ckpt_003/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_003/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_006/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_006/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_009/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_009/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_012/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_012/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_015/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_015/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_018/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_018/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_021/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_021/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_024/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_024/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_027/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_027/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_030/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_030/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_033/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_033/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_036/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_036/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_039/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_039/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_042/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_042/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_045/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_045/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_048/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_048/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_051/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_051/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_054/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_054/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_057/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_057/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_060/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_060/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_063/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_063/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_066/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_066/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_069/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_069/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_072/medqa_4options.jsonl.tar.gz +0 -0
- eval-results/medqa/0/ckpt_072/results.json.tar.gz +0 -0
- eval-results/medqa/0/ckpt_075/medqa_4options.jsonl.tar.gz +0 -0
app.py
CHANGED
|
@@ -2,7 +2,6 @@ import os
|
|
| 2 |
import streamlit as st
|
| 3 |
import json
|
| 4 |
import tarfile
|
| 5 |
-
from base64 import b64encode
|
| 6 |
|
| 7 |
st.set_page_config(layout="wide")
|
| 8 |
|
|
@@ -13,10 +12,8 @@ st.markdown("""The K2 gallery allows one to browse the output of various evaluat
|
|
| 13 |
|
| 14 |
|
| 15 |
with st.sidebar:
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
html = f"<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='data:image/svg+xml;base64,{b64}' width='100' />"
|
| 19 |
-
st.markdown(html, unsafe_allow_html=True)
|
| 20 |
|
| 21 |
metric = st.radio(
|
| 22 |
"Choose a metric", options=os.listdir(os.path.join(EVAL_DIR))
|
|
@@ -28,18 +25,24 @@ with st.sidebar:
|
|
| 28 |
|
| 29 |
col1, col2 = st.columns(2)
|
| 30 |
|
| 31 |
-
|
| 32 |
-
st.header("Checkpoint A")
|
| 33 |
-
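ckpt = st.selectbox('Select a checkpoint', sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot))), key="A1")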
|
| 34 |
st.write(f'Veiwing Evaluation Results for Checkpoint: `{ckpt}`')
|
| 35 |
-
file = st.selectbox("Select a file", sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt))), key="A2")
|
| 36 |
-
|
| 37 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
with col2:
|
| 40 |
-
|
| 41 |
-
ckpt = st.selectbox('Select a checkpoint', sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot))), key="B1")
|
| 42 |
-
st.write(f'Veiwing Evaluation Results for Checkpoint: `{ckpt}`')
|
| 43 |
-
file = st.selectbox("Select a file", sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt))), key="B2")
|
| 44 |
-
with tarfile.open(os.path.join(EVAL_DIR, metric, n_shot, ckpt, file), "r:gz") as f:
|
| 45 |
-
st.json(json.load(f.extractfile(f.next())))
|
|
|
|
| 2 |
import streamlit as st
|
| 3 |
import json
|
| 4 |
import tarfile
|
|
|
|
| 5 |
|
| 6 |
st.set_page_config(layout="wide")
|
| 7 |
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
with st.sidebar:
|
| 15 |
+
html = f"<img src='https://www.llm360.ai/images/logo-highres.png' width='100' /><img src='https://huggingface.co/spaces/LLM360/k2-eval-gallery/raw/main/k2-logo.svg' width='100' />"
|
| 16 |
+
st.markdown(html, unsafe_allow_html=True)
|
|
|
|
|
|
|
| 17 |
|
| 18 |
metric = st.radio(
|
| 19 |
"Choose a metric", options=os.listdir(os.path.join(EVAL_DIR))
|
|
|
|
| 25 |
|
| 26 |
col1, col2 = st.columns(2)
|
| 27 |
|
| 28 |
+
def render_column(col_label):
|
| 29 |
+
st.header(f"Checkpoint {col_label}")
|
| 30 |
+
ckpt = st.select_slider('Select a checkpoint', sorted(os.listdir(os.path.join(EVAL_DIR, metric, n_shot))), key=col_label + '1')
|
| 31 |
st.write(f'Veiwing Evaluation Results for Checkpoint: `{ckpt}`')
|
| 32 |
+
file = st.selectbox("Select a file", sorted(f_name[:-len(".tar.gz")] for f_name in os.listdir(os.path.join(EVAL_DIR, metric, n_shot, ckpt))), key=col_label + '2')
|
| 33 |
+
file += ".tar.gz"
|
| 34 |
+
with tarfile.open(os.path.join(EVAL_DIR, metric, n_shot, ckpt, file), "r:gz") as tar:
|
| 35 |
+
f = tar.extractfile(tar.next())
|
| 36 |
+
eval_json = json.load(f)
|
| 37 |
+
if isinstance(eval_json, list):
|
| 38 |
+
doc_id = st.slider("Select a document id", 0, len(eval_json) - 1, 0, 1, key=col_label + '3')
|
| 39 |
+
st.json(eval_json[doc_id])
|
| 40 |
+
else:
|
| 41 |
+
st.json(eval_json)
|
| 42 |
+
f.close()
|
| 43 |
+
|
| 44 |
+
with col1:
|
| 45 |
+
render_column('A')
|
| 46 |
|
| 47 |
with col2:
|
| 48 |
+
render_column('B')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
eval-results/medqa/0/ckpt_003/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|
eval-results/medqa/0/ckpt_003/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_006/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_006/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_009/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (690 kB). View file
|
|
|
eval-results/medqa/0/ckpt_009/results.json.tar.gz
ADDED
|
Binary file (2.84 kB). View file
|
|
|
eval-results/medqa/0/ckpt_012/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|
eval-results/medqa/0/ckpt_012/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_015/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_015/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_018/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_018/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_021/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_021/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_024/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_024/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_027/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (690 kB). View file
|
|
|
eval-results/medqa/0/ckpt_027/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_030/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_030/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_033/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_033/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_036/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_036/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_039/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_039/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_042/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (691 kB). View file
|
|
|
eval-results/medqa/0/ckpt_042/results.json.tar.gz
ADDED
|
Binary file (2.82 kB). View file
|
|
|
eval-results/medqa/0/ckpt_045/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|
eval-results/medqa/0/ckpt_045/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_048/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (690 kB). View file
|
|
|
eval-results/medqa/0/ckpt_048/results.json.tar.gz
ADDED
|
Binary file (2.82 kB). View file
|
|
|
eval-results/medqa/0/ckpt_051/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|
eval-results/medqa/0/ckpt_051/results.json.tar.gz
ADDED
|
Binary file (2.82 kB). View file
|
|
|
eval-results/medqa/0/ckpt_054/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|
eval-results/medqa/0/ckpt_054/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_057/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|
eval-results/medqa/0/ckpt_057/results.json.tar.gz
ADDED
|
Binary file (2.84 kB). View file
|
|
|
eval-results/medqa/0/ckpt_060/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|
eval-results/medqa/0/ckpt_060/results.json.tar.gz
ADDED
|
Binary file (2.84 kB). View file
|
|
|
eval-results/medqa/0/ckpt_063/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|
eval-results/medqa/0/ckpt_063/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_066/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (693 kB). View file
|
|
|
eval-results/medqa/0/ckpt_066/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_069/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (693 kB). View file
|
|
|
eval-results/medqa/0/ckpt_069/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_072/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|
eval-results/medqa/0/ckpt_072/results.json.tar.gz
ADDED
|
Binary file (2.81 kB). View file
|
|
|
eval-results/medqa/0/ckpt_075/medqa_4options.jsonl.tar.gz
ADDED
|
Binary file (692 kB). View file
|
|
|