Spaces:
Running
Running
Ludwig Stumpp
committed on
Commit
·
1c71762
1
Parent(s):
412a418
Fix app
Browse files
- poetry.lock +0 -0
- pyproject.toml +14 -0
- requirements-dev.txt +0 -4
- requirements.txt +0 -2
- setup.cfg +2 -0
- streamlit_app.py +35 -13
poetry.lock
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pyproject.toml
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[tool.poetry]
|
2 |
+
package-mode = false
|
3 |
+
description = ""
|
4 |
+
authors = ["Ludwig Stumpp <[email protected]>"]
|
5 |
+
readme = "README.md"
|
6 |
+
|
7 |
+
[tool.poetry.dependencies]
|
8 |
+
python = "^3.10"
|
9 |
+
pandas = "^2.2.2"
|
10 |
+
streamlit = "^1.37.1"
|
11 |
+
|
12 |
+
[build-system]
|
13 |
+
requires = ["poetry-core"]
|
14 |
+
build-backend = "poetry.core.masonry.api"
|
requirements-dev.txt
DELETED
@@ -1,4 +0,0 @@
|
|
1 |
-
black
|
2 |
-
flake
|
3 |
-
isort
|
4 |
-
mypy
|
|
|
|
|
|
|
|
|
|
requirements.txt
DELETED
@@ -1,2 +0,0 @@
|
|
1 |
-
pandas~=2.0.1
|
2 |
-
streamlit~=1.22.0
|
|
|
|
|
|
setup.cfg
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
[flake8]
|
2 |
+
max-line-length = 88
|
streamlit_app.py
CHANGED
@@ -4,7 +4,8 @@ from collections.abc import Iterable
|
|
4 |
|
5 |
import pandas as pd
|
6 |
import streamlit as st
|
7 |
-
from pandas.api.types import is_bool_dtype, is_datetime64_any_dtype,
|
|
|
8 |
|
9 |
GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
|
10 |
NON_BENCHMARK_COLS = ["Open?", "Publisher"]
|
@@ -22,11 +23,13 @@ def extract_table_and_format_from_markdown_text(markdown_table: str) -> pd.DataF
|
|
22 |
df = (
|
23 |
pd.read_table(io.StringIO(markdown_table), sep="|", header=0, index_col=1)
|
24 |
.dropna(axis=1, how="all") # drop empty columns
|
25 |
-
.iloc[
|
|
|
|
|
26 |
.sort_index(ascending=True)
|
27 |
.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
|
28 |
.replace("", float("NaN"))
|
29 |
-
.
|
30 |
)
|
31 |
|
32 |
# remove whitespace from column names and index
|
@@ -37,7 +40,9 @@ def extract_table_and_format_from_markdown_text(markdown_table: str) -> pd.DataF
|
|
37 |
return df
|
38 |
|
39 |
|
40 |
-
def extract_markdown_table_from_multiline(
|
|
|
|
|
41 |
"""Extracts the markdown table from a multiline string.
|
42 |
|
43 |
Args:
|
@@ -89,7 +94,9 @@ def remove_markdown_links(text: str) -> str:
|
|
89 |
return text
|
90 |
|
91 |
|
92 |
-
def filter_dataframe_by_row_and_columns(
|
|
|
|
|
93 |
"""
|
94 |
Filter dataframe by the rows and columns to display.
|
95 |
|
@@ -116,7 +123,8 @@ def filter_dataframe_by_row_and_columns(df: pd.DataFrame, ignore_columns: list[s
|
|
116 |
df = pd.DataFrame(df.loc[to_filter_index])
|
117 |
|
118 |
to_filter_columns = st.multiselect(
|
119 |
-
"Filter by benchmark:",
|
|
|
120 |
)
|
121 |
if to_filter_columns:
|
122 |
df = pd.DataFrame(df[ignore_columns + to_filter_columns])
|
@@ -173,7 +181,9 @@ def filter_dataframe_by_column_values(df: pd.DataFrame) -> pd.DataFrame:
|
|
173 |
),
|
174 |
)
|
175 |
if isinstance(user_date_input, Iterable) and len(user_date_input) == 2:
|
176 |
-
user_date_input_datetime = tuple(
|
|
|
|
|
177 |
start_date, end_date = user_date_input_datetime
|
178 |
df = df.loc[df[column].between(start_date, end_date)]
|
179 |
|
@@ -207,22 +217,30 @@ def setup_basic():
|
|
207 |
|
208 |
|
209 |
def setup_leaderboard(readme: str):
|
210 |
-
leaderboard_table = extract_markdown_table_from_multiline(
|
|
|
|
|
211 |
leaderboard_table = remove_markdown_links(leaderboard_table)
|
212 |
df_leaderboard = extract_table_and_format_from_markdown_text(leaderboard_table)
|
213 |
-
df_leaderboard["Open?"] =
|
|
|
|
|
214 |
|
215 |
st.markdown("## Leaderboard")
|
216 |
modify = st.checkbox("Add filters")
|
217 |
clear_empty_entries = st.checkbox("Clear empty entries", value=True)
|
218 |
|
219 |
if modify:
|
220 |
-
df_leaderboard = filter_dataframe_by_row_and_columns(
|
|
|
|
|
221 |
df_leaderboard = filter_dataframe_by_column_values(df_leaderboard)
|
222 |
|
223 |
if clear_empty_entries:
|
224 |
df_leaderboard = df_leaderboard.dropna(axis=1, how="all")
|
225 |
-
benchmark_columns = [
|
|
|
|
|
226 |
rows_wo_any_benchmark = df_leaderboard[benchmark_columns].isna().all(axis=1)
|
227 |
df_leaderboard = df_leaderboard[~rows_wo_any_benchmark]
|
228 |
|
@@ -246,12 +264,16 @@ def setup_leaderboard(readme: str):
|
|
246 |
|
247 |
|
248 |
def setup_benchmarks(readme: str):
|
249 |
-
benchmarks_table = extract_markdown_table_from_multiline(
|
|
|
|
|
250 |
df_benchmarks = extract_table_and_format_from_markdown_text(benchmarks_table)
|
251 |
|
252 |
st.markdown("## Covered Benchmarks")
|
253 |
|
254 |
-
selected_benchmark = st.selectbox(
|
|
|
|
|
255 |
df_selected = df_benchmarks.loc[selected_benchmark]
|
256 |
text = [
|
257 |
f"Name: {selected_benchmark}",
|
|
|
4 |
|
5 |
import pandas as pd
|
6 |
import streamlit as st
|
7 |
+
from pandas.api.types import (is_bool_dtype, is_datetime64_any_dtype,
|
8 |
+
is_numeric_dtype)
|
9 |
|
10 |
GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
|
11 |
NON_BENCHMARK_COLS = ["Open?", "Publisher"]
|
|
|
23 |
df = (
|
24 |
pd.read_table(io.StringIO(markdown_table), sep="|", header=0, index_col=1)
|
25 |
.dropna(axis=1, how="all") # drop empty columns
|
26 |
+
.iloc[
|
27 |
+
1:
|
28 |
+
] # drop first row which is the "----" separator of the original markdown table
|
29 |
.sort_index(ascending=True)
|
30 |
.apply(lambda x: x.str.strip() if x.dtype == "object" else x)
|
31 |
.replace("", float("NaN"))
|
32 |
+
.apply(pd.to_numeric, errors="ignore")
|
33 |
)
|
34 |
|
35 |
# remove whitespace from column names and index
|
|
|
40 |
return df
|
41 |
|
42 |
|
43 |
+
def extract_markdown_table_from_multiline(
|
44 |
+
multiline: str, table_headline: str, next_headline_start: str = "#"
|
45 |
+
) -> str:
|
46 |
"""Extracts the markdown table from a multiline string.
|
47 |
|
48 |
Args:
|
|
|
94 |
return text
|
95 |
|
96 |
|
97 |
+
def filter_dataframe_by_row_and_columns(
|
98 |
+
df: pd.DataFrame, ignore_columns: list[str] | None = None
|
99 |
+
) -> pd.DataFrame:
|
100 |
"""
|
101 |
Filter dataframe by the rows and columns to display.
|
102 |
|
|
|
123 |
df = pd.DataFrame(df.loc[to_filter_index])
|
124 |
|
125 |
to_filter_columns = st.multiselect(
|
126 |
+
"Filter by benchmark:",
|
127 |
+
sorted([c for c in df.columns if c not in ignore_columns]),
|
128 |
)
|
129 |
if to_filter_columns:
|
130 |
df = pd.DataFrame(df[ignore_columns + to_filter_columns])
|
|
|
181 |
),
|
182 |
)
|
183 |
if isinstance(user_date_input, Iterable) and len(user_date_input) == 2:
|
184 |
+
user_date_input_datetime = tuple(
|
185 |
+
map(pd.to_datetime, user_date_input)
|
186 |
+
)
|
187 |
start_date, end_date = user_date_input_datetime
|
188 |
df = df.loc[df[column].between(start_date, end_date)]
|
189 |
|
|
|
217 |
|
218 |
|
219 |
def setup_leaderboard(readme: str):
|
220 |
+
leaderboard_table = extract_markdown_table_from_multiline(
|
221 |
+
readme, table_headline="## Leaderboard"
|
222 |
+
)
|
223 |
leaderboard_table = remove_markdown_links(leaderboard_table)
|
224 |
df_leaderboard = extract_table_and_format_from_markdown_text(leaderboard_table)
|
225 |
+
df_leaderboard["Open?"] = (
|
226 |
+
df_leaderboard["Open?"].map({"yes": 1, "no": 0}).astype(bool)
|
227 |
+
)
|
228 |
|
229 |
st.markdown("## Leaderboard")
|
230 |
modify = st.checkbox("Add filters")
|
231 |
clear_empty_entries = st.checkbox("Clear empty entries", value=True)
|
232 |
|
233 |
if modify:
|
234 |
+
df_leaderboard = filter_dataframe_by_row_and_columns(
|
235 |
+
df_leaderboard, ignore_columns=NON_BENCHMARK_COLS
|
236 |
+
)
|
237 |
df_leaderboard = filter_dataframe_by_column_values(df_leaderboard)
|
238 |
|
239 |
if clear_empty_entries:
|
240 |
df_leaderboard = df_leaderboard.dropna(axis=1, how="all")
|
241 |
+
benchmark_columns = [
|
242 |
+
c for c in df_leaderboard.columns if df_leaderboard[c].dtype == float
|
243 |
+
]
|
244 |
rows_wo_any_benchmark = df_leaderboard[benchmark_columns].isna().all(axis=1)
|
245 |
df_leaderboard = df_leaderboard[~rows_wo_any_benchmark]
|
246 |
|
|
|
264 |
|
265 |
|
266 |
def setup_benchmarks(readme: str):
|
267 |
+
benchmarks_table = extract_markdown_table_from_multiline(
|
268 |
+
readme, table_headline="## Benchmarks"
|
269 |
+
)
|
270 |
df_benchmarks = extract_table_and_format_from_markdown_text(benchmarks_table)
|
271 |
|
272 |
st.markdown("## Covered Benchmarks")
|
273 |
|
274 |
+
selected_benchmark = st.selectbox(
|
275 |
+
"Select a benchmark to learn more:", df_benchmarks.index.unique()
|
276 |
+
)
|
277 |
df_selected = df_benchmarks.loc[selected_benchmark]
|
278 |
text = [
|
279 |
f"Name: {selected_benchmark}",
|