Spaces:
Running
Running
Ludwig Stumpp
committed on
Commit
·
697be1a
1
Parent(s):
bc01ae8
First entries and streamlit app
Browse files- .vscode/extensions.json +5 -0
- README.md +13 -5
- requirements-dev.txt +3 -0
- requirements.txt +2 -0
- streamlit_app.py +100 -0
.vscode/extensions.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"recommendations": [
|
3 |
+
"takumii.markdowntable"
|
4 |
+
]
|
5 |
+
}
|
README.md
CHANGED
@@ -1,10 +1,18 @@
|
|
1 |
# llm-leaderboard
|
2 |
A joint community effort to create one central leaderboard for LLMs
|
3 |
|
|
|
|
|
4 |
### Leaderboard
|
5 |
|
6 |
-
|
|
7 |
-
|
8 |
-
|
|
9 |
-
|
|
10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
# llm-leaderboard
|
2 |
A joint community effort to create one central leaderboard for LLMs
|
3 |
|
4 |
+
Visit the interactive leaderboard at TODO.
|
5 |
+
|
6 |
### Leaderboard
|
7 |
|
8 |
+
| Model Name | [Chatbot Arena Elo](https://lmsys.org/blog/2023-05-03-arena/) |
|
9 |
+
| --------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------------- |
|
10 |
+
| [alpaca-13b](https://crfm.stanford.edu/2023/03/13/alpaca.html) | 1008 |
|
11 |
+
| [chatglm-6b](https://chatglm.cn/blog) | 985 |
|
12 |
+
| [dolly-v2-12b](https://www.databricks.com/blog/2023/04/12/dolly-first-open-commercially-viable-instruction-tuned-llm) | 944 |
|
13 |
+
| [fastchat-t5-3b](https://huggingface.co/lmsys/fastchat-t5-3b-v1.0) | 951 |
|
14 |
+
| [koala-13b](https://bair.berkeley.edu/blog/2023/04/03/koala/) | 1082 |
|
15 |
+
| [llama-13b](https://ai.facebook.com/blog/large-language-model-llama-meta-ai/) | 932 |
|
16 |
+
| [stablelm-tuned-alpha-7b](https://github.com/stability-AI/stableLM) | 858 |
|
17 |
+
| [vicuna-13b](https://lmsys.org/blog/2023-03-30-vicuna/) | 1169 |
|
18 |
+
| [oasst-pythia-12b](https://open-assistant.io/) | 1065 |
|
requirements-dev.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
black
|
2 |
+
flake8
|
3 |
+
mypy
|
requirements.txt
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
pandas~=2.0.1
|
2 |
+
streamlit~=1.22.0
|
streamlit_app.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pandas as pd
|
2 |
+
import streamlit as st
|
3 |
+
import io
|
4 |
+
import requests
|
5 |
+
|
6 |
+
REPO_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
|
7 |
+
|
8 |
+
|
def grab_readme_file_from_repo(repo_url: str) -> str:
    """Grabs the README.md file from a GitHub repository.

    Args:
        repo_url (str): URL of the GitHub repository.

    Returns:
        str: Content of the README.md file.

    Raises:
        requests.HTTPError: If the README.md could not be downloaded.
        requests.Timeout: If GitHub does not respond within the timeout.
    """
    readme_url = repo_url.replace("github.com", "raw.githubusercontent.com") + "/main/README.md"
    # A timeout prevents the Streamlit app from hanging forever if GitHub
    # is slow or unreachable (requests has no timeout by default).
    response = requests.get(readme_url, timeout=10)
    # Fail loudly on 404/5xx; otherwise the error body (e.g. "404: Not Found")
    # would be parsed downstream as if it were the README content.
    response.raise_for_status()
    return response.text
21 |
+
|
22 |
+
|
def extract_markdown_table_from_multiline(multiline: str, table_headline: str) -> str:
    """Extracts the markdown table from a multiline string.

    Collects every line between the given headline and the next "###"
    headline (or end of input).

    Args:
        multiline (str): content of README.md file.
        table_headline (str): Headline of the table in the README.md file.

    Returns:
        str: Markdown table.

    Raises:
        ValueError: If the table could not be found.
    """
    collected = []
    inside_section = False

    for row in multiline.split("\n"):
        # Headline match is checked first so the headline itself (which also
        # starts with "###") toggles collection ON rather than OFF.
        if row.startswith(table_headline):
            inside_section = True
            continue
        if row.startswith("###"):
            inside_section = False
            continue
        if inside_section:
            collected.append(row)

    if not collected:
        raise ValueError(f"Could not find table with headline '{table_headline}'")

    # Re-join with trailing newline per line, matching the captured section.
    return "\n".join(collected) + "\n"
51 |
+
|
52 |
+
|
def setup_basic():
    """Configure the Streamlit page and render the app header/intro text."""
    app_title = "LLM-Leaderboard"

    # set_page_config must be the first Streamlit command in the script run.
    st.set_page_config(
        page_title=app_title,
        page_icon="π",
    )
    st.title(app_title)

    st.markdown(
        """
        A joint community effort to create one central leaderboard for LLMs.
        Visit [llm-leaderboard](https://github.com/LudwigStumpp/llm-leaderboard) to contribute.
        """
    )
68 |
+
|
69 |
+
|
def setup_table():
    """Fetch the leaderboard table from the repo README and render it."""
    readme = grab_readme_file_from_repo(REPO_URL)
    markdown_table = extract_markdown_table_from_multiline(readme, table_headline="### Leaderboard")

    # Parse the pipe-delimited markdown table; first model-name column
    # becomes the index.
    df = pd.read_table(io.StringIO(markdown_table), sep="|", header=0, skipinitialspace=True, index_col=1)
    df = df.dropna(axis=1, how="all")  # drop empty columns from leading/trailing pipes
    df = df.iloc[1:]  # drop first row which is the "----" separator of the original markdown table

    # show interactive table
    st.dataframe(df)
82 |
+
|
83 |
+
|
def setup_footer():
    """Render the horizontal rule and credits line at the bottom of the page."""
    st.markdown(
        """
        ---
        Made with β€οΈ by the awesome open-source community from all over π.
        """
    )
91 |
+
|
92 |
+
|
def main():
    """Build the full page top-to-bottom: header, leaderboard table, footer."""
    for render_section in (setup_basic, setup_table, setup_footer):
        render_section()
97 |
+
|
98 |
+
|
99 |
+
if __name__ == "__main__":
|
100 |
+
main()
|