Ludwig Stumpp committed on
Commit
f008087
·
1 Parent(s): 37bf1e8

Remove links from streamlit table

Browse files
Files changed (1) hide show
  1. streamlit_app.py +79 -8
streamlit_app.py CHANGED
@@ -2,6 +2,7 @@ import pandas as pd
2
  import streamlit as st
3
  import io
4
  import requests
 
5
 
6
  REPO_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
7
 
@@ -20,6 +21,82 @@ def grab_readme_file_from_repo(repo_url: str) -> str:
20
  return readme
21
 
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def extract_markdown_table_from_multiline(multiline: str, table_headline: str) -> str:
24
  """Extracts the markdown table from a multiline string.
25
 
@@ -70,14 +147,8 @@ def setup_basic():
70
  def setup_table():
71
  readme = grab_readme_file_from_repo(REPO_URL)
72
  markdown_table = extract_markdown_table_from_multiline(readme, table_headline="### Leaderboard")
73
-
74
- df = (
75
- pd.read_table(io.StringIO(markdown_table), sep="|", header=0, skipinitialspace=True, index_col=1)
76
- .dropna(axis=1, how="all") # drop empty columns
77
- .iloc[1:] # drop first row which is the "----" separator of the original markdown table
78
- )
79
-
80
- # show interactive table
81
  st.dataframe(df)
82
 
83
 
 
2
  import streamlit as st
3
  import io
4
  import requests
5
+ import re
6
 
7
  REPO_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
8
 
 
21
  return readme
22
 
23
 
24
def modify_from_markdown_links_to_html_links(text: str) -> str:
    """Replace every markdown link in a text with an HTML link that opens in a new tab.

    Example: [DISPLAY](LINK) becomes <a href="LINK" target="_blank">DISPLAY</a>

    Args:
        text (str): Markdown text containing markdown links.

    Returns:
        str: The same text with each markdown link rewritten as an HTML anchor.
            Text without markdown links is returned unchanged.
    """
    # One regex pass: group 1 is the display text, group 2 the link target.
    # Substituting in a single pass rewrites each link exactly once and never
    # re-scans text that has already been replaced.
    return re.sub(
        r"\[([^\]]+)\]\(([^)]+)\)",
        r'<a href="\2" target="_blank">\1</a>',
        text,
    )
47
+
48
+
49
def remove_markdown_links(text: str) -> str:
    """Strip markdown links from a text, keeping only their display text.

    Example: [DISPLAY](LINK) becomes DISPLAY

    Args:
        text (str): Markdown text containing markdown links.

    Returns:
        str: The same text with each markdown link replaced by its display text.
            Text without markdown links is returned unchanged.
    """
    # One regex pass: group 1 is the display text, group 2 the (discarded) link.
    return re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"\1", text)
72
+
73
+
74
def _to_numeric_if_possible(column: pd.Series) -> pd.Series:
    """Return the column converted to a numeric dtype, or unchanged if it cannot be converted."""
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        # Explicit replacement for the deprecated errors="ignore" option:
        # columns that are not fully numeric are kept as they are.
        return column


def extract_table_and_format_from_markdown_text(markdown_table: str) -> pd.DataFrame:
    """Parse a markdown table into a pandas DataFrame.

    The table is read as pipe-separated text, with the first line as the header
    and the second parsed column as the index. Columns that are entirely empty
    are dropped, the markdown "----" separator row is removed, columns that can
    be parsed as numbers are converted to a numeric dtype, and surrounding
    whitespace is stripped from the column names and the index labels.

    Args:
        markdown_table (str): Markdown text containing only the table.

    Returns:
        pd.DataFrame: Table as pandas DataFrame.
    """
    df = (
        # index_col=1: presumably each row starts with "|", which makes the
        # first parsed column empty and the second one the row label.
        pd.read_table(io.StringIO(markdown_table), sep="|", header=0, index_col=1)
        .dropna(axis=1, how="all")  # drop empty columns
        .iloc[1:]  # drop first row which is the "----" separator of the original markdown table
    )

    # convert every column that is fully numeric; leave the others untouched
    for col in df.columns:
        df[col] = _to_numeric_if_possible(df[col])

    # remove whitespace from column names and index
    df.columns = df.columns.str.strip()
    df.index = df.index.str.strip()

    return df
98
+
99
+
100
  def extract_markdown_table_from_multiline(multiline: str, table_headline: str) -> str:
101
  """Extracts the markdown table from a multiline string.
102
 
 
147
def setup_table():
    """Build the leaderboard DataFrame from the repository README and display it.

    The README of REPO_URL is fetched, the markdown table under the
    "### Leaderboard" headline is extracted, its markdown links are removed,
    and the parsed DataFrame is handed to st.dataframe.
    """
    leaderboard_markdown = remove_markdown_links(
        extract_markdown_table_from_multiline(
            grab_readme_file_from_repo(REPO_URL),
            table_headline="### Leaderboard",
        )
    )
    st.dataframe(extract_table_and_format_from_markdown_text(leaderboard_markdown))
153
 
154