Spaces:
Running
Running
# Public names of this module. Every entry must be defined in this file:
# a stale name in ``__all__`` makes ``from <module> import *`` raise
# AttributeError. The previous list also named make_clickable_model,
# make_clickable_user and get_submissions, none of which are defined here.
__all__ = ["block", "get_baseline_df"]
import gradio as gr | |
import pandas as pd | |
# Column headers of the leaderboard table, in display order.
# "Avg" is not stored in the raw rows below; get_baseline_df() computes it and
# inserts it right after "Size".
COLUMN_NAMES = ["Model", "Size", "Avg", "PPDB", "PPDB filtered", "Turney", "BIRD", "YAGO", "UMLS", "CoNLL", "BC5CDR", "AutoFJ"]

# Raw results, one model per line, written like the body of a LaTeX table:
# cells are separated by "&" and each row ends with a backslash (the "\\"
# below is an escaped single backslash because this is not a raw string).
# Cell order: markdown link to the model, parameter count, then one score per
# dataset in the same order as COLUMN_NAMES (without "Avg").
UNTUNED_MODEL_RESULTS = '''[FastText](https://fasttext.cc/) &--&94.4&61.2&59.6&58.9&16.9&14.5&3.0&0.2&53.6 \\
[Sentence-BERT](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) &110M&94.6&66.8&50.4&62.6&21.6&23.6&25.5&48.4&57.2 \\
[Phrase-BERT](https://huggingface.co/whaleloops/phrase-bert) &110M&96.8&68.7&57.2&68.8&23.7&26.1&35.4&59.5&66.9 \\
[UCTopic](https://github.com/JiachengLi1995/UCTopic) &240M&91.2&64.6&60.2&60.2&5.2&6.9&18.3&33.3&29.5 \\
[E5-small](https://huggingface.co/intfloat/e5-small-v2) &34M&96.0&56.8&55.9&63.1&43.3&42.0&27.6&53.7&74.8 \\
[E5-base](https://huggingface.co/intfloat/e5-base-v2) &110M&95.4&65.6&59.4&66.3&47.3&44.0&32.0&69.3&76.1\\
[PEARL-small](https://huggingface.co/Lihuchen/pearl_small) &34M& 97.0&70.2&57.9&68.1& 48.1&44.5&42.4&59.3&75.2\\
[PEARL-base](https://huggingface.co/Lihuchen/pearl_base) &110M&97.3&72.2&59.7&72.6&50.7&45.8&39.3&69.4&77.1\\'''
def parse_line(line):
    """Split one ``&``-separated result row into ``[model, size, score, ...]``.

    Args:
        line: A single row in the format used by ``UNTUNED_MODEL_RESULTS``:
            cells separated by ``&``, optionally terminated by backslashes.
            Surrounding whitespace and leading/trailing backslashes are
            ignored.

    Returns:
        A list whose first item is the model cell (inner spaces preserved),
        whose second item is the size cell as a string with spaces removed,
        and whose remaining items are the scores converted to ``float``.
        A row with fewer than three cells yields just the text cells.

    Raises:
        ValueError: If a score cell cannot be converted to ``float``.
    """
    # Trim whitespace first so a trailing space/newline after the row
    # terminator does not stop the backslashes from being stripped.
    cells = line.strip().strip("\\").split("&")

    # Only trim the model cell: removing every space would mangle a link
    # label such as "[My Model](...)".
    model = cells[0].strip()

    # Size and score cells never need inner spaces, so drop them all.
    rest = [cell.replace(" ", "") for cell in cells[1:]]

    return [model, *rest[:1], *(float(score) for score in rest[1:])]
def get_baseline_df():
    """Build the leaderboard table from ``UNTUNED_MODEL_RESULTS``.

    Each non-blank line of ``UNTUNED_MODEL_RESULTS`` is parsed with
    ``parse_line``; the mean of its scores is inserted as the "Avg" column
    right after the size, and the rows are assembled into a DataFrame whose
    columns are ``COLUMN_NAMES``.

    Returns:
        pandas.DataFrame: One row per model, numeric columns rounded to one
        decimal place.

    Raises:
        ValueError: If a row does not contain exactly one cell per column
            (excluding the computed "Avg" column).
    """
    # Every column except the computed "Avg" must be present in a raw row.
    expected_cells = len(COLUMN_NAMES) - 1

    table_rows = []
    for line_no, line in enumerate(UNTUNED_MODEL_RESULTS.splitlines(), start=1):
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline in the data).
            continue

        cells = parse_line(line)
        # Explicit check instead of ``assert`` so it survives ``python -O``.
        if len(cells) != expected_cells:
            raise ValueError(
                f"Result row {line_no} has {len(cells)} cells, "
                f"expected {expected_cells}: {line!r}"
            )

        scores = cells[2:]
        average = sum(scores) / len(scores)
        # Layout: model, size, average, then the per-dataset scores.
        table_rows.append(cells[:2] + [average] + scores)

    return pd.DataFrame(table_rows, columns=COLUMN_NAMES).round(1)
# Label shown above the citation text box.
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

# BibTeX entry shown in the citation text box. Raw string so that the LaTeX
# accent ``{\"e}`` is kept verbatim. The braces must stay balanced: the entry
# is closed by exactly one final "}".
CITATION_BUTTON_TEXT = r"""@article{chen2024learning,
title={Learning High-Quality and General-Purpose Phrase Representations},
author={Chen, Lihu and Varoquaux, Ga{\"e}l and Suchanek, Fabian M},
journal={arXiv preprint arXiv:2401.10407},
year={2024}
}"""
# Leaderboard page: intro banner, task description, results table with a
# refresh button, and a citation box.
#
# NOTE(review): the emoji in the Markdown below were garbled in the source.
# The oyster/white-circle, hugging-face and floppy-disk emoji were recovered
# from the surviving bytes; the trophy before "Our PEARL leaderboard" and the
# scroll before "paper" are best guesses -- confirm against the live page.
block = gr.Blocks()
with block:
    # The continuation lines of the Markdown literals are deliberately not
    # indented: leading spaces would become part of the rendered Markdown.
    gr.Markdown(
        """# 🦪⚪ The PEARL-Leaderboard aims to evaluate string embeddings on various tasks.
🏆 Our PEARL leaderboard contains 9 phrase-level datasets of five types of tasks, covering both the tasks of data science and natural language processing. <br>
| **[ 📜 paper](https://arxiv.org/pdf/2401.10407.pdf)** | **[🤗 PEARL-small](https://huggingface.co/Lihuchen/pearl_small)** | **[🤗 PEARL-base](https://huggingface.co/Lihuchen/pearl_base)** | 🤗 **[PEARL-Benchmark](https://huggingface.co/datasets/Lihuchen/pearl_benchmark)** |
**[💾 data](https://zenodo.org/records/10676475)** |
"""
    )
    gr.Markdown(
        """ ## Task Description<br>
* **Paraphrase Classification**: PPDB and PPDBfiltered ([Wang et al., 2021](https://aclanthology.org/2021.emnlp-main.846/))
* **Phrase Similarity**: Turney ([Turney, 2012](https://arxiv.org/pdf/1309.4035.pdf)) and BIRD ([Asaadi et al., 2019](https://aclanthology.org/N19-1050/))
* **Entity Retrieval**: We constructed two datasets based on Yago ([Pellissier Tanon et al., 2020](https://hal-lara.archives-ouvertes.fr/DIG/hal-03108570v1)) and UMLS ([Bodenreider, 2004](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC308795/))
* **Entity Clustering**: CoNLL 03 ([Tjong Kim Sang, 2002](https://aclanthology.org/W02-2024/)) and BC5CDR ([Li et al., 2016](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC4860626/))
* **Fuzzy Join**: AutoFJ benchmark ([Li et al., 2021](https://arxiv.org/abs/2103.04489)) contains 50 diverse fuzzy-join datasets
"""
    )

    with gr.Row():
        # "Model" and "Size" cells are rendered as markdown (the model cell is
        # a link); every remaining column is numeric. Deriving the list from
        # COLUMN_NAMES keeps it in sync if a dataset column is added.
        data = gr.components.Dataframe(
            type="pandas",
            datatype=["markdown", "markdown"] + ["number"] * (len(COLUMN_NAMES) - 2),
        )

    with gr.Row():
        data_run = gr.Button("Refresh")
        # Rebuild the table on demand.
        data_run.click(get_baseline_df, outputs=data)

    with gr.Row():
        with gr.Accordion("Citation", open=True):
            citation_button = gr.Textbox(
                value=CITATION_BUTTON_TEXT,
                label=CITATION_BUTTON_LABEL,
                elem_id="citation-button",
            )
            #.style(show_copy_button=True)

    # Fill the table as soon as the page is loaded.
    block.load(get_baseline_df, outputs=data)

block.launch()