Spaces:
Build error
Build error
import sys | |
tabpfn_path = 'TabPFN' | |
sys.path.insert(0, tabpfn_path) # our submodule of the TabPFN repo (at 045c8400203ebd062346970b4f2c0ccda5a40618) | |
from TabPFN.scripts.transformer_prediction_interface import TabPFNClassifier | |
import numpy as np | |
import pandas as pd | |
import torch | |
import gradio as gr | |
import openml | |
from sklearn.model_selection import cross_val_score | |
def compute(file, y_attribute, cv_folds):
    """Cross-validate TabPFN on an uploaded .arff file and report ROC AUC (OVO).

    Args:
        file: Uploaded file object exposing a ``name`` attribute (the path on
            disk), or ``None`` when nothing has been uploaded yet.
        y_attribute: Name of the attribute to use as prediction target.
        cv_folds: Number of cross-validation folds (an integer).

    Returns:
        A ``(message, y_attribute)`` tuple. ``message`` is either a prompt
        (wrong/missing file, unknown attribute) or the cross-validation
        summary; ``y_attribute`` is passed through unchanged.
    """
    if file is None or not file.name.endswith('.arff'):
        return 'Please upload a .arff file', y_attribute

    dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)

    # First pass without a target: only the attribute names are needed, to
    # validate the user's choice before splitting off the target column.
    _, _, _, attribute_names = dataset.get_data(dataset_format="array")
    if y_attribute not in attribute_names:
        return f"**Select attribute from {', '.join(attribute_names)}**", y_attribute

    X, y, _, _ = dataset.get_data(dataset_format="array", target=y_attribute)

    # Shuffle the rows with a fixed seed so repeated runs use the same order.
    # NOTE(review): the global NumPy seed is set on purpose here; downstream
    # code may rely on it for reproducibility - confirm before localizing it.
    order = np.arange(y.shape[0])
    np.random.seed(13)
    np.random.shuffle(order)
    X, y = torch.tensor(X[order]), torch.tensor(y[order])

    classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
    scores = cross_val_score(classifier, X, y, cv=cv_folds, scoring='roc_auc_ovo')
    print(scores)

    # Each split trains on everything except one held-out fold, so the size
    # that matters for the 1024-example limit is n minus one fold, not the
    # size of the held-out fold itself.
    n_samples = len(y)
    largest_train_split = n_samples - n_samples // cv_folds
    if largest_train_split > 1024:
        extrapolation_note = (
            "The PFN is only trained for datasets with up to 1024 training examples "
            "and it had to extrapolate to greater datasets for this evaluation."
        )
    else:
        extrapolation_note = ""

    summary = f"ROC AUC OVO Cross Val mean is {sum(scores) / len(scores)} from {scores}. "
    return summary + extrapolation_note, y_attribute
def upload_file(file):
    """Propose a target attribute for a freshly uploaded .arff file.

    Args:
        file: Uploaded file object exposing a ``name`` attribute (the path on
            disk), or ``None`` when the upload field is empty.

    Returns:
        A ``(message, default_attribute)`` tuple. For a .arff file the message
        lists the attribute names and the default is the last attribute;
        otherwise the message asks for a .arff file and the default is
        ``None``.
    """
    # Always return two values: this callback is wired to two output
    # components, so a bare ``return`` would not match them.
    if file is None or not file.name.endswith('.arff'):
        return 'Please upload a .arff file', None

    dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)
    _, _, _, attribute_names = dataset.get_data(dataset_format="array")
    return f"Select attribute from {', '.join(attribute_names)}", attribute_names[-1]
# Build the demo page: intro text, file upload, fold selector, result text,
# target-attribute box, a bundled example and the trigger button.
with gr.Blocks() as demo:
    gr.Markdown(
        value="""This demo allows you to play with the **TabPFN**.
Upload a .arff file, select an attribute to predict and the number of cross validation folds and get the ROC AUC OVO score for one seed.
"""
    )

    inp_file = gr.File(label='Drop a .arff file.')
    cv_folds = gr.Dropdown(
        choices=[2, 3, 4, 5],
        value=2,
        label='Number of CV folds',
    )
    out_text = gr.Markdown()
    y_attribute = gr.Textbox(label='y attribute')

    # Bundled example; its upload_file result is cached.
    examples = gr.Examples(
        examples=['balance-scale.arff'],
        fn=upload_file,
        inputs=[inp_file],
        outputs=[out_text, y_attribute],
        cache_examples=True,
    )

    btn = gr.Button(value="Predict Empty Table Cells")

    # A new upload refreshes the attribute hint; the button runs the evaluation.
    inp_file.change(upload_file, inp_file, [out_text, y_attribute])
    btn.click(compute, [inp_file, y_attribute, cv_folds], [out_text, y_attribute])

demo.launch()