# yesssssssss's picture
# init
# 5faa10b
# raw
# history blame
# 3.31 kB
import sys
# Path of the TabPFN checkout; it is also handed to TabPFNClassifier as base_path.
tabpfn_path = 'TabPFN'
# The checkout is put first on the module search path *before* the TabPFN import below.
# NOTE(review): presumably needed so the submodule's own absolute imports resolve - confirm.
sys.path.insert(0, tabpfn_path) # our submodule of the TabPFN repo (at 045c8400203ebd062346970b4f2c0ccda5a40618)
from TabPFN.scripts.transformer_prediction_interface import TabPFNClassifier
import numpy as np
import pandas as pd
import torch
import gradio as gr
import openml
from sklearn.model_selection import cross_val_score
def compute(file, y_attribute, cv_folds):
    """Cross-validate TabPFN on an uploaded .arff file and report ROC AUC (OVO).

    Args:
        file: Uploaded file object exposing a ``.name`` path, or ``None`` when
            nothing has been uploaded.
        y_attribute: Name of the attribute to use as the prediction target.
        cv_folds: Number of cross-validation folds.

    Returns:
        A ``(message, y_attribute)`` pair. ``message`` is either a prompt
        (no/unsupported file, unknown target attribute) or the cross-validation
        summary; ``y_attribute`` is passed through unchanged.
    """
    if file is None or not file.name.endswith('.arff'):
        return 'Please upload a .arff file', y_attribute

    dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)

    # First pass without a target: only the attribute names are needed, to
    # validate the requested target before splitting it off.
    _, _, _, attribute_names = dataset.get_data(dataset_format="array")
    if y_attribute not in attribute_names:
        return f"**Select attribute from {', '.join(attribute_names)}**", y_attribute
    X, y, _, _ = dataset.get_data(dataset_format="array", target=y_attribute)

    # Fixed-seed shuffle so that the folds are reproducible between runs.
    order = np.arange(y.shape[0])
    np.random.seed(13)
    np.random.shuffle(order)
    X, y = torch.tensor(X[order]), torch.tensor(y[order])

    classifier = TabPFNClassifier(base_path=tabpfn_path, device='cpu')
    scores = cross_val_score(classifier, X, y, cv=cv_folds, scoring='roc_auc_ovo')
    print(scores)

    # The 1024 limit concerns the *training* split. Each fold holds out about
    # n // cv_folds rows for testing, so the model is fitted on the remainder,
    # not on n // cv_folds rows (the two only coincide for 2 folds).
    n_samples = len(y)
    largest_train_split = n_samples - n_samples // cv_folds
    if largest_train_split > 1024:
        note = ("The PFN is only trained for datasets with up to 1024 training examples "
                "and it had to extrapolate to greater datasets for this evaluation.")
    else:
        note = ""

    summary = f"ROC AUC OVO Cross Val mean is {sum(scores) / len(scores)} from {scores}. "
    return summary + note, y_attribute
def upload_file(file):
    """Inspect a freshly uploaded file and suggest a target attribute.

    Args:
        file: Uploaded file object exposing a ``.name`` path, or ``None`` when
            the upload was cleared.

    Returns:
        A ``(message, default_target)`` pair. For a .arff file the message lists
        the available attribute names and the default target is the last
        attribute; otherwise the message asks for a .arff file and the default
        target is ``None``.
    """
    # Always hand back one value per wired output component; a bare ``None``
    # for a cleared upload does not match the two outputs this callback feeds.
    if file is None or not file.name.endswith('.arff'):
        return 'Please upload a .arff file', None

    dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=file.name)
    _, _, _, attribute_names = dataset.get_data(dataset_format="array")
    return f"Select attribute from {', '.join(attribute_names)}", attribute_names[-1]
# Assemble the Gradio page: a file drop, a fold selector, a markdown status
# area and a text box holding the target attribute, then wire the callbacks.
with gr.Blocks() as demo:
    gr.Markdown(
        "This demo allows you to play with the **TabPFN**.\n"
        "Upload a .arff file, select an attribute to predict and the number of cross validation folds and get the ROC AUC OVO score for one seed.\n"
    )

    # Components are created in the order in which they appear on the page.
    inp_file = gr.File(label='Drop a .arff file.')
    cv_folds = gr.Dropdown([2, 3, 4, 5], value=2, label='Number of CV folds')
    out_text = gr.Markdown()
    y_attribute = gr.Textbox(label='y attribute')

    # Bundled example; its upload_file result is cached.
    examples = gr.Examples(
        examples=['balance-scale.arff'],
        inputs=[inp_file],
        outputs=[out_text, y_attribute],
        fn=upload_file,
        cache_examples=True,
    )

    btn = gr.Button("Predict Empty Table Cells")

    # A new upload refreshes the attribute hint; the button runs the evaluation.
    inp_file.change(
        fn=upload_file,
        inputs=inp_file,
        outputs=[out_text, y_attribute],
    )
    btn.click(
        fn=compute,
        inputs=[inp_file, y_attribute, cv_folds],
        outputs=[out_text, y_attribute],
    )

demo.launch()