# Spaces: Running
import numpy as np | |
import matplotlib.pyplot as plt | |
from matplotlib.colors import ListedColormap | |
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import StandardScaler | |
from sklearn.datasets import make_moons, make_circles, make_classification | |
from sklearn.neural_network import MLPClassifier | |
from sklearn.neighbors import KNeighborsClassifier | |
from sklearn.svm import SVC | |
from sklearn.gaussian_process import GaussianProcessClassifier | |
from sklearn.gaussian_process.kernels import RBF | |
from sklearn.tree import DecisionTreeClassifier | |
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier | |
from sklearn.naive_bayes import GaussianNB | |
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis | |
from sklearn.inspection import DecisionBoundaryDisplay | |
from sklearn.datasets import make_blobs, make_circles, make_moons | |
import gradio as gr | |
import math | |
from functools import partial | |
### DATASETS
def normalize(X):
    """Return ``X`` transformed by a ``StandardScaler`` fitted on ``X`` itself."""
    scaler = StandardScaler()
    return scaler.fit_transform(X)
def linearly_separable():
    """Build the two-feature classification dataset used by the demo.

    The points come from ``make_classification`` (fixed ``random_state=1``)
    and are then shifted by uniform noise drawn from a separately seeded
    ``RandomState(2)``, so the result is the same on every call.

    Returns:
        A ``(X, y)`` tuple of features and class labels.
    """
    features, targets = make_classification(
        n_features=2,
        n_redundant=0,
        n_informative=2,
        random_state=1,
        n_clusters_per_class=1,
    )
    noise_source = np.random.RandomState(2)
    # Add uniform noise scaled by 2 to every coordinate, in place.
    features += 2 * noise_source.uniform(size=features.shape)
    return (features, targets)
# Dataset display name -> (X, y) tuple, generated once at import time.
# The keys are the names looked up by train_models().
DATA_MAPPING = {
    "Moons": make_moons(noise=0.3, random_state=0),
    "Circles": make_circles(noise=0.2, factor=0.5, random_state=1),
    "Linearly Separable Random Dataset": linearly_separable(),
}
#### MODELS
def get_groundtruth_model(X, labels):
    """Return a placeholder "model" that only carries the true labels.

    Args:
        X: Unused; accepted so the call shape is ``(X, labels)``.
        labels: The true labels to expose on the returned object.

    Returns:
        An object whose ``labels_`` attribute is ``labels``.
    """

    # Dummy model to show the true label distribution.
    class Dummy:
        def __init__(self, y):
            # Store the constructor argument itself rather than silently
            # reaching for the enclosing function's ``labels`` variable.
            self.labels_ = y

    return Dummy(labels)
# The same three datasets as a plain list, built with the same generator
# arguments as the DATA_MAPPING entries (Moons, Circles, linearly separable).
DATASETS = [
    make_moons(noise=0.3, random_state=0),
    make_circles(noise=0.2, factor=0.5, random_state=1),
    linearly_separable(),
]
# Display name -> classifier, in the order the plots are laid out in the UI.
# "Ground Truth" is not an estimator: train_models() special-cases that name
# and never calls fit() on its value.
NAME_CLF_MAPPING = {
    "Ground Truth": get_groundtruth_model,
    "Nearest Neighbors": KNeighborsClassifier(n_neighbors=3),
    "Linear SVM": SVC(kernel="linear", C=0.025),
    "RBF SVM": SVC(gamma=2, C=1),
    "Gaussian Process": GaussianProcessClassifier(kernel=1.0 * RBF(length_scale=1.0)),
    "Decision Tree": DecisionTreeClassifier(max_depth=5),
    "Random Forest": RandomForestClassifier(
        max_depth=5,
        n_estimators=10,
        max_features=1,
    ),
    "Neural Net": MLPClassifier(alpha=1, max_iter=1000),
    "AdaBoost": AdaBoostClassifier(),
    "Naive Bayes": GaussianNB(),
}
#### PLOT
# Figure size passed to plt.subplots() for every plot train_models() creates.
FIGSIZE = (7, 7)

# NOTE(review): `figure` and `i` are not referenced by any other code visible
# in this file; they are kept so module-level state is unchanged.
figure = plt.figure(figsize=(25, 10))
i = 1
def train_models(selected_data, clf_name):
    """Build the plot for one classifier on one dataset.

    The selected dataset is standardized and split 60/40 into train and test
    parts with a fixed seed. For a real classifier, the model is fitted on the
    training part and its decision boundary is drawn. For the special name
    ``"Ground Truth"``, the labelled train and test points are scattered
    instead.

    Args:
        selected_data: Key into ``DATA_MAPPING`` naming the dataset.
        clf_name: Key into ``NAME_CLF_MAPPING`` naming the classifier, or
            ``"Ground Truth"``.

    Returns:
        The matplotlib figure containing the plot.

    Raises:
        KeyError: If either name is not present in its mapping.
    """
    clf = NAME_CLF_MAPPING[clf_name]
    X, y = DATA_MAPPING[selected_data]
    X = normalize(X)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.4, random_state=42
    )

    fig, ax = plt.subplots(figsize=FIGSIZE)

    if clf_name != "Ground Truth":
        clf.fit(X_train, y_train)
        ax.set_title(clf_name, fontsize=10)
        # from_estimator() already draws onto `ax`. Calling .plot() on the
        # returned display again (with no ax) would draw a second time on a
        # newly created figure that nobody uses.
        DecisionBoundaryDisplay.from_estimator(
            clf, X, cmap=plt.cm.RdBu, alpha=0.8, ax=ax, eps=0.5
        )
    else:
        cm_bright = ListedColormap(["#FF0000", "#0000FF"])
        # Axis limits must come from the same standardized data that is being
        # plotted, not from some other (unscaled) dataset.
        x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
        y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5

        ax.set_title("Input data")
        # Plot the training points
        ax.scatter(
            X_train[:, 0], X_train[:, 1], c=y_train, cmap=cm_bright, edgecolors="k"
        )
        # Plot the testing points (slightly transparent)
        ax.scatter(
            X_test[:, 0],
            X_test[:, 1],
            c=y_test,
            cmap=cm_bright,
            alpha=0.6,
            edgecolors="k",
        )
        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)
        ax.set_xticks(())
        ax.set_yticks(())

    # Drop the figure from pyplot's global registry so repeated callbacks do
    # not pile up open figures; the returned Figure object is still usable.
    plt.close(fig)
    return fig
# Blurb rendered as Markdown below the page heading.
description = (
    "Learn how different statistical classifiers perform in different datasets."
)
def iter_grid(n_rows, n_cols):
    """Step through an ``n_rows`` x ``n_cols`` grid built from Gradio blocks.

    This is a generator: it yields (with no value) once per cell, and each
    yield happens while that cell's ``gr.Row`` and ``gr.Column`` contexts are
    open, so the consumer runs its loop body inside them.

    Args:
        n_rows: Number of ``gr.Row`` containers to open.
        n_cols: Number of ``gr.Column`` containers to open inside each row.
    """
    for _row_idx in range(n_rows):
        row = gr.Row()
        with row:
            for _col_idx in range(n_cols):
                cell = gr.Column()
                with cell:
                    yield
title = "Compare Classifiers!"

# Build the page: heading, dataset selector, then one plot per classifier
# arranged in a grid. Each plot is re-rendered when the dataset selection
# changes.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"## {title}")
    gr.Markdown(description)

    input_models = list(NAME_CLF_MAPPING)
    # Take the choices from DATA_MAPPING so the selector can never offer a
    # name that train_models() cannot look up.
    input_data = gr.Radio(choices=list(DATA_MAPPING), value="Moons")

    # Size the grid from the number of models instead of hard-coding 2 x 5.
    n_cols = 5
    n_rows = math.ceil(len(input_models) / n_cols)

    grid = iter_grid(n_rows, n_cols)
    # The grid generator goes first in zip() so a cell is opened before the
    # matching model name is consumed.
    for _, input_model in zip(grid, input_models):
        plot = gr.Plot(label=input_model)
        fn = partial(train_models, clf_name=input_model)
        input_data.change(fn=fn, inputs=[input_data], outputs=plot)
        # Also render once on page load. Otherwise every plot starts empty,
        # and the default selection can only be shown by switching away from
        # it and back again.
        demo.load(fn=fn, inputs=[input_data], outputs=plot)
    # Finish the generator explicitly so any Row/Column context it is still
    # suspended inside (a partially filled last row) is exited right away.
    grid.close()

demo.launch(debug=True)