Spaces:
Sleeping
Sleeping
File size: 4,914 Bytes
0fcc2bd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# streamlit_app.py
import streamlit as st
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import VotingClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, confusion_matrix, roc_curve, auc, classification_report
# Page header: the title followed by a short markdown description of the app.
APP_TITLE = "Classification Model Comparison: Stacking and Voting Classifiers"
APP_DESCRIPTION = """
### Predict target goals using different ensemble techniques
This application compares the performance of Stacking and Voting classifiers on the provided dataset.
"""
st.title(APP_TITLE)
st.write(APP_DESCRIPTION)

# CSV upload widget; the script checks this value against None before
# reading the data.
uploaded_file = st.file_uploader(label="Upload your CSV file", type=["csv"])
def _show_confusion_matrix(y_true, y_pred, title):
    """Render a confusion-matrix heatmap for one classifier on the page.

    Args:
        y_true: Ground-truth labels of the test split.
        y_pred: Labels predicted by the classifier for the same rows.
        title: Text used as the plot title.
    """
    fig, ax = plt.subplots(figsize=(6, 5))
    sns.heatmap(confusion_matrix(y_true, y_pred), annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    st.pyplot(fig)
    # Close the figure explicitly: the script is re-executed on each rerun and
    # unclosed pyplot figures would otherwise pile up in memory.
    plt.close(fig)


# Everything below needs the uploaded data, so it runs only once a CSV file
# has been provided.
if uploaded_file is not None:
    df = pd.read_csv(uploaded_file)
    st.write("### Raw Data")
    st.write(df)

    # Column kinds that can be correlated and fed to the scaler/models.
    numeric_kinds = ['number', 'bool']

    # Correlation heatmap. Only numeric columns are used because
    # DataFrame.corr() raises on text columns in pandas >= 2.0.
    st.write("### Correlation Heatmap")
    corr_matrix = df.select_dtypes(include=numeric_kinds).corr()
    if corr_matrix.empty:
        st.info("No numeric columns available for a correlation heatmap.")
    else:
        fig, ax = plt.subplots(figsize=(25, 10))
        sns.heatmap(
            corr_matrix,
            vmin=-1,
            vmax=1,
            center=0,
            cmap=sns.color_palette("viridis", as_cmap=True),
            annot=True,
            fmt=".2f",
            linewidths=0.5,
            square=True,
            cbar_kws={"shrink": 0.75},
            ax=ax,
        )
        ax.set_title('Correlation Heatmap', fontsize=20, pad=20)
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')
        plt.setp(ax.get_yticklabels(), rotation=0)
        st.pyplot(fig)
        plt.close(fig)

    # The pipeline is built around a fixed target column; fail with a readable
    # message instead of a KeyError traceback when it is absent.
    target_col = 'Target_goal'
    if target_col not in df.columns:
        st.error(f"The uploaded CSV must contain a '{target_col}' column.")
        st.stop()

    # Recode the target labels: 1 -> 0 and 2 -> 1.
    df[target_col] = df[target_col].replace({1: 0, 2: 1})

    # Define features and target variable. Non-numeric feature columns cannot
    # be standardized, so they are left out and reported to the user.
    features = df.drop(columns=[target_col])
    X = features.select_dtypes(include=numeric_kinds)
    y = df[target_col]
    skipped = [str(col) for col in features.columns if col not in X.columns]
    if skipped:
        st.warning(f"Ignoring non-numeric feature columns: {', '.join(skipped)}")
    if X.empty:
        st.error("The uploaded CSV has no numeric feature data to train on.")
        st.stop()

    # Split the data (fixed seed keeps the split reproducible between reruns).
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Standardize the data; the scaler is fitted on the training split only
    # and then applied to the test split.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Base models shared by the stacking and voting ensembles.
    estimators = [
        ('lr', LogisticRegression()),
        ('dt', DecisionTreeClassifier()),
        ('rf', RandomForestClassifier()),
        ('gb', GradientBoostingClassifier()),
        ('svc', SVC(probability=True)),  # probability=True enables predict_proba
    ]

    # Stacking Classifier
    stacking_clf = StackingClassifier(estimators=estimators, final_estimator=LogisticRegression())
    stacking_clf.fit(X_train, y_train)
    y_pred_stack = stacking_clf.predict(X_test)
    # Column 1 of predict_proba is used as the score for the ROC curve.
    y_pred_stack_proba = stacking_clf.predict_proba(X_test)[:, 1]

    # Voting Classifier
    voting_clf = VotingClassifier(estimators=estimators, voting='soft')
    voting_clf.fit(X_train, y_train)
    y_pred_vote = voting_clf.predict(X_test)
    y_pred_vote_proba = voting_clf.predict_proba(X_test)[:, 1]

    # Evaluation
    st.write("### Accuracy Scores")
    accuracy_stack = accuracy_score(y_test, y_pred_stack)
    accuracy_vote = accuracy_score(y_test, y_pred_vote)
    st.write(f'Stacking Classifier Accuracy: {accuracy_stack:.2f}')
    st.write(f'Voting Classifier Accuracy: {accuracy_vote:.2f}')

    # Classification Reports
    st.write("### Classification Reports")
    st.write("#### Stacking Classifier")
    st.text(classification_report(y_test, y_pred_stack))
    st.write("#### Voting Classifier")
    st.text(classification_report(y_test, y_pred_vote))

    # Confusion Matrices
    st.write("### Confusion Matrix for Stacking Classifier")
    _show_confusion_matrix(y_test, y_pred_stack, 'Stacking Classifier Confusion Matrix')
    st.write("### Confusion Matrix for Voting Classifier")
    _show_confusion_matrix(y_test, y_pred_vote, 'Voting Classifier Confusion Matrix')

    # ROC Curve and AUC
    fpr_stack, tpr_stack, _ = roc_curve(y_test, y_pred_stack_proba)
    roc_auc_stack = auc(fpr_stack, tpr_stack)
    fpr_vote, tpr_vote, _ = roc_curve(y_test, y_pred_vote_proba)
    roc_auc_vote = auc(fpr_vote, tpr_vote)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.plot(fpr_stack, tpr_stack, color='blue', lw=2, label=f'Stacking Classifier (AUC = {roc_auc_stack:0.2f})')
    ax.plot(fpr_vote, tpr_vote, color='red', lw=2, label=f'Voting Classifier (AUC = {roc_auc_vote:0.2f})')
    # Diagonal reference line drawn from (0, 0) to (1, 1).
    ax.plot([0, 1], [0, 1], color='gray', lw=1, linestyle='--')
    ax.set_xlim([0.0, 1.0])
    ax.set_ylim([0.0, 1.05])
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    ax.legend(loc="lower right")
    st.pyplot(fig)
    plt.close(fig)
|