'''
Author: hibana2077 [email protected]
Date: 2024-01-02 21:43:38
LastEditors: hibana2077 [email protected]
LastEditTime: 2024-01-04 22:21:47
FilePath: \hayabusa\src\main.py
Description: This is the default file header. Set `customMade` and open koroFileHeader to view and adjust the configuration: https://github.com/OBKoro1/koro1FileHeader/wiki/%E9%85%8D%E7%BD%AE
'''
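# Streamlit app entry point; presumably launched from the project root with: streamlit run src/main.py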
import os
import time
import pickle

import pandas as pd
import streamlit as st
from ydata_profiling import ProfileReport
from streamlit_pandas_profiling import st_profile_report

# Make sure the working directories for datasets, trained models, and exported pipelines exist.
os.makedirs('./dataset/', exist_ok=True)
os.makedirs('./model/', exist_ok=True)
os.makedirs('./pipeline/', exist_ok=True)

with st.sidebar:
    st.title("AutoML")
    choice = st.radio("Navigation", ["Upload", "Profiling", "Modelling", "Download"])
    st.info("This application helps you build and explore your data.")
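
# Streamlit reruns this script on every interaction, so the pages below are
# selected by the sidebar radio; the uploaded dataframe is carried between runs
# in st.session_state, while models and pipelines are persisted to disk.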

if choice == "Upload":
    st.title("Upload Your Dataset")
    file = st.file_uploader("Upload Your Dataset")
    if file:
        df = pd.read_csv(file, index_col=None)
        df.drop("Unnamed: 0", axis=1, inplace=True) if "Unnamed: 0" in df.columns else "No Unnamed: 0 Detected"
        df.to_csv('dataset.csv', index=None)
        st.dataframe(df)
        st.session_state['df'] = df

if choice == "Profiling": 
    st.title("Exploratory Data Analysis")
    df:pd.DataFrame = st.session_state['df']
    pr = ProfileReport(df, title="Profiling Report")
    st_profile_report(pr)
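
# The Modelling page relies on PyCaret: setup() prepares the data and builds the
# preprocessing pipeline, compare_models() cross-validates the candidate
# estimators and returns the best one, pull() fetches the last results table,
# and save_model() writes the fitted pipeline plus model to '<name>.pkl'.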

if choice == "Modelling": 
    df:pd.DataFrame = st.session_state['df']
    chosen_target = st.selectbox('Choose the Target Column', df.columns)
    drop_columns = st.multiselect('Choose the Columns to Drop', df.columns)
    ml_task = st.selectbox('Choose the ML Task', ['Classification', 'Regression'])
    if st.button('Run Modelling'):
        if ml_task == 'Classification':
            from pycaret.classification import setup, compare_models, pull, save_model, get_config
            setup(df, target=chosen_target, ignore_features=drop_columns)
            setup_df = pull()
            st.dataframe(setup_df)
            best_model = compare_models(exclude=['lightgbm'])
            compare_df = pull()
            st.dataframe(compare_df)
            save_model(best_model, 'best_model')
            save_model(best_model, f"./model/{chosen_target}_{time.time()}")
            pipeline = get_config('pipeline')
            st.write(pipeline)
            # save the pipeline
            with open('pipeline.pkl', 'wb') as f:
                pickle.dump(pipeline, f)
        else:
            from pycaret.regression import setup, compare_models, pull, save_model
            setup(df, target=chosen_target, ignore_features=drop_columns)
            setup_df = pull()
            st.dataframe(setup_df)
            best_model = compare_models(exclude=['lightgbm'])
            compare_df = pull()
            st.dataframe(compare_df)
            save_model(best_model, 'best_model')
            save_model(best_model, f"./model/{chosen_target}_{time.time()}")
            pipeline = get_config('pipeline')
            st.write(pipeline)
            # save the pipeline
            with open('pipeline.pkl', 'wb') as f:
                pickle.dump(pipeline, f)
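
# The Download page serves the model and pipeline files written by the most
# recent Modelling run.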

if choice == "Download": 
    with open('best_model.pkl', 'rb') as f: 
        st.download_button('Download Model', f, file_name="best_model.pkl")
    with open('pipeline.pkl', 'rb') as f:
        st.download_button('Download Pipeline', f, file_name="pipeline.pkl")