laudavid committed on
Commit
c2522bb
·
1 Parent(s): 345cd95

Add files to app

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitignore +166 -0
  2. README.md +13 -12
  3. data/classification/credit_score/credit_score_cm_train +0 -0
  4. data/classification/credit_score/credit_score_test_pp.pkl +3 -0
  5. data/classification/credit_score/credit_score_test_raw.pkl +3 -0
  6. data/classification/credit_score/credit_score_train_raw.pkl +3 -0
  7. data/clustering/clean_marketing.pkl +3 -0
  8. data/clustering/results/results_2_clusters.pkl +3 -0
  9. data/clustering/results/results_3_clusters.pkl +3 -0
  10. data/clustering/results/results_4_clusters.pkl +3 -0
  11. data/clustering/results/results_5_clusters.pkl +3 -0
  12. data/clustering/results/results_6_clusters.pkl +3 -0
  13. data/hotels/booking_df.csv +0 -0
  14. data/household/household_power_consumption_clean.pkl +3 -0
  15. data/movies/csr_data_tf.pkl +3 -0
  16. data/movies/movies_dict2.pkl +3 -0
  17. data/movies/vote_info.pkl +3 -0
  18. data/pinterest/image1.jpg +0 -0
  19. data/pinterest/image2.jpg +0 -0
  20. data/pinterest/image3.jpg +0 -0
  21. data/pinterest/image4.jpg +0 -0
  22. data/sa_data/reviews_raw.pkl +3 -0
  23. data/sa_data/reviews_results.pkl +3 -0
  24. images/AI.jpg +0 -0
  25. images/clustering.webp +0 -0
  26. images/credit_score.jpg +0 -0
  27. images/cs.webp +0 -0
  28. images/energy_consumption.jpg +0 -0
  29. images/france.jpeg +0 -0
  30. images/group.png +0 -0
  31. images/hec.png +0 -0
  32. images/hi-paris.png +0 -0
  33. images/models/credit_score/EDA_numeric_credit.png +0 -0
  34. images/object_detection.png +0 -0
  35. images/od_fashion.jpg +0 -0
  36. images/reviews.jpg +0 -0
  37. images/room.jpg +0 -0
  38. images/rs.png +0 -0
  39. images/sentiment_analysis.png +0 -0
  40. images/singapore.jpg +0 -0
  41. images/spain-banner.jpg +0 -0
  42. images/spain.WebP +0 -0
  43. images/supervised_learner.png +0 -0
  44. images/thailand.jpeg +0 -0
  45. images/ts_patterns.png +0 -0
  46. images/unsupervised_learner.webp +0 -0
  47. main_page.py +84 -0
  48. notebooks/Supervised-Unsupervised/credit_score.ipynb +0 -0
  49. notebooks/Supervised-Unsupervised/customer_churn.ipynb +0 -0
  50. notebooks/Supervised-Unsupervised/customer_segmentation.ipynb +632 -0
.gitignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # Streamlit secrets
7
+ .streamlit/
8
+
9
+ # C extensions
10
+ *.so
11
+
12
+ # Virtual Environment
13
+ venv-app-ai-ds/
14
+
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ share/python-wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit test / coverage reports
46
+ htmlcov/
47
+ .tox/
48
+ .nox/
49
+ .coverage
50
+ .coverage.*
51
+ .cache
52
+ nosetests.xml
53
+ coverage.xml
54
+ *.cover
55
+ *.py,cover
56
+ .hypothesis/
57
+ .pytest_cache/
58
+ cover/
59
+
60
+ # Translations
61
+ *.mo
62
+ *.pot
63
+
64
+ # Django stuff:
65
+ *.log
66
+ local_settings.py
67
+ db.sqlite3
68
+ db.sqlite3-journal
69
+
70
+ # Flask stuff:
71
+ instance/
72
+ .webassets-cache
73
+
74
+ # Scrapy stuff:
75
+ .scrapy
76
+
77
+ # Sphinx documentation
78
+ docs/_build/
79
+
80
+ # PyBuilder
81
+ .pybuilder/
82
+ target/
83
+
84
+ # Jupyter Notebook
85
+ .ipynb_checkpoints
86
+
87
+ # IPython
88
+ profile_default/
89
+ ipython_config.py
90
+
91
+ # pyenv
92
+ # For a library or package, you might want to ignore these files since the code is
93
+ # intended to run in multiple environments; otherwise, check them in:
94
+ # .python-version
95
+
96
+ # pipenv
97
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
99
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
100
+ # install all needed dependencies.
101
+ #Pipfile.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/#use-with-ide
116
+ .pdm.toml
117
+
118
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
119
+ __pypackages__/
120
+
121
+ # Celery stuff
122
+ celerybeat-schedule
123
+ celerybeat.pid
124
+
125
+ # SageMath parsed files
126
+ *.sage.py
127
+
128
+ # Environments
129
+ .env
130
+ .venv
131
+ env/
132
+ venv/
133
+ ENV/
134
+ env.bak/
135
+ venv.bak/
136
+
137
+ # Spyder project settings
138
+ .spyderproject
139
+ .spyproject
140
+
141
+ # Rope project settings
142
+ .ropeproject
143
+
144
+ # mkdocs documentation
145
+ /site
146
+
147
+ # mypy
148
+ .mypy_cache/
149
+ .dmypy.json
150
+ dmypy.json
151
+
152
+ # Pyre type checker
153
+ .pyre/
154
+
155
+ # pytype static type analyzer
156
+ .pytype/
157
+
158
+ # Cython debug symbols
159
+ cython_debug/
160
+
161
+ # PyCharm
162
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
165
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166
+ #.idea/
README.md CHANGED
@@ -1,13 +1,14 @@
1
- ---
2
- title: App Ai Ds Hec
3
- emoji: 🐨
4
- colorFrom: indigo
5
- colorTo: purple
6
- sdk: streamlit
7
- sdk_version: 1.31.1
8
- app_file: app.py
9
- pinned: false
10
- license: mit
11
- ---
12
 
13
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
1
+ # AI and Data Science examples 🧠
2
+ Space for the Streamlit "AI and Data Science examples" HEC Paris app.
 
 
 
 
 
 
 
 
 
3
 
4
+ The app is structured in 5 pages:
5
+ - Supervised vs Unsupervised
6
+ - Time Series Analysis
7
+ - Sentiment Analysis
8
+ - Object detection
9
+ - Recommendation system
10
+
11
+ Each page contains one or more real-life use cases of AI.
12
+ Some of these use cases include electrical power consumption forecasting and customer segmentation.
13
+
14
+ Other pages on image segmentation and topic modeling are currently being developed.
data/classification/credit_score/credit_score_cm_train ADDED
Binary file (779 Bytes). View file
 
data/classification/credit_score/credit_score_test_pp.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0fbd76a90f5289377d21c2d39f377cb73e13de1c71f3818c7b0ea71f46a29ac
3
+ size 241322
data/classification/credit_score/credit_score_test_raw.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2659d847aeb751f63a49e15b6bdc501be32eaddfaba9b33ca86f279065559f2
3
+ size 103703
data/classification/credit_score/credit_score_train_raw.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d218682f6e67a5c9f81d227fd90362976185aa78958ab432d6561b6dfd960a4
3
+ size 725729
data/clustering/clean_marketing.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7bde1e077f04583237c0029abf25f841d9100d9050375fbec00c328f14c5c1b2
3
+ size 284225
data/clustering/results/results_2_clusters.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd983411f4d5fa3e5e82db4058183af7d22683d8ce52cc909a9a5edb03a153c
3
+ size 1155
data/clustering/results/results_3_clusters.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ddb93b8758544c327ec2f3336db7dadb3ce013cbe54e82693c4d5cb2d0d441b
3
+ size 1279
data/clustering/results/results_4_clusters.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6b483a60584781dc8667241b041c9c1962ea0bbd601dd98c3fbc19259e1be34
3
+ size 1403
data/clustering/results/results_5_clusters.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:853b059260cbdedf4ebb2f61f45c4a44d78195609c58797e5d4a47646b357ae2
3
+ size 1527
data/clustering/results/results_6_clusters.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6c9dd3f0fcdeb05a15076c1e196d7748b79db981153489b0ac31f73a3b69518
3
+ size 1651
data/hotels/booking_df.csv ADDED
The diff for this file is too large to render. See raw diff
 
data/household/household_power_consumption_clean.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c6207c16c7301dbd331706eac29c250a04da757a73a070d156c69c9f4e04d4c
3
+ size 81958
data/movies/csr_data_tf.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1740a81957cb480a43c02c956a64d2fb3be9213888279641cdbfea8b5ea9c60a
3
+ size 1632893
data/movies/movies_dict2.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fae593200377c6dbfa1b5bc00883234b5c5986fb8f21997431ef0fdd4a814ac8
3
+ size 1722011
data/movies/vote_info.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcea5abd2b4f192b1db3c3dea541aeea36ddea662c22270fa186c3bcbf887cc
3
+ size 114227
data/pinterest/image1.jpg ADDED
data/pinterest/image2.jpg ADDED
data/pinterest/image3.jpg ADDED
data/pinterest/image4.jpg ADDED
data/sa_data/reviews_raw.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:11fa1be19b16bdd6e183991367670e735caf9b974dbbbd651b877786078aa557
3
+ size 19784
data/sa_data/reviews_results.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a304e138d8444653e3288434ac2b0804079a6de1271b1de3d27755bba3b25d07
3
+ size 20455
images/AI.jpg ADDED
images/clustering.webp ADDED
images/credit_score.jpg ADDED
images/cs.webp ADDED
images/energy_consumption.jpg ADDED
images/france.jpeg ADDED
images/group.png ADDED
images/hec.png ADDED
images/hi-paris.png ADDED
images/models/credit_score/EDA_numeric_credit.png ADDED
images/object_detection.png ADDED
images/od_fashion.jpg ADDED
images/reviews.jpg ADDED
images/room.jpg ADDED
images/rs.png ADDED
images/sentiment_analysis.png ADDED
images/singapore.jpg ADDED
images/spain-banner.jpg ADDED
images/spain.WebP ADDED
images/supervised_learner.png ADDED
images/thailand.jpeg ADDED
images/ts_patterns.png ADDED
images/unsupervised_learner.webp ADDED
main_page.py ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import streamlit as st
3
+ import pandas as pd
4
+ import numpy as np
5
+
6
+ from st_pages import Page, show_pages
7
+ from PIL import Image
8
+ #from utils import authenticate_drive
9
+
10
+
11
+
12
+ ##################################################################################
13
+ # PAGE CONFIGURATION #
14
+ ##################################################################################
15
+
16
+ st.set_page_config(layout="wide")
17
+
18
+
19
+
20
+
21
+ ##################################################################################
22
+ # GOOGLE DRIVE CONNECTION #
23
+ ##################################################################################
24
+
25
+ # if ["drive_oauth"] not in st.session_state:
26
+ # st.session_state["drive_oauth"] = authenticate_drive()
27
+
28
+ # drive_oauth = st.session_state["drive_oauth"]
29
+
30
+
31
+
32
+
33
+ ##################################################################################
34
+ # TITLE #
35
+ ##################################################################################
36
+
37
+ st.image("images/AI.jpg")
38
+ st.title("AI and Data Science Examples")
39
+ st.subheader("HEC Paris, 2023-2024")
40
+ st.markdown("Course provided by **Shirish C. SRIVASTAVA**")
41
+
42
+ st.markdown(" ")
43
+ st.info("""**About the app**: The AI and Data Science Examples app was created to introduce students to the field of Data Science by showcasing real-life applications of AI.
44
+ It includes use cases using traditional Machine Learning algorithms on structured data, as well as Deep Learning models run on unstructured data (text, images,...).""")
45
+
46
+ st.divider()
47
+
48
+
49
+ #Hi! PARIS collaboration mention
50
+ st.markdown(" ")
51
+ image_hiparis = Image.open('images/hi-paris.png')
52
+ st.image(image_hiparis, width=150)
53
+ url = "https://www.hi-paris.fr/"
54
+ st.markdown("**The app was made in collaboration with: [Hi! PARIS Engineering Team](%s)**" % url)
55
+
56
+
57
+
58
+
59
+ ##################################################################################
60
+ # DASHBOARD/SIDEBAR #
61
+ ##################################################################################
62
+
63
+
64
+ # AI use case pages
65
+ show_pages(
66
+ [
67
+ Page("main_page.py", "Home Page", "🏠"),
68
+ Page("pages/supervised_unsupervised_page.py", "Supervised vs Unsupervised", "🔍"),
69
+ Page("pages/timeseries_analysis.py", "Time Series Forecasting", "📈"),
70
+ Page("pages/sentiment_analysis.py", "Sentiment Analysis", "👍"),
71
+ #Page("pages/object_detection.py", "Object Detection", "📹"), #need to reduce RAM costs
72
+ Page("pages/recommendation_system.py", "Recommendation system", "🛒")
73
+ ]
74
+ )
75
+
76
+
77
+
78
+ ##################################################################################
79
+ # PAGE CONTENT #
80
+ ##################################################################################
81
+
82
+
83
+
84
+
notebooks/Supervised-Unsupervised/credit_score.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/Supervised-Unsupervised/customer_churn.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
notebooks/Supervised-Unsupervised/customer_segmentation.ipynb ADDED
@@ -0,0 +1,632 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 5,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "import pandas as pd\n",
11
+ "import numpy as np\n",
12
+ "import matplotlib.pyplot as plt \n",
13
+ "import seaborn as sns"
14
+ ]
15
+ },
16
+ {
17
+ "cell_type": "markdown",
18
+ "metadata": {},
19
+ "source": [
20
+ "## Customer segmentation for targeted marketing campaign\n",
21
+ "\n",
22
+ "https://www.kaggle.com/datasets/imakash3011/customer-personality-analysis\n",
23
+ "\n",
24
+ "**People**\n",
25
+ "- ID: Customer's unique identifier\n",
26
+ "- Year_Birth: Customer's birth year\n",
27
+ "- Education: Customer's education level\n",
28
+ "- Marital_Status: Customer's marital status\n",
29
+ "- Income: Customer's yearly household income\n",
30
+ "- Kidhome: Number of children in customer's household\n",
31
+ "- Teenhome: Number of teenagers in customer's household\n",
32
+ "- Dt_Customer: Date of customer's enrollment with the company\n",
33
+ "- Recency: Number of days since customer's last purchase\n",
34
+ "- Complain: 1 if the customer complained in the last 2 years, 0 otherwise\n",
35
+ "\n",
36
+ "**Products**\n",
37
+ "- MntWines: Amount spent on wine in last 2 years\n",
38
+ "- MntFruits: Amount spent on fruits in last 2 years\n",
39
+ "- MntMeatProducts: Amount spent on meat in last 2 years\n",
40
+ "- MntFishProducts: Amount spent on fish in last 2 years\n",
41
+ "- MntSweetProducts: Amount spent on sweets in last 2 years\n",
42
+ "- MntGoldProds: Amount spent on gold in last 2 years\n",
43
+ "\n",
44
+ "**Promotion**\n",
45
+ "- NumDealsPurchases: Number of purchases made with a discount\n",
46
+ "- AcceptedCmp1: 1 if customer accepted the offer in the 1st campaign, 0 otherwise\n",
47
+ "- AcceptedCmp2: 1 if customer accepted the offer in the 2nd campaign, 0 otherwise\n",
48
+ "- AcceptedCmp3: 1 if customer accepted the offer in the 3rd campaign, 0 otherwise\n",
49
+ "- AcceptedCmp4: 1 if customer accepted the offer in the 4th campaign, 0 otherwise\n",
50
+ "- AcceptedCmp5: 1 if customer accepted the offer in the 5th campaign, 0 otherwise\n",
51
+ "- Response: 1 if customer accepted the offer in the last campaign, 0 otherwise\n",
52
+ "\n",
53
+ "**Place**\n",
54
+ "- NumWebPurchases: Number of purchases made through the company’s website\n",
55
+ "- NumCatalogPurchases: Number of purchases made using a catalogue\n",
56
+ "- NumStorePurchases: Number of purchases made directly in stores\n",
57
+ "- NumWebVisitsMonth: Number of visits to company’s website in the last month"
58
+ ]
59
+ },
60
+ {
61
+ "cell_type": "markdown",
62
+ "metadata": {},
63
+ "source": [
64
+ "### Data Cleaning"
65
+ ]
66
+ },
67
+ {
68
+ "cell_type": "code",
69
+ "execution_count": 1363,
70
+ "metadata": {},
71
+ "outputs": [],
72
+ "source": [
73
+ "# Load dataset\n",
74
+ "path_data_marketing = r\"C:\\Users\\LaurèneDAVID\\Documents\\Teaching\\Educational_apps\\app-hec-AI-DS\\data\\clustering\\marketing_campaign.csv\"\n",
75
+ "marketing_data = pd.read_csv(path_data_marketing, sep=\";\")"
76
+ ]
77
+ },
78
+ {
79
+ "cell_type": "code",
80
+ "execution_count": 1364,
81
+ "metadata": {},
82
+ "outputs": [],
83
+ "source": [
84
+ "# Delete columns\n",
85
+ "marketing_data.drop(columns=['ID','MntGoldProds','Response','Complain','AcceptedCmp3', 'AcceptedCmp4', 'AcceptedCmp5', 'AcceptedCmp1','AcceptedCmp2',\n",
86
+ " 'Z_CostContact', 'Z_Revenue'], inplace=True)\n",
87
+ "\n",
88
+ "#marketing_data = marketing_data.loc[marketing_data[\"Marital_Status\"].isin([\"Single\",\"Married\",\"Divorced\"])]\n",
89
+ "marketing_data.drop(columns=[\"Marital_Status\"], inplace=True)\n",
90
+ "\n",
91
+ "# marketing_data = marketing_data.loc[marketing_data[\"Education\"].isin([\"2n Cycle\",\"Graduation\",\"Master\",\"PhD\"])]\n",
92
+ "marketing_data.drop(columns=[\"Education\"],inplace=True)\n",
93
+ "\n",
94
+ "marketing_data = marketing_data[marketing_data[\"Income\"]>5000]"
95
+ ]
96
+ },
97
+ {
98
+ "cell_type": "code",
99
+ "execution_count": 1365,
100
+ "metadata": {},
101
+ "outputs": [],
102
+ "source": [
103
+ "# Change column names\n",
104
+ "new_columns = [col.replace(\"Mnt\",\"\").replace(\"Num\",\"\") for col in list(marketing_data.columns)]\n",
105
+ "new_columns = [col + \"Products\" if col in [\"Wines\",\"Fruits\"] else col for col in new_columns]\n",
106
+ "marketing_data.columns = new_columns"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "markdown",
111
+ "metadata": {},
112
+ "source": [
113
+ "### Data Preprocessing"
114
+ ]
115
+ },
116
+ {
117
+ "cell_type": "code",
118
+ "execution_count": 1366,
119
+ "metadata": {},
120
+ "outputs": [],
121
+ "source": [
122
+ "# Share of a customer's total product spending that goes to wines, fruits, ...\n",
123
+ "products_col = [\"WinesProducts\",\"FruitsProducts\", \"MeatProducts\",\"FishProducts\",\"SweetProducts\"]\n",
124
+ "total_amount_spent = marketing_data[products_col].sum(axis=1)\n",
125
+ "\n",
126
+ "for col in products_col:\n",
127
+ " marketing_data[col] = (100*marketing_data[col] / total_amount_spent).round(1)"
128
+ ]
129
+ },
130
+ {
131
+ "cell_type": "code",
132
+ "execution_count": 1367,
133
+ "metadata": {},
134
+ "outputs": [],
135
+ "source": [
136
+ "# Proportion of web, catalog and store purchases (based on total number of purchases)\n",
137
+ "purchases_col = [\"WebPurchases\", \"CatalogPurchases\", \"StorePurchases\"]\n",
138
+ "total_purchases = marketing_data[purchases_col].sum(axis=1)\n",
139
+ "\n",
140
+ "for col in purchases_col:\n",
141
+ " marketing_data[col] = (100*marketing_data[col] / total_purchases).round(1)"
142
+ ]
143
+ },
144
+ {
145
+ "cell_type": "code",
146
+ "execution_count": 1368,
147
+ "metadata": {},
148
+ "outputs": [],
149
+ "source": [
150
+ "from datetime import datetime, date\n",
151
+ "\n",
152
+ "def get_number_days(input_date):\n",
153
+ " date1 = datetime.strptime(input_date, '%d/%m/%Y').date()\n",
154
+ " date2 = date(2022, 2, 13)\n",
155
+ " return (date2 - date1).days"
156
+ ]
157
+ },
158
+ {
159
+ "cell_type": "code",
160
+ "execution_count": 1369,
161
+ "metadata": {},
162
+ "outputs": [],
163
+ "source": [
164
+ "# Compute a customer's age, based on year of birth\n",
165
+ "marketing_data.insert(0, \"Age\", marketing_data[\"Year_Birth\"].apply(lambda x: 2023-x))\n",
166
+ "\n",
167
+ "# Compute the number of days a customer has been subscribed \n",
168
+ "marketing_data.insert(1, \"Days_subscription\", marketing_data[\"Dt_Customer\"].apply(get_number_days))\n",
169
+ "\n",
170
+ "# Compute total number of kids (kids + teens)\n",
171
+ "marketing_data[\"Kids\"] = marketing_data[\"Kidhome\"] + marketing_data[\"Teenhome\"]\n",
172
+ "marketing_data.drop(columns=[\"Kidhome\",\"Teenhome\"], inplace=True)\n",
173
+ "\n",
174
+ "marketing_data.drop(columns=[\"Year_Birth\", \"Dt_Customer\"], inplace=True)\n",
175
+ "marketing_data.dropna(inplace=True)"
176
+ ]
177
+ },
178
+ {
179
+ "cell_type": "code",
180
+ "execution_count": 1370,
181
+ "metadata": {},
182
+ "outputs": [],
183
+ "source": [
184
+ "path_cleandata = r\"C:\\Users\\LaurèneDAVID\\Documents\\Teaching\\Educational_apps\\app-hec-AI-DS\\data\\clustering\"\n",
185
+ "marketing_data.to_pickle(os.path.join(path_cleandata,\"clean_marketing.pkl\"))"
186
+ ]
187
+ },
188
+ {
189
+ "cell_type": "code",
190
+ "execution_count": 1371,
191
+ "metadata": {},
192
+ "outputs": [
193
+ {
194
+ "data": {
195
+ "text/html": [
196
+ "<div>\n",
197
+ "<style scoped>\n",
198
+ " .dataframe tbody tr th:only-of-type {\n",
199
+ " vertical-align: middle;\n",
200
+ " }\n",
201
+ "\n",
202
+ " .dataframe tbody tr th {\n",
203
+ " vertical-align: top;\n",
204
+ " }\n",
205
+ "\n",
206
+ " .dataframe thead th {\n",
207
+ " text-align: right;\n",
208
+ " }\n",
209
+ "</style>\n",
210
+ "<table border=\"1\" class=\"dataframe\">\n",
211
+ " <thead>\n",
212
+ " <tr style=\"text-align: right;\">\n",
213
+ " <th></th>\n",
214
+ " <th>Age</th>\n",
215
+ " <th>Days_subscription</th>\n",
216
+ " <th>Income</th>\n",
217
+ " <th>Recency</th>\n",
218
+ " <th>WinesProducts</th>\n",
219
+ " <th>FruitsProducts</th>\n",
220
+ " <th>MeatProducts</th>\n",
221
+ " <th>FishProducts</th>\n",
222
+ " <th>SweetProducts</th>\n",
223
+ " <th>DealsPurchases</th>\n",
224
+ " <th>WebPurchases</th>\n",
225
+ " <th>CatalogPurchases</th>\n",
226
+ " <th>StorePurchases</th>\n",
227
+ " <th>WebVisitsMonth</th>\n",
228
+ " <th>Kids</th>\n",
229
+ " </tr>\n",
230
+ " </thead>\n",
231
+ " <tbody>\n",
232
+ " <tr>\n",
233
+ " <th>0</th>\n",
234
+ " <td>66</td>\n",
235
+ " <td>3449</td>\n",
236
+ " <td>58138.0</td>\n",
237
+ " <td>58</td>\n",
238
+ " <td>41.5</td>\n",
239
+ " <td>5.8</td>\n",
240
+ " <td>35.7</td>\n",
241
+ " <td>11.2</td>\n",
242
+ " <td>5.8</td>\n",
243
+ " <td>3</td>\n",
244
+ " <td>36.4</td>\n",
245
+ " <td>45.5</td>\n",
246
+ " <td>18.2</td>\n",
247
+ " <td>7</td>\n",
248
+ " <td>0</td>\n",
249
+ " </tr>\n",
250
+ " <tr>\n",
251
+ " <th>1</th>\n",
252
+ " <td>69</td>\n",
253
+ " <td>2899</td>\n",
254
+ " <td>46344.0</td>\n",
255
+ " <td>38</td>\n",
256
+ " <td>52.4</td>\n",
257
+ " <td>4.8</td>\n",
258
+ " <td>28.6</td>\n",
259
+ " <td>9.5</td>\n",
260
+ " <td>4.8</td>\n",
261
+ " <td>2</td>\n",
262
+ " <td>25.0</td>\n",
263
+ " <td>25.0</td>\n",
264
+ " <td>50.0</td>\n",
265
+ " <td>5</td>\n",
266
+ " <td>2</td>\n",
267
+ " </tr>\n",
268
+ " <tr>\n",
269
+ " <th>2</th>\n",
270
+ " <td>58</td>\n",
271
+ " <td>3098</td>\n",
272
+ " <td>71613.0</td>\n",
273
+ " <td>26</td>\n",
274
+ " <td>58.0</td>\n",
275
+ " <td>6.7</td>\n",
276
+ " <td>17.3</td>\n",
277
+ " <td>15.1</td>\n",
278
+ " <td>2.9</td>\n",
279
+ " <td>1</td>\n",
280
+ " <td>40.0</td>\n",
281
+ " <td>10.0</td>\n",
282
+ " <td>50.0</td>\n",
283
+ " <td>4</td>\n",
284
+ " <td>0</td>\n",
285
+ " </tr>\n",
286
+ " <tr>\n",
287
+ " <th>3</th>\n",
288
+ " <td>39</td>\n",
289
+ " <td>2925</td>\n",
290
+ " <td>26646.0</td>\n",
291
+ " <td>26</td>\n",
292
+ " <td>22.9</td>\n",
293
+ " <td>8.3</td>\n",
294
+ " <td>41.7</td>\n",
295
+ " <td>20.8</td>\n",
296
+ " <td>6.2</td>\n",
297
+ " <td>2</td>\n",
298
+ " <td>33.3</td>\n",
299
+ " <td>0.0</td>\n",
300
+ " <td>66.7</td>\n",
301
+ " <td>6</td>\n",
302
+ " <td>1</td>\n",
303
+ " </tr>\n",
304
+ " <tr>\n",
305
+ " <th>4</th>\n",
306
+ " <td>42</td>\n",
307
+ " <td>2947</td>\n",
308
+ " <td>58293.0</td>\n",
309
+ " <td>94</td>\n",
310
+ " <td>42.5</td>\n",
311
+ " <td>10.6</td>\n",
312
+ " <td>29.0</td>\n",
313
+ " <td>11.3</td>\n",
314
+ " <td>6.6</td>\n",
315
+ " <td>5</td>\n",
316
+ " <td>35.7</td>\n",
317
+ " <td>21.4</td>\n",
318
+ " <td>42.9</td>\n",
319
+ " <td>5</td>\n",
320
+ " <td>1</td>\n",
321
+ " </tr>\n",
322
+ " <tr>\n",
323
+ " <th>...</th>\n",
324
+ " <td>...</td>\n",
325
+ " <td>...</td>\n",
326
+ " <td>...</td>\n",
327
+ " <td>...</td>\n",
328
+ " <td>...</td>\n",
329
+ " <td>...</td>\n",
330
+ " <td>...</td>\n",
331
+ " <td>...</td>\n",
332
+ " <td>...</td>\n",
333
+ " <td>...</td>\n",
334
+ " <td>...</td>\n",
335
+ " <td>...</td>\n",
336
+ " <td>...</td>\n",
337
+ " <td>...</td>\n",
338
+ " <td>...</td>\n",
339
+ " </tr>\n",
340
+ " <tr>\n",
341
+ " <th>2235</th>\n",
342
+ " <td>56</td>\n",
343
+ " <td>3167</td>\n",
344
+ " <td>61223.0</td>\n",
345
+ " <td>46</td>\n",
346
+ " <td>64.8</td>\n",
347
+ " <td>3.9</td>\n",
348
+ " <td>16.6</td>\n",
349
+ " <td>3.8</td>\n",
350
+ " <td>10.8</td>\n",
351
+ " <td>2</td>\n",
352
+ " <td>56.2</td>\n",
353
+ " <td>18.8</td>\n",
354
+ " <td>25.0</td>\n",
355
+ " <td>5</td>\n",
356
+ " <td>1</td>\n",
357
+ " </tr>\n",
358
+ " <tr>\n",
359
+ " <th>2236</th>\n",
360
+ " <td>77</td>\n",
361
+ " <td>2805</td>\n",
362
+ " <td>64014.0</td>\n",
363
+ " <td>56</td>\n",
364
+ " <td>93.1</td>\n",
365
+ " <td>0.0</td>\n",
366
+ " <td>6.9</td>\n",
367
+ " <td>0.0</td>\n",
368
+ " <td>0.0</td>\n",
369
+ " <td>7</td>\n",
370
+ " <td>53.3</td>\n",
371
+ " <td>13.3</td>\n",
372
+ " <td>33.3</td>\n",
373
+ " <td>7</td>\n",
374
+ " <td>3</td>\n",
375
+ " </tr>\n",
376
+ " <tr>\n",
377
+ " <th>2237</th>\n",
378
+ " <td>42</td>\n",
379
+ " <td>2941</td>\n",
380
+ " <td>56981.0</td>\n",
381
+ " <td>91</td>\n",
382
+ " <td>74.6</td>\n",
383
+ " <td>3.9</td>\n",
384
+ " <td>17.8</td>\n",
385
+ " <td>2.6</td>\n",
386
+ " <td>1.0</td>\n",
387
+ " <td>1</td>\n",
388
+ " <td>11.1</td>\n",
389
+ " <td>16.7</td>\n",
390
+ " <td>72.2</td>\n",
391
+ " <td>6</td>\n",
392
+ " <td>0</td>\n",
393
+ " </tr>\n",
394
+ " <tr>\n",
395
+ " <th>2238</th>\n",
396
+ " <td>67</td>\n",
397
+ " <td>2942</td>\n",
398
+ " <td>69245.0</td>\n",
399
+ " <td>8</td>\n",
400
+ " <td>54.7</td>\n",
401
+ " <td>3.8</td>\n",
402
+ " <td>27.4</td>\n",
403
+ " <td>10.2</td>\n",
404
+ " <td>3.8</td>\n",
405
+ " <td>2</td>\n",
406
+ " <td>28.6</td>\n",
407
+ " <td>23.8</td>\n",
408
+ " <td>47.6</td>\n",
409
+ " <td>3</td>\n",
410
+ " <td>1</td>\n",
411
+ " </tr>\n",
412
+ " <tr>\n",
413
+ " <th>2239</th>\n",
414
+ " <td>69</td>\n",
415
+ " <td>3408</td>\n",
416
+ " <td>52869.0</td>\n",
417
+ " <td>40</td>\n",
418
+ " <td>55.6</td>\n",
419
+ " <td>2.0</td>\n",
420
+ " <td>40.4</td>\n",
421
+ " <td>1.3</td>\n",
422
+ " <td>0.7</td>\n",
423
+ " <td>3</td>\n",
424
+ " <td>37.5</td>\n",
425
+ " <td>12.5</td>\n",
426
+ " <td>50.0</td>\n",
427
+ " <td>7</td>\n",
428
+ " <td>2</td>\n",
429
+ " </tr>\n",
430
+ " </tbody>\n",
431
+ "</table>\n",
432
+ "<p>2208 rows × 15 columns</p>\n",
433
+ "</div>"
434
+ ],
435
+ "text/plain": [
436
+ " Age Days_subscription Income Recency WinesProducts FruitsProducts \\\n",
437
+ "0 66 3449 58138.0 58 41.5 5.8 \n",
438
+ "1 69 2899 46344.0 38 52.4 4.8 \n",
439
+ "2 58 3098 71613.0 26 58.0 6.7 \n",
440
+ "3 39 2925 26646.0 26 22.9 8.3 \n",
441
+ "4 42 2947 58293.0 94 42.5 10.6 \n",
442
+ "... ... ... ... ... ... ... \n",
443
+ "2235 56 3167 61223.0 46 64.8 3.9 \n",
444
+ "2236 77 2805 64014.0 56 93.1 0.0 \n",
445
+ "2237 42 2941 56981.0 91 74.6 3.9 \n",
446
+ "2238 67 2942 69245.0 8 54.7 3.8 \n",
447
+ "2239 69 3408 52869.0 40 55.6 2.0 \n",
448
+ "\n",
449
+ " MeatProducts FishProducts SweetProducts DealsPurchases WebPurchases \\\n",
450
+ "0 35.7 11.2 5.8 3 36.4 \n",
451
+ "1 28.6 9.5 4.8 2 25.0 \n",
452
+ "2 17.3 15.1 2.9 1 40.0 \n",
453
+ "3 41.7 20.8 6.2 2 33.3 \n",
454
+ "4 29.0 11.3 6.6 5 35.7 \n",
455
+ "... ... ... ... ... ... \n",
456
+ "2235 16.6 3.8 10.8 2 56.2 \n",
457
+ "2236 6.9 0.0 0.0 7 53.3 \n",
458
+ "2237 17.8 2.6 1.0 1 11.1 \n",
459
+ "2238 27.4 10.2 3.8 2 28.6 \n",
460
+ "2239 40.4 1.3 0.7 3 37.5 \n",
461
+ "\n",
462
+ " CatalogPurchases StorePurchases WebVisitsMonth Kids \n",
463
+ "0 45.5 18.2 7 0 \n",
464
+ "1 25.0 50.0 5 2 \n",
465
+ "2 10.0 50.0 4 0 \n",
466
+ "3 0.0 66.7 6 1 \n",
467
+ "4 21.4 42.9 5 1 \n",
468
+ "... ... ... ... ... \n",
469
+ "2235 18.8 25.0 5 1 \n",
470
+ "2236 13.3 33.3 7 3 \n",
471
+ "2237 16.7 72.2 6 0 \n",
472
+ "2238 23.8 47.6 3 1 \n",
473
+ "2239 12.5 50.0 7 2 \n",
474
+ "\n",
475
+ "[2208 rows x 15 columns]"
476
+ ]
477
+ },
478
+ "execution_count": 1371,
479
+ "metadata": {},
480
+ "output_type": "execute_result"
481
+ }
482
+ ],
483
+ "source": [
484
+ "pd.read_pickle(os.path.join(path_cleandata,\"clean_marketing.pkl\"))"
485
+ ]
486
+ },
487
+ {
488
+ "cell_type": "code",
489
+ "execution_count": 1372,
490
+ "metadata": {},
491
+ "outputs": [],
492
+ "source": [
493
+ "from sklearn.compose import ColumnTransformer\n",
494
+ "from sklearn.preprocessing import MinMaxScaler, StandardScaler, RobustScaler\n",
495
+ "\n",
496
+ "num_columns = marketing_data.select_dtypes(include=[\"int64\", \"float64\"]).columns\n",
497
+ "\n",
498
+ "# Build data processing pipeline\n",
499
+ "ct = ColumnTransformer(\n",
500
+ " [(\"numerical\", RobustScaler(), num_columns)])\n",
501
+ "\n",
502
+ "X = ct.fit_transform(marketing_data)"
503
+ ]
504
+ },
505
+ {
506
+ "cell_type": "code",
507
+ "execution_count": 1373,
508
+ "metadata": {},
509
+ "outputs": [],
510
+ "source": [
511
+ "columns_transform = [col.split(\"__\")[1] for col in ct.get_feature_names_out()]\n",
512
+ "df_clean = pd.DataFrame(X, columns=columns_transform)"
513
+ ]
514
+ },
515
+ {
516
+ "cell_type": "markdown",
517
+ "metadata": {},
518
+ "source": [
519
+ "### Clustering"
520
+ ]
521
+ },
522
+ {
523
+ "cell_type": "code",
524
+ "execution_count": 1374,
525
+ "metadata": {},
526
+ "outputs": [],
527
+ "source": [
528
+ "from sklearn.cluster import KMeans\n",
529
+ "from sklearn.metrics import silhouette_score\n",
530
+ "\n",
531
+ "def clustering_model(X, list_nb_clusters):\n",
532
+ " dict_labels = dict()\n",
533
+ " list_scores = []\n",
534
+ "\n",
535
+ " for n in list_nb_clusters:\n",
536
+ " kmeans = KMeans(n_clusters=n, n_init=10)\n",
537
+ " labels = kmeans.fit_predict(X)\n",
538
+ " score = silhouette_score(X, labels)\n",
539
+ " dict_labels[f\"{n} clusters\"] = labels\n",
540
+ " list_scores.append(score)\n",
541
+ "\n",
542
+ " return list_scores, dict_labels"
543
+ ]
544
+ },
545
+ {
546
+ "cell_type": "code",
547
+ "execution_count": 1375,
548
+ "metadata": {},
549
+ "outputs": [],
550
+ "source": [
551
+ "list_nb_clusters = np.arange(2,7)\n",
552
+ "scores_kmeans, labels_kmeans = clustering_model(X, list_nb_clusters)"
553
+ ]
554
+ },
555
+ {
556
+ "cell_type": "code",
557
+ "execution_count": 1376,
558
+ "metadata": {},
559
+ "outputs": [
560
+ {
561
+ "data": {
562
+ "image/png": "",
563
+ "text/plain": [
564
+ "<Figure size 640x480 with 1 Axes>"
565
+ ]
566
+ },
567
+ "metadata": {},
568
+ "output_type": "display_data"
569
+ }
570
+ ],
571
+ "source": [
572
+ "marketing_data_results = pd.DataFrame({\"nb_clusters\":[str(i) for i in np.arange(2,7)], \"scores\":scores_kmeans})\n",
573
+ "\n",
574
+ "sns.lineplot(data=marketing_data_results, x=\"nb_clusters\", y=\"scores\", marker=\"o\")\n",
575
+ "plt.xlabel(\"number of clusters\")\n",
576
+ "plt.ylabel(\"silhouette score\")\n",
577
+ "plt.title(\"Silhouette score of Kmeans\")\n",
578
+ "plt.show()"
579
+ ]
580
+ },
581
+ {
582
+ "cell_type": "markdown",
583
+ "metadata": {},
584
+ "source": [
585
+ "### Save results"
586
+ ]
587
+ },
588
+ {
589
+ "cell_type": "code",
590
+ "execution_count": 1377,
591
+ "metadata": {},
592
+ "outputs": [],
593
+ "source": [
594
+ "import os\n",
595
+ "path_results = r\"C:\\Users\\LaurèneDAVID\\Documents\\Teaching\\Educational_apps\\app-hec-AI-DS\\data\\clustering\\results\"\n",
596
+ "\n",
597
+ "for nb_clusters in list_nb_clusters:\n",
598
+ " labels_ = labels_kmeans[f\"{nb_clusters} clusters\"] # chosen labels\n",
599
+ " marketing_data_labels = marketing_data.copy()\n",
600
+ " marketing_data_labels[\"Group\"] = labels_\n",
601
+ " marketing_data_labels[\"Group\"] = marketing_data_labels[\"Group\"].astype(int)\n",
602
+ "\n",
603
+ " df_mean_results = marketing_data_labels.groupby(\"Group\")[num_columns].mean().reset_index()\n",
604
+ " df_mean_results = df_mean_results.round(1).melt(id_vars=[\"Group\"])\n",
605
+ " df_mean_results = pd.pivot_table(df_mean_results, values='value', index=['variable'], columns=[\"Group\"])\n",
606
+ "\n",
607
+ " df_mean_results.to_pickle(os.path.join(path_results,f\"results_{nb_clusters}_clusters.pkl\"))"
608
+ ]
609
+ }
610
+ ],
611
+ "metadata": {
612
+ "kernelspec": {
613
+ "display_name": "venv",
614
+ "language": "python",
615
+ "name": "python3"
616
+ },
617
+ "language_info": {
618
+ "codemirror_mode": {
619
+ "name": "ipython",
620
+ "version": 3
621
+ },
622
+ "file_extension": ".py",
623
+ "mimetype": "text/x-python",
624
+ "name": "python",
625
+ "nbconvert_exporter": "python",
626
+ "pygments_lexer": "ipython3",
627
+ "version": "3.9.0"
628
+ }
629
+ },
630
+ "nbformat": 4,
631
+ "nbformat_minor": 2
632
+ }