Prathamesh1420 committed on
Commit
e9063f9
·
verified ·
1 Parent(s): 087fbf6

Upload MLFlow Mentos Zindagi.ipynb

Browse files
Files changed (1) hide show
  1. MLFlow Mentos Zindagi.ipynb +696 -0
MLFlow Mentos Zindagi.ipynb ADDED
@@ -0,0 +1,696 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "7dd3aed1-8c77-491a-beb4-6658b3e603b6",
6
+ "metadata": {},
7
+ "source": [
8
+ "# Import Packages"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": null,
14
+ "id": "b1b9541c-7de1-4c89-9424-01058657d4b8",
15
+ "metadata": {},
16
+ "outputs": [],
17
+ "source": [
18
+ "import numpy as np\n",
19
+ "import pandas as pd\n",
20
+ "\n",
21
+ "import matplotlib.pyplot as plt\n",
22
+ "import seaborn as sns\n",
23
+ "\n",
24
+ "from sklearn.model_selection import train_test_split\n",
25
+ "from sklearn import set_config\n",
26
+ "from sklearn.ensemble import RandomForestClassifier\n",
27
+ "\n",
28
+ "from sklearn.compose import ColumnTransformer\n",
29
+ "from sklearn.pipeline import Pipeline, FeatureUnion\n",
30
+ "\n",
31
+ "from sklearn.impute import SimpleImputer\n",
32
+ "from sklearn.preprocessing import (\n",
33
+ " StandardScaler,\n",
34
+ " MinMaxScaler,\n",
35
+ " OneHotEncoder,\n",
36
+ " OrdinalEncoder\n",
37
+ ")\n",
38
+ "\n",
39
+ "from feature_engine.encoding import CountFrequencyEncoder\n",
40
+ "from feature_engine.outliers.winsorizer import Winsorizer\n",
41
+ "\n",
42
+ "import mlflow\n",
43
+ "\n",
44
+ "from sklearn.metrics import (\n",
45
+ " accuracy_score, \n",
46
+ " precision_score, \n",
47
+ " recall_score, \n",
48
+ " f1_score\n",
49
+ ")\n",
50
+ "\n",
51
+ "from sklearn.metrics import ConfusionMatrixDisplay"
52
+ ]
53
+ },
54
+ {
55
+ "cell_type": "markdown",
56
+ "id": "0f44afcc-35a3-4e78-8b0f-1bff5cac2f42",
57
+ "metadata": {},
58
+ "source": [
59
+ "# Load the Data"
60
+ ]
61
+ },
62
+ {
63
+ "cell_type": "code",
64
+ "execution_count": null,
65
+ "id": "fc883d66-7142-451c-b7a7-a88407311855",
66
+ "metadata": {},
67
+ "outputs": [],
68
+ "source": [
69
+ "# read the csv file\n",
70
+ "\n",
71
+ "df = pd.read_csv(\"data/titanic.csv\")\n",
72
+ "\n",
73
+ "df.head()"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": null,
79
+ "id": "74d95fa4-20c7-4e1a-a34a-438343bf1b89",
80
+ "metadata": {},
81
+ "outputs": [],
82
+ "source": [
83
+ "# check for missing values in data\n",
84
+ "\n",
85
+ "(\n",
86
+ " df\n",
87
+ " .isna()\n",
88
+ " .sum()\n",
89
+ ")"
90
+ ]
91
+ },
92
+ {
93
+ "cell_type": "markdown",
94
+ "id": "b4406de8-2796-471b-9b1d-37f324eb25fa",
95
+ "metadata": {},
96
+ "source": [
97
+ "**Observations**:\n",
98
+ "1. `Age`, `Embarked` and `Cabin` columns have missing values."
99
+ ]
100
+ },
101
+ {
102
+ "cell_type": "code",
103
+ "execution_count": null,
104
+ "id": "c73034ac-df11-42dd-8238-c7ff9de91979",
105
+ "metadata": {},
106
+ "outputs": [],
107
+ "source": [
108
+ "# info about the data\n",
109
+ "\n",
110
+ "df.info()"
111
+ ]
112
+ },
113
+ {
114
+ "cell_type": "markdown",
115
+ "id": "34bdfe67-8229-491e-b08f-2388aea5aab6",
116
+ "metadata": {},
117
+ "source": [
118
+ "# Data Cleaning"
119
+ ]
120
+ },
121
+ {
122
+ "cell_type": "code",
123
+ "execution_count": null,
124
+ "id": "2f67329d-b6f3-4486-8ca0-bebfac68d258",
125
+ "metadata": {},
126
+ "outputs": [],
127
+ "source": [
128
+ "# columns to drop\n",
129
+ "\n",
130
+ "columns_to_drop = ['passengerid','name','ticket','cabin']"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "code",
135
+ "execution_count": null,
136
+ "id": "eae542f3-ee1c-4e5f-8600-85a29a7ec48a",
137
+ "metadata": {},
138
+ "outputs": [],
139
+ "source": [
140
+ "def clean_data(df):\n",
141
+ " return (\n",
142
+ " df\n",
143
+ " .rename(columns=str.lower)\n",
144
+ " .drop(columns=columns_to_drop)\n",
145
+ " .assign(\n",
146
+ " family = lambda df_ : df_['sibsp'] + df_['parch']\n",
147
+ " )\n",
148
+ " .drop(columns=['sibsp','parch'])\n",
149
+ " )"
150
+ ]
151
+ },
152
+ {
153
+ "cell_type": "code",
154
+ "execution_count": null,
155
+ "id": "4465d425-1dd4-49be-9b1b-d7876fb42277",
156
+ "metadata": {},
157
+ "outputs": [],
158
+ "source": [
159
+ "final_df = clean_data(df)\n",
160
+ "\n",
161
+ "final_df.head()"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "code",
166
+ "execution_count": null,
167
+ "id": "37cef40c-628a-42a9-934a-ae3461d46853",
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "# shape of the cleaned data \n",
172
+ "\n",
173
+ "print(f'The cleaned data has {final_df.shape[0]} rows and {final_df.shape[1]} columns')"
174
+ ]
175
+ },
176
+ {
177
+ "cell_type": "code",
178
+ "execution_count": null,
179
+ "id": "cebfd73f-5ede-4a17-be63-7355369997f7",
180
+ "metadata": {},
181
+ "outputs": [],
182
+ "source": [
183
+ "# missing values in the cleaned data\n",
184
+ "\n",
185
+ "(\n",
186
+ " final_df\n",
187
+ " .isna()\n",
188
+ " .sum()\n",
189
+ ")"
190
+ ]
191
+ },
192
+ {
193
+ "cell_type": "markdown",
194
+ "id": "087aedb7-b716-4d10-8e03-d9a9149e3c57",
195
+ "metadata": {},
196
+ "source": [
197
+ "# EDA"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": null,
203
+ "id": "075fc561-597a-48c8-9da4-718e1f0f21e0",
204
+ "metadata": {},
205
+ "outputs": [],
206
+ "source": [
207
+ "# distribution of target\n",
208
+ "\n",
209
+ "(\n",
210
+ " final_df\n",
211
+ " .loc[:,'survived']\n",
212
+ " .value_counts(normalize=True)\n",
213
+ ")"
214
+ ]
215
+ },
216
+ {
217
+ "cell_type": "code",
218
+ "execution_count": null,
219
+ "id": "c414edaf-7749-4f0d-bc77-288f1846379e",
220
+ "metadata": {},
221
+ "outputs": [],
222
+ "source": [
223
+ "# boxplots\n",
224
+ "\n",
225
+ "def create_boxplot(data,column_name,hue=None):\n",
226
+ " sns.boxplot(data=data, y=column_name, hue=hue)"
227
+ ]
228
+ },
229
+ {
230
+ "cell_type": "code",
231
+ "execution_count": null,
232
+ "id": "053c8ad1-307a-4182-b798-aecd2e56e349",
233
+ "metadata": {},
234
+ "outputs": [],
235
+ "source": [
236
+ "# boxplot for age column\n",
237
+ "create_boxplot(final_df,'age')"
238
+ ]
239
+ },
240
+ {
241
+ "cell_type": "code",
242
+ "execution_count": null,
243
+ "id": "d4e6b0c1-beb6-4eb4-a1a3-e1ed297b7ac7",
244
+ "metadata": {},
245
+ "outputs": [],
246
+ "source": [
247
+ "# boxplot for fare column\n",
248
+ "\n",
249
+ "create_boxplot(final_df,'fare')"
250
+ ]
251
+ },
252
+ {
253
+ "cell_type": "markdown",
254
+ "id": "2fc3dc52-6c52-4cef-b40d-f8b3f2553882",
255
+ "metadata": {},
256
+ "source": [
257
+ "**Overview**\n",
258
+ "- Outliers in the age and fare columns"
259
+ ]
260
+ },
261
+ {
262
+ "cell_type": "code",
263
+ "execution_count": null,
264
+ "id": "9eb075d8-c329-45ec-b311-c3ef16c55357",
265
+ "metadata": {},
266
+ "outputs": [],
267
+ "source": [
268
+ "# plot the distribution of categorical columns\n",
269
+ "\n",
270
+ "def plot_distribution(data,column_name):\n",
271
+ " sns.countplot(data=data, x=column_name)"
272
+ ]
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": null,
277
+ "id": "a8b1d684-37d7-445a-91cf-d017e5f1efa2",
278
+ "metadata": {},
279
+ "outputs": [],
280
+ "source": [
281
+ "# distribution for pclass\n",
282
+ "plot_distribution(final_df,'pclass')"
283
+ ]
284
+ },
285
+ {
286
+ "cell_type": "code",
287
+ "execution_count": null,
288
+ "id": "3ea410f0-8c0b-4281-acd8-9aecde4ee2d7",
289
+ "metadata": {},
290
+ "outputs": [],
291
+ "source": [
292
+ "# distribution for sex\n",
293
+ "\n",
294
+ "plot_distribution(final_df,'sex')"
295
+ ]
296
+ },
297
+ {
298
+ "cell_type": "code",
299
+ "execution_count": null,
300
+ "id": "d758c8c4-5541-4dac-9696-b0e99dab3979",
301
+ "metadata": {},
302
+ "outputs": [],
303
+ "source": [
304
+ "# distribution for embarked \n",
305
+ "\n",
306
+ "plot_distribution(final_df,'embarked')"
307
+ ]
308
+ },
309
+ {
310
+ "cell_type": "markdown",
311
+ "id": "d7fff975-6e32-43bb-8ec6-6be0a39f5c1e",
312
+ "metadata": {},
313
+ "source": [
314
+ "# Feature Engineering"
315
+ ]
316
+ },
317
+ {
318
+ "cell_type": "code",
319
+ "execution_count": null,
320
+ "id": "110ea78a-d709-46bc-b6e7-dd813557bec8",
321
+ "metadata": {},
322
+ "outputs": [],
323
+ "source": [
324
+ "final_df.head()"
325
+ ]
326
+ },
327
+ {
328
+ "cell_type": "code",
329
+ "execution_count": null,
330
+ "id": "5c374064-e47c-40f0-baf7-54e0ff842560",
331
+ "metadata": {},
332
+ "outputs": [],
333
+ "source": [
334
+ "# make X and y\n",
335
+ "\n",
336
+ "X = final_df.drop(columns=['survived'])\n",
337
+ "y = final_df['survived']"
338
+ ]
339
+ },
340
+ {
341
+ "cell_type": "code",
342
+ "execution_count": null,
343
+ "id": "51861761-7ee7-4613-9992-2ddfaef05b53",
344
+ "metadata": {},
345
+ "outputs": [],
346
+ "source": [
347
+ "X.head()"
348
+ ]
349
+ },
350
+ {
351
+ "cell_type": "code",
352
+ "execution_count": null,
353
+ "id": "503e0bb6-af40-43d8-8614-8c56b5910ae3",
354
+ "metadata": {},
355
+ "outputs": [],
356
+ "source": [
357
+ "# do train test split\n",
358
+ "\n",
359
+ "X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)\n",
360
+ "\n",
361
+ "print('The shape of training data is',X_train.shape)\n",
362
+ "print('The shape of testing data is',X_test.shape)"
363
+ ]
364
+ },
365
+ {
366
+ "cell_type": "markdown",
367
+ "id": "970b2558-9fe4-4bf7-9d36-80775f1a640d",
368
+ "metadata": {},
369
+ "source": [
370
+ "## Pipelines for Individual Columns"
371
+ ]
372
+ },
373
+ {
374
+ "cell_type": "code",
375
+ "execution_count": null,
376
+ "id": "ce21c311-c9b5-48fb-9619-1c386b95b065",
377
+ "metadata": {},
378
+ "outputs": [],
379
+ "source": [
380
+ "# age_pipeline\n",
381
+ "age_pipe = Pipeline(steps=[\n",
382
+ " ('impute',SimpleImputer(strategy='median')),\n",
383
+ " ('outliers',Winsorizer(capping_method='gaussian',fold=3)),\n",
384
+ " ('scale',StandardScaler())\n",
385
+ "])\n",
386
+ "\n",
387
+ "\n",
388
+ "age_pipe"
389
+ ]
390
+ },
391
+ {
392
+ "cell_type": "code",
393
+ "execution_count": null,
394
+ "id": "e9bc1761-c7d8-43ab-939e-ca1a84249af5",
395
+ "metadata": {},
396
+ "outputs": [],
397
+ "source": [
398
+ "# fare pipeline\n",
399
+ "\n",
400
+ "fare_pipe = Pipeline(steps=[\n",
401
+ " ('outliers',Winsorizer(capping_method='iqr',fold=1.5)),\n",
402
+ " ('scale',StandardScaler())\n",
403
+ "])\n",
404
+ "\n",
405
+ "fare_pipe"
406
+ ]
407
+ },
408
+ {
409
+ "cell_type": "code",
410
+ "execution_count": null,
411
+ "id": "d588548f-ae54-43d3-8efe-16f34dd66954",
412
+ "metadata": {},
413
+ "outputs": [],
414
+ "source": [
415
+ "# embarked_pipeline\n",
416
+ "\n",
417
+ "embarked_pipe = Pipeline(steps=[\n",
418
+ " ('impute',SimpleImputer(strategy='most_frequent')),\n",
419
+ " ('count_encode',CountFrequencyEncoder(encoding_method='count')),\n",
420
+ " ('scale',MinMaxScaler())\n",
421
+ "])\n",
422
+ "\n",
423
+ "embarked_pipe"
424
+ ]
425
+ },
426
+ {
427
+ "cell_type": "markdown",
428
+ "id": "24838a6d-af02-44dc-abfc-addd714f7533",
429
+ "metadata": {},
430
+ "source": [
431
+ "## Column Transformer"
432
+ ]
433
+ },
434
+ {
435
+ "cell_type": "code",
436
+ "execution_count": null,
437
+ "id": "1af74974-3b86-49ea-b495-663d20edd0a0",
438
+ "metadata": {},
439
+ "outputs": [],
440
+ "source": [
441
+ "set_config(transform_output='pandas')"
442
+ ]
443
+ },
444
+ {
445
+ "cell_type": "code",
446
+ "execution_count": null,
447
+ "id": "95f9b639-2194-4cdc-b565-9021eb933aaf",
448
+ "metadata": {},
449
+ "outputs": [],
450
+ "source": [
451
+ "# make the column transformer\n",
452
+ "\n",
453
+ "preprocessor = ColumnTransformer(transformers=[\n",
454
+ " ('age',age_pipe,['age']),\n",
455
+ " ('fare',fare_pipe,['fare']),\n",
456
+ " ('embarked',embarked_pipe,['embarked']),\n",
457
+ " ('sex',OneHotEncoder(sparse_output=False,handle_unknown='ignore'),['sex']),\n",
458
+ " ('family',MinMaxScaler(),['family'])\n",
459
+ "],remainder='passthrough',n_jobs=-1,force_int_remainder_cols=False)\n",
460
+ "\n",
461
+ "preprocessor"
462
+ ]
463
+ },
464
+ {
465
+ "cell_type": "code",
466
+ "execution_count": null,
467
+ "id": "aa6aa741-afc3-449c-b75d-38a1bea32de6",
468
+ "metadata": {},
469
+ "outputs": [],
470
+ "source": [
471
+ "# fit and transform the training data\n",
472
+ "\n",
473
+ "preprocessor.fit_transform(X_train)"
474
+ ]
475
+ },
476
+ {
477
+ "cell_type": "code",
478
+ "execution_count": null,
479
+ "id": "9ad34e5a-43e4-4e81-b2bb-b92e2c0b90ca",
480
+ "metadata": {},
481
+ "outputs": [],
482
+ "source": [
483
+ "preprocessor.get_params()"
484
+ ]
485
+ },
486
+ {
487
+ "cell_type": "markdown",
488
+ "id": "898afc54-e717-4b3e-9142-c6235abdfe0a",
489
+ "metadata": {},
490
+ "source": [
491
+ "# Model Pipeline"
492
+ ]
493
+ },
494
+ {
495
+ "cell_type": "code",
496
+ "execution_count": null,
497
+ "id": "a5c5d60d-3746-46c1-b15b-0bc59f62a187",
498
+ "metadata": {},
499
+ "outputs": [],
500
+ "source": [
501
+ "# build the model pipeline\n",
502
+ "\n",
503
+ "model_params = {'bootstrap': True,\n",
504
+ " 'ccp_alpha': 0.0,\n",
505
+ " 'class_weight': None,\n",
506
+ " 'criterion': 'gini',\n",
507
+ " 'max_depth': 6,\n",
508
+ " 'max_features': 'sqrt',\n",
509
+ " 'max_leaf_nodes': None,\n",
510
+ " 'max_samples': 0.8,\n",
511
+ " 'min_impurity_decrease': 0.0,\n",
512
+ " 'min_samples_leaf': 1,\n",
513
+ " 'min_samples_split': 2,\n",
514
+ " 'min_weight_fraction_leaf': 0.0,\n",
515
+ " 'monotonic_cst': None,\n",
516
+ " 'n_estimators': 300,\n",
517
+ " 'n_jobs': -1,\n",
518
+ " 'oob_score': False,\n",
519
+ " 'random_state': 30,\n",
520
+ " 'verbose': 0,\n",
521
+ " 'warm_start': False}"
522
+ ]
523
+ },
524
+ {
525
+ "cell_type": "code",
526
+ "execution_count": null,
527
+ "id": "b19559c5-53cb-4630-b64d-cbf2a1c9ca39",
528
+ "metadata": {},
529
+ "outputs": [],
530
+ "source": [
531
+ "model_pipe = Pipeline(steps=[\n",
532
+ " ('preprocessor',preprocessor),\n",
533
+ " ('clf',RandomForestClassifier(**model_params))\n",
534
+ "])\n",
535
+ "\n",
536
+ "model_pipe"
537
+ ]
538
+ },
539
+ {
540
+ "cell_type": "code",
541
+ "execution_count": null,
542
+ "id": "66876201-5959-45ca-9112-ef7d16bf66b5",
543
+ "metadata": {},
544
+ "outputs": [],
545
+ "source": [
546
+ "# fit the model on the training data\n",
547
+ "\n",
548
+ "model_pipe.fit(X_train,y_train)"
549
+ ]
550
+ },
551
+ {
552
+ "cell_type": "code",
553
+ "execution_count": null,
554
+ "id": "eaf4ffb7-1763-4000-b9bc-3d2a8b776704",
555
+ "metadata": {},
556
+ "outputs": [],
557
+ "source": [
558
+ "# evaluate the model on the test data\n",
559
+ "\n",
560
+ "y_pred = model_pipe.predict(X_test)\n",
561
+ "\n",
562
+ "accuracy = accuracy_score(y_test,y_pred)\n",
563
+ "precision = precision_score(y_test,y_pred).item()\n",
564
+ "recall = recall_score(y_test,y_pred).item()\n",
565
+ "f1 = f1_score(y_test,y_pred).item()"
566
+ ]
567
+ },
568
+ {
569
+ "cell_type": "code",
570
+ "execution_count": null,
571
+ "id": "3b4d315f-690e-442e-b2f0-f1872e6ef579",
572
+ "metadata": {},
573
+ "outputs": [],
574
+ "source": [
575
+ "# metrics dict\n",
576
+ "\n",
577
+ "metrics = {\n",
578
+ " 'accuracy': accuracy,\n",
579
+ " 'precision': precision,\n",
580
+ " 'recall': recall,\n",
581
+ " 'f1_score': f1\n",
582
+ "}\n",
583
+ "\n",
584
+ "metrics"
585
+ ]
586
+ },
587
+ {
588
+ "cell_type": "code",
589
+ "execution_count": null,
590
+ "id": "0ba611a6-9d53-4e5a-ab68-7fc8cd615779",
591
+ "metadata": {},
592
+ "outputs": [],
593
+ "source": [
594
+ "# plot confusion matrix\n",
595
+ "\n",
596
+ "cm = ConfusionMatrixDisplay.from_predictions(y_test,y_pred)"
597
+ ]
598
+ },
599
+ {
600
+ "cell_type": "markdown",
601
+ "id": "d57486a5-e1e2-43c3-8090-b880b76bad74",
602
+ "metadata": {},
603
+ "source": [
604
+ "# MLFlow Tracking code"
605
+ ]
606
+ },
607
+ {
608
+ "cell_type": "code",
609
+ "execution_count": null,
610
+ "id": "25849a92-97bd-4f7e-a40b-4b593697080f",
611
+ "metadata": {},
612
+ "outputs": [],
613
+ "source": [
614
+ "model_pipe.get_params()"
615
+ ]
616
+ },
617
+ {
618
+ "cell_type": "code",
619
+ "execution_count": null,
620
+ "id": "5cee3f45-97ee-4888-bff3-f0f59031d906",
621
+ "metadata": {},
622
+ "outputs": [],
623
+ "source": [
624
+ "X_test.join(y_test)"
625
+ ]
626
+ },
627
+ {
628
+ "cell_type": "code",
629
+ "execution_count": null,
630
+ "id": "f0e312f1-a1c8-491d-86d3-917296af16a8",
631
+ "metadata": {},
632
+ "outputs": [],
633
+ "source": [
634
+ "# set the URI of the MLflow tracking server\n",
635
+ "\n",
636
+ "mlflow.set_tracking_uri(\"http://127.0.0.1:8080\")\n",
637
+ "\n",
638
+ "mlflow.set_experiment(\"Mentos Zindagi\")\n",
639
+ "\n",
640
+ "with mlflow.start_run() as run:\n",
641
+ " # log the data signature\n",
642
+ " # infer the model signature from the training inputs and the pipeline's predictions\n",
643
+ "\n",
644
+ " # log all pipeline parameters (preprocessor and classifier)\n",
645
+ " mlflow.log_params(model_pipe.get_params())\n",
646
+ "\n",
647
+ " # log model metrics\n",
648
+ " mlflow.log_metrics(metrics)\n",
649
+ " \n",
650
+ " # log the model\n",
651
+ " mlflow.sklearn.log_model(sk_model=model_pipe,artifact_path=\"model.pkl\",signature=data_signature)\n",
652
+ "\n",
653
+ " # Get the model uri\n",
654
+ " model_uri = mlflow.get_artifact_uri(\"model.pkl\")\n",
655
+ " \n",
656
+ " # # evaluate the model\n",
657
+ " # evaluations = mlflow.models.evaluate(model=model_uri,\n",
658
+ " # data=X_test.join(y_test),\n",
659
+ " # targets='survived',\n",
660
+ " # model_type=\"classifier\")\n",
661
+ "\n",
662
+ " # log the confusion matrix\n",
663
+ " mlflow.log_figure(cm.figure_,artifact_file='confusion_matrix.png')"
664
+ ]
665
+ },
666
+ {
667
+ "cell_type": "code",
668
+ "execution_count": null,
669
+ "id": "6db5e7a5-486f-4fb1-9070-77db2af3e98a",
670
+ "metadata": {},
671
+ "outputs": [],
672
+ "source": []
673
+ }
674
+ ],
675
+ "metadata": {
676
+ "kernelspec": {
677
+ "display_name": "Python 3 (ipykernel)",
678
+ "language": "python",
679
+ "name": "python3"
680
+ },
681
+ "language_info": {
682
+ "codemirror_mode": {
683
+ "name": "ipython",
684
+ "version": 3
685
+ },
686
+ "file_extension": ".py",
687
+ "mimetype": "text/x-python",
688
+ "name": "python",
689
+ "nbconvert_exporter": "python",
690
+ "pygments_lexer": "ipython3",
691
+ "version": "3.11.9"
692
+ }
693
+ },
694
+ "nbformat": 4,
695
+ "nbformat_minor": 5
696
+ }