Spaces:
Runtime error
Runtime error
ziggycross
committed on
Commit
·
80e8771
1
Parent(s):
47cc2c7
Added data cleaner.
Browse files
- loader-cleaner.ipynb +21 -0
loader-cleaner.ipynb
CHANGED
@@ -64,6 +64,27 @@
|
|
64 |
" except Exception as error:\n",
|
65 |
" print(f\"Error: Unable to read file '{file_candidate}' ({str(type(error))}: {error})\".ljust(120))"
|
66 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
67 |
}
|
68 |
],
|
69 |
"metadata": {
|
|
|
64 |
" except Exception as error:\n",
|
65 |
" print(f\"Error: Unable to read file '{file_candidate}' ({str(type(error))}: {error})\".ljust(120))"
|
66 |
]
|
67 |
+
},
|
68 |
+
{
|
69 |
+
"attachments": {},
|
70 |
+
"cell_type": "markdown",
|
71 |
+
"metadata": {},
|
72 |
+
"source": [
|
73 |
+
"### Clean data to remove duplicates and rows with missing values."
|
74 |
+
]
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"cell_type": "code",
|
78 |
+
"execution_count": 3,
|
79 |
+
"metadata": {},
|
80 |
+
"outputs": [],
|
81 |
+
"source": [
|
82 |
+
"DROP_MISSING = True\n",
|
83 |
+
"REMOVE_DUPLICATES = True\n",
|
84 |
+
"\n",
|
85 |
+
"df = df.dropna(how=\"any\" if DROP_MISSING else \"all\")\n",
|
86 |
+
"if REMOVE_DUPLICATES: df = df.drop_duplicates()"
|
87 |
+
]
|
88 |
}
|
89 |
],
|
90 |
"metadata": {
|