TakeWhatsYours
committed on
Upload First_Version.py
Browse files- First_Version.py +65 -0
First_Version.py
ADDED
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Train and evaluate an SVM classifier that predicts the "genre" column of
# Data_With_Phonks_and_Not_Phonks.csv from audio-feature columns.
#
# Steps: load the CSV, hold out 20% as a test set, impute/scale the numeric
# features and impute/one-hot encode the categorical ones, fit an SVC, then
# report held-out accuracy and a 10-fold cross-validation score.

import numpy as np
import pandas as pd
# enable_iterative_imputer must be imported before IterativeImputer; it is
# only needed for its side effect of exposing the experimental estimator.
from sklearn.experimental import enable_iterative_imputer  # noqa: F401
from sklearn.compose import ColumnTransformer
from sklearn.impute import IterativeImputer, SimpleImputer
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, StandardScaler
from sklearn.svm import SVC

df = pd.read_csv("Data_With_Phonks_and_Not_Phonks.csv")

train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# --- Exploratory step: impute every numeric column of the training split ---
# The results (X, train_data_tr) are not used by the model below; they are
# kept so the intermediate objects remain available for inspection.
imputer = IterativeImputer(initial_strategy="median", random_state=42)
training_data_num = train_data.select_dtypes(include=[np.number])
imputer.fit(training_data_num)
X = imputer.transform(training_data_num)
# A bare attribute expression shows nothing when run as a script, so print it.
print("Columns seen by the imputer:", list(imputer.feature_names_in_))

train_data_tr = pd.DataFrame(
    X,
    columns=training_data_num.columns,
    index=training_data_num.index,
)

# --- Preprocessing pipelines ---
num_pipeline = Pipeline([
    # random_state matches the standalone imputer above for consistency.
    ("imputer", IterativeImputer(initial_strategy="median", random_state=42)),
    ("scaler", StandardScaler()),
])

cat_pipeline = Pipeline([
    # Categories not seen during fit are encoded as NaN so that the imputer
    # below fills them in instead of the encoder raising an error.
    ("ordinal_encoder", OrdinalEncoder(handle_unknown="use_encoded_value",
                                       unknown_value=np.nan)),
    # Category codes are labels, not quantities: fill gaps with the most
    # frequent code. A regression-based IterativeImputer could produce
    # fractional codes that the one-hot encoder would treat as new categories.
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("cat_encoder", OneHotEncoder(sparse_output=False)),
])

num_attribs = ["danceability_%", "energy_%", "bpm", "speechiness_%", "acousticness_%",
               "instrumentalness_%", "liveness_%", "valence_%"]
cat_attribs = ["key", "mode"]

# Columns not listed here (including "genre") are dropped by the
# ColumnTransformer default, remainder="drop".
preprocess_pipeline = ColumnTransformer([
    ("num", num_pipeline, num_attribs),
    ("cat", cat_pipeline, cat_attribs),
])

# --- Fit on the training split ---
X_train = preprocess_pipeline.fit_transform(train_data)
print("Preprocessed training matrix shape:", X_train.shape)
y_train = train_data["genre"]

svm_clf = SVC(random_state=42)
svm_clf.fit(X_train, y_train)

# --- Evaluate on the held-out split ---
X_test = preprocess_pipeline.transform(test_data)
y_test = test_data["genre"]
y_pred = svm_clf.predict(X_test)
print(f"Held-out accuracy: {np.mean(y_pred == y_test):.3f}")

# --- Cross-validation ---
# Cross-validate preprocessing and classifier together on the raw training
# frame so that the imputers, scaler and encoders are re-fitted inside each
# fold; scoring the already-transformed X_train would leak statistics from
# the validation folds into training. cross_val_score clones the estimator,
# so the fitted preprocess_pipeline and svm_clf above are left untouched.
cv_model = Pipeline([
    ("preprocess", preprocess_pipeline),
    ("svm", svm_clf),
])
svm_scores = cross_val_score(cv_model, train_data, y_train, cv=10)
print(f"10-fold CV accuracy: {svm_scores.mean():.3f} (std {svm_scores.std():.3f})")