-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpreprocessing.py
28 lines (24 loc) · 1.1 KB
/
preprocessing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from sklearn.decomposition import PCA
from sklearn.preprocessing import Binarizer, PowerTransformer, StandardScaler
##------------------------------------------------------------------##
## Preprocessing Utils
##------------------------------------------------------------------##
def preprocess(X_train, X_test, standardize=True, yeo_johnson=False,
               pca=False, binarize=False, n_components=25):
    """Apply the selected preprocessing steps to a train/test pair.

    Each enabled transformer is fitted on ``X_train`` only and then applied
    to both ``X_train`` and ``X_test``, so no statistics are learned from the
    test split. Steps always run in this fixed order, regardless of the
    order of the arguments: Yeo-Johnson power transform, PCA,
    standardization, binarization.

    Args:
        X_train: Training features; used to fit every enabled transformer.
        X_test: Test features; transformed with the train-fitted
            transformers.
        standardize: If true, scale with ``StandardScaler``.
        yeo_johnson: If true, apply ``PowerTransformer`` with
            ``method='yeo-johnson'`` and ``standardize=False``.
        pca: If true, project onto ``n_components`` principal components.
        binarize: If true, apply ``Binarizer`` with ``threshold=0``.
        n_components: Forwarded to ``PCA(n_components=...)`` when ``pca`` is
            enabled. Defaults to 25, the value that used to be hard-coded.

    Returns:
        A ``(X_train, X_test)`` tuple holding the transformed data. When
        every step is disabled the two inputs are returned untouched.

    Note:
        The scaler is created with ``copy=False``, so when standardization
        is the first enabled step the caller's arrays may be scaled in
        place rather than copied (per scikit-learn's documentation of
        ``copy``; not guaranteed for every input type).
    """
    # Collect the enabled transformers in application order. Constructing
    # them up front is safe: nothing is computed until ``fit`` is called.
    steps = []
    if yeo_johnson:
        steps.append(PowerTransformer(method='yeo-johnson', standardize=False))
    if pca:
        steps.append(PCA(n_components=n_components))
    if standardize:
        steps.append(StandardScaler(copy=False))
    if binarize:
        steps.append(Binarizer(threshold=0))

    for step in steps:
        # Fit on the current (already partially transformed) training data
        # only, then push both splits through the fitted transformer.
        fitted = step.fit(X_train)
        X_train = fitted.transform(X_train)
        X_test = fitted.transform(X_test)

    return X_train, X_test