-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdataset.py
47 lines (35 loc) · 1.51 KB
/
dataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import constants as C
class Dataset():
    """Load and prepare the "Churn modelling" CSV for model training.

    Typical flow: ``preprocess()`` -> ``split()`` -> ``scale()``.

    The transformers fitted along the way are kept on the instance
    (``label_encoder``, ``column_transformer``, ``scaler``) so that the exact
    same encoding and scaling can later be applied to unseen rows. Each is
    ``None`` until the corresponding method has been called.
    """

    def __init__(self, path='churn_modelling.csv'):
        """Read the CSV into ``self.dataset``.

        Args:
            path: File name, appended to ``C.DIR_PATH``.
        """
        self.path = path
        # Plain string concatenation, so C.DIR_PATH presumably already ends
        # with a path separator -- TODO confirm against the constants module.
        self.dataset = pd.read_csv(C.DIR_PATH + self.path)

        # Populated by preprocess() / scale(); retained for reuse at
        # inference time.
        self.label_encoder = None
        self.column_transformer = None
        self.scaler = None

    def preprocess(self):
        """Encode the categorical columns and return features and target.

        Features are every column from position 3 up to (excluding) the last
        one; the last column is the target.

        Returns:
            A ``(X, y)`` pair of NumPy arrays. ``X`` is always a dense 2-D
            array in which the one-hot columns for "Geography" come first,
            followed by the remaining feature columns in their original order.
        """
        # this is a preprocessing module for "Churn modelling" dataset
        features = self.dataset.iloc[:, 3:-1].values
        target = self.dataset.iloc[:, -1].values

        # Encoding categorical data
        # Label Encoding the "Gender" column (feature index 2)
        self.label_encoder = LabelEncoder()
        features[:, 2] = self.label_encoder.fit_transform(features[:, 2])

        # One Hot Encoding the "Geography" column (feature index 1).
        # sparse_threshold=0 forces a dense result: with the default
        # threshold a high-cardinality column makes fit_transform return a
        # SciPy sparse matrix, which np.array() would wrap in a 0-d object
        # array instead of converting.
        self.column_transformer = ColumnTransformer(
            transformers=[('encoder', OneHotEncoder(), [1])],
            remainder='passthrough',
            sparse_threshold=0,
        )
        X = np.array(self.column_transformer.fit_transform(features))
        y = np.array(target)
        return X, y

    def split(self, X, y, fraction=C.TEST_SPLIT_FRACTION):
        """Split features and target into training and test sets.

        Args:
            X: Feature matrix.
            y: Target values aligned with the rows of ``X``.
            fraction: Passed to ``train_test_split`` as ``test_size``.

        Returns:
            ``(X_train, X_test, y_train, y_test)``.
        """
        # Splitting the dataset into the Training set and Test set.
        # random_state is fixed so the split is reproducible across runs.
        return train_test_split(X, y, test_size=fraction, random_state=0)

    def scale(self, X_train, X_test):
        """Standardize the features.

        The scaler is fitted on the training set only and then applied to the
        test set, so no test-set statistics leak into training.

        Args:
            X_train: Training feature matrix.
            X_test: Test feature matrix.

        Returns:
            ``(X_train, X_test)`` after scaling.
        """
        # Feature Scaling
        self.scaler = StandardScaler()
        X_train = self.scaler.fit_transform(X_train)
        X_test = self.scaler.transform(X_test)
        return X_train, X_test