-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtabular_learner.py
58 lines (43 loc) · 1.83 KB
/
tabular_learner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from fastai.tabular.all import *
import pandas as pd
import numpy as np
import missingno
# Load the train and test CSV files
train_df = pd.read_csv("train.csv")
test_df = pd.read_csv("test.csv")
## The 'id' column is removed from both frames
train_df = train_df.drop(columns="id")
test_df = test_df.drop(columns="id")
## Summary statistics of the training frame
## Author's notes: this is a regression problem with target {min: 0, max: 10, mean: 8};
## there are no missing values and the features are approximately normalised
train_df.describe()
# Pre-processing
## Column groups: categorical cat0..cat9 and continuous cont0..cont13
cat_names = [f"cat{i}" for i in range(10)]
cont_names = [f"cont{i}" for i in range(14)]
## Processing steps handed to TabularPandas
procs = [Categorify, FillMissing]
## Single target column, modelled with a regression block
y_names = "target"
block_y = RegressionBlock()
## Random split computed over the row indices of train_df
splitter = RandomSplitter()
splits = splitter(range_of(train_df))
## Output range for the model: predictions are restricted to lie between 0 and 15
y_range = torch.tensor([0, 15])
## Build the TabularPandas object from the pieces above
to = TabularPandas(
    train_df,
    procs=procs,
    cat_names=cat_names,
    cont_names=cont_names,
    y_names=y_names,
    y_block=block_y,
    splits=splits,
)
# DataLoaders with batch size 4096 (kept a multiple of 8, per the original author's note)
dls = to.dataloaders(bs=4096)
# Build the tabular learner
## Layer sizes: 250, 750 and 100
## Loss function: MSELossFlat; tracked metric: RMSE
## Output range comes from y_range
learn = tabular_learner(
    dls,
    layers=[250, 750, 100],
    y_range=y_range,
    metrics=rmse,
    loss_func=MSELossFlat(),
)
# Run the learning-rate finder (its result is not used below)
learn.lr_find()
# Train with fit_one_cycle: 10 epochs, learning rate 1e-3, weight decay 0.2
learn.fit_one_cycle(10, 1e-3, wd=0.2)
# Obtain Validation RMSE
## Evaluate on the held-out validation split only. A DataLoader built from all of
## train_df would also score the rows the model was fitted on, so the reported
## number would not be a validation score.
dl = learn.dls.valid
valid_scores = learn.validate(dl=dl)
## Print the result: a bare expression is silently discarded when run as a script
print("Validation loss and metrics:", valid_scores)
## Alternate method of validation RMSE: recompute it from the raw predictions
## get_preds returns the predictions first and the targets second
preds = learn.get_preds(dl=dl)
valid_rmse = np.array(rmse(preds[0], preds[1]))
print("Validation RMSE:", valid_rmse)
# Test-set inference
## Wrap test_df in a DataLoader created from the learner's DataLoaders
test_dl = learn.dls.test_dl(test_df)
## Run the model over that DataLoader and keep the result in test_preds
test_preds = learn.get_preds(dl=test_dl)