-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathDataLoader.py
61 lines (49 loc) · 2.52 KB
/
DataLoader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from sklearn.preprocessing import MinMaxScaler
import os
import torch
import numpy as np
import random
import matplotlib.pyplot as plt
from joblib import dump
from icecream import ic
class SensorDataset(Dataset):
"""Face Landmarks dataset."""
def __init__(self, csv_name, root_dir, training_length, forecast_window):
"""
Args:
csv_file (string): Path to the csv file.
root_dir (string): Directory
"""
# load raw data file
csv_file = os.path.join(root_dir, csv_name)
self.df = pd.read_csv(csv_file)
self.root_dir = root_dir
self.transform = MinMaxScaler()
self.T = training_length
self.S = forecast_window
def __len__(self):
# return number of sensors
return len(self.df.groupby(by=["reindexed_id"]))
# Will pull an index between 0 and __len__.
def __getitem__(self, idx):
# Sensors are indexed from 1
idx = idx+1
# np.random.seed(0)
start = np.random.randint(0, len(self.df[self.df["reindexed_id"]==idx]) - self.T - self.S)
sensor_number = str(self.df[self.df["reindexed_id"]==idx][["sensor_id"]][start:start+1].values.item())
index_in = torch.tensor([i for i in range(start, start+self.T)])
index_tar = torch.tensor([i for i in range(start + self.T, start + self.T + self.S)])
_input = torch.tensor(self.df[self.df["reindexed_id"]==idx][["humidity", "sin_hour", "cos_hour", "sin_day", "cos_day", "sin_month", "cos_month"]][start : start + self.T].values)
target = torch.tensor(self.df[self.df["reindexed_id"]==idx][["humidity", "sin_hour", "cos_hour", "sin_day", "cos_day", "sin_month", "cos_month"]][start + self.T : start + self.T + self.S].values)
# scalar is fit only to the input, to avoid the scaled values "leaking" information about the target range.
# scalar is fit only for humidity, as the timestamps are already scaled
# scalar input/output of shape: [n_samples, n_features].
scaler = self.transform
scaler.fit(_input[:,0].unsqueeze(-1))
_input[:,0] = torch.tensor(scaler.transform(_input[:,0].unsqueeze(-1)).squeeze(-1))
target[:,0] = torch.tensor(scaler.transform(target[:,0].unsqueeze(-1)).squeeze(-1))
# save the scalar to be used later when inverse translating the data for plotting.
dump(scaler, 'scalar_item.joblib')
return index_in, index_tar, _input, target, sensor_number