-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate_data.py
61 lines (55 loc) · 2.27 KB
/
generate_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from __future__ import print_function
import pandas as pd
import numpy as np
from PIL import Image
import os
from tqdm import tqdm
class Generate_data():
    """
    Helper for preparing CSV image data stored as space-separated pixel strings.

    Two methods are meant to be called by users:
        1- split_test  : carve a validation CSV out of ``train.csv``
        2- save_images : dump every row of a CSV as a JPEG file

    [Note] the public and private partitions of the fer2013 file have to be
    split into their own CSV files before using this class.
    """

    # Side length (in pixels) of the square grayscale images encoded in the CSV.
    IMAGE_SIZE = 48

    def __init__(self, datapath):
        """
        params:-
            datapath = path to the folder that contains the CSV data files
        """
        self.data_path = datapath

    def split_test(self, val_filename='val', val_size=3589):
        """
        Split ``train.csv`` into a validation file and a (smaller) train file.

        The first ``val_size`` rows are written to ``<val_filename>.csv`` and
        the remaining rows are written back to ``train.csv``.

        [Warning] ``train.csv`` is overwritten in place, so calling this method
        again removes another ``val_size`` rows from the train data.

        params:-
            val_filename = name (without extension) of the validation CSV file
            val_size     = number of leading rows moved to the validation file
        raises:-
            ValueError if ``val_size`` is negative
        """
        if val_size < 0:
            raise ValueError(
                'val_size must be non-negative, got {}'.format(val_size))

        train_csv_path = os.path.join(self.data_path, 'train.csv')
        val_csv_path = os.path.join(self.data_path, val_filename + '.csv')

        train = pd.read_csv(train_csv_path)
        validation_data = train.iloc[:val_size]
        train_data = train.iloc[val_size:]

        # index=False keeps the row index out of the files; otherwise every
        # write/read round trip adds a spurious "Unnamed: 0" column.
        train_data.to_csv(train_csv_path, index=False)
        validation_data.to_csv(val_csv_path, index=False)
        print("Done splitting the train file into validation & train files")

    def str_to_image(self, str_img=' '):
        """
        Convert a string of pixels from the csv file into an image object.

        params:-
            str_img = whitespace-separated pixel values describing an
                      IMAGE_SIZE x IMAGE_SIZE grayscale image
        return :-
            PIL image object
        raises:-
            ValueError if the string does not hold IMAGE_SIZE * IMAGE_SIZE values
        """
        # split() without an argument tolerates repeated/trailing whitespace,
        # which split(' ') would turn into empty (unparseable) tokens.
        pixels = str_img.split()
        expected = self.IMAGE_SIZE * self.IMAGE_SIZE
        if len(pixels) != expected:
            raise ValueError(
                'expected {} pixel values, got {}'.format(expected, len(pixels)))
        imgarray = np.asarray(pixels, dtype=np.uint8).reshape(
            self.IMAGE_SIZE, self.IMAGE_SIZE)
        return Image.fromarray(imgarray)

    def save_images(self, datatype='train'):
        """
        Save every image of ``<datatype>.csv`` as a JPEG in folder ``<datatype>``.

        Images are read from the ``pixels`` column and written to
        ``<data_path>/<datatype>/<datatype><row number>.jpg``.

        params:-
            datatype = str e.g (train, val, test)
        """
        foldername = os.path.join(self.data_path, datatype)
        csvfile_path = os.path.join(self.data_path, datatype + '.csv')

        # Read first so that a missing CSV does not leave an empty folder behind.
        data = pd.read_csv(csvfile_path)
        images = data['pixels']

        if not os.path.isdir(foldername):
            os.makedirs(foldername)

        # enumerate gives the positional row number regardless of the
        # Series' index labels.
        for index, pixels in enumerate(tqdm(images)):
            img = self.str_to_image(pixels)
            img.save(
                os.path.join(foldername, '{}{}.jpg'.format(datatype, index)),
                'JPEG')
        print('Done saving {} data'.format((foldername)))