-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcity_master.py
38 lines (31 loc) · 1.13 KB
/
city_master.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
import pandas as pd
import numpy as np
from pydbgen import pydbgen
import uuid
import sys
# Number of rows to generate, read from the first command-line argument.
size = int(sys.argv[1])
class DataGenerator:
    """Build a synthetic city table with name, type and identifier columns.

    Names come from ``self.pyDb.city_real()``, types are sampled with
    ``np.random.choice``, and identifiers are UUID4 strings.
    """

    def __init__(self, datasetSize=1):
        """Create a generator that produces ``datasetSize`` rows (default 1)."""
        self.pyDb = pydbgen.pydb()
        self.datasetSize = datasetSize

    def getCityName(self, size):
        """Return a NumPy array of ``size`` values from ``pyDb.city_real()``."""
        return np.array([self.pyDb.city_real() for _ in range(size)])

    def getCityType(self, size, levels=('Business', 'Leisure'),
                    prob=(0.28, 0.72)):
        """Return a NumPy array of ``size`` city types sampled at random.

        ``levels`` lists the candidate labels and ``prob`` the matching
        selection probabilities; both are passed straight to
        ``np.random.choice``. The defaults are tuples rather than lists so
        the shared default objects can never be mutated between calls.
        """
        return np.random.choice(levels, size=size, p=prob)

    def getCityID(self, size):
        """Return a list of ``size`` freshly generated UUID4 strings."""
        return [str(uuid.uuid4()) for _ in range(size)]

    def genDataset(self):
        """Return a DataFrame of ``self.datasetSize`` rows.

        The columns are ``CityName``, ``CityType`` and ``CityId``, each
        filled by the corresponding generator method.
        """
        size = self.datasetSize
        data = {
            'CityName': self.getCityName(size),
            'CityType': self.getCityType(size),
            'CityId': self.getCityID(size),
        }
        return pd.DataFrame(data)
# Generate the requested number of rows and write them to CSV without the
# DataFrame index column.
myDataGen = DataGenerator(datasetSize=size)
myDataFrame = myDataGen.genDataset()
myDataFrame.to_csv(path_or_buf='other_data/city_master.csv', index=False)