[Modify] fixed some warnings
Y-nuclear committed Jul 23, 2024
1 parent 0a359e4 commit 4b16b0c
Showing 5 changed files with 84 additions and 38 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -7,3 +7,4 @@
*_models/
dist/
*.toml
*.log
5 changes: 4 additions & 1 deletion src/gnnwr/__init__.py
@@ -1 +1,4 @@
__version__ = '1.0.0'
from .datasets import *
from .models import *
from .networks import *
from .utils import *
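Note: with these star imports, names defined in the submodules become importable directly from the package. A quick sketch (init_dataset is defined in datasets.py, as shown later in this diff):

    from gnnwr import init_dataset            # now works at package level
    from gnnwr.datasets import init_dataset   # still works as before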
87 changes: 57 additions & 30 deletions src/gnnwr/datasets.py
@@ -160,12 +160,18 @@ def scale2(self, scale_fn, scale_params):
self.scale_fn = "minmax_scale"
x_scale_params = scale_params[0]
y_scale_params = scale_params[1]
self.x_data = (self.x_data - x_scale_params["min"]) / (x_scale_params["max"] - x_scale_params["min"])
if x_scale_params is not None:
self.x_data = (self.x_data - x_scale_params["min"]) / (x_scale_params["max"] - x_scale_params["min"])
if y_scale_params is not None:
self.y_data = (self.y_data - y_scale_params["min"]) / (y_scale_params["max"] - y_scale_params["min"])
elif scale_fn == "standard_scale":
self.scale_fn = "standard_scale"
x_scale_params = scale_params[0]
y_scale_params = scale_params[1]
self.x_data = (self.x_data - x_scale_params['mean']) / np.sqrt(x_scale_params["var"])
if x_scale_params is not None:
self.x_data = (self.x_data - x_scale_params['mean']) / np.sqrt(x_scale_params["var"])
if y_scale_params is not None:
self.y_data = (self.y_data - y_scale_params['mean']) / np.sqrt(y_scale_params["var"])

self.getScaledDataframe()
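Note: a minimal sketch of the scale parameter layout scale2 expects after this change, inferred from the lines above (dataset stands for any instance of this dataset class; the array values are illustrative). Either entry of the list may now be None, in which case that variable is simply left unscaled:

    import numpy as np

    x_params = {"min": np.array([0.0, 10.0]), "max": np.array([1.0, 50.0])}
    y_params = None   # e.g. a prediction dataset with no dependent variable
    dataset.scale2("minmax_scale", [x_params, y_params])   # y scaling is skipped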

@@ -180,7 +186,7 @@ def getScaledDataframe(self):
scaledData = np.concatenate((self.x_data, self.y_data), axis=1)
self.scaledDataframe = pd.DataFrame(scaledData, columns=columns)

def rescale(self, x):
def rescale(self, x, y):
"""
rescale the data with the scale function and scale parameters
@@ -199,24 +205,31 @@ def rescale(self, x):
rescaled dependent variable data
"""
if self.scale_fn == "minmax_scale":
x = np.multiply(x, self.x_scale_info["max"] - self.x_scale_info["min"]) + self.x_scale_info["min"]
if x is not None:
x = np.multiply(x, self.x_scale_info["max"] - self.x_scale_info["min"]) + self.x_scale_info["min"]
if y is not None:
y = np.multiply(y, self.y_scale_info["max"] - self.y_scale_info["min"]) + self.y_scale_info["min"]
elif self.scale_fn == "standard_scale":
x = np.multiply(x, np.sqrt(self.x_scale_info["var"])) + self.x_scale_info["mean"]
if x is not None:
x = np.multiply(x, np.sqrt(self.x_scale_info["var"])) + self.x_scale_info["mean"]
if y is not None:
y = np.multiply(y, np.sqrt(self.y_scale_info["var"])) + self.y_scale_info["mean"]
else:
raise ValueError("invalid process_fn")
return x
return x, y
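Note: rescale now takes and returns both the independent and dependent variables; passing None for either one skips it. A usage sketch with illustrative variable names:

    x_orig, y_orig = dataset.rescale(x_scaled, y_scaled)   # rescale both
    _, y_orig = dataset.rescale(None, y_scaled)             # rescale y only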

def save(self, dirname):
def save(self, dirname, exist_ok=False):
"""
save the dataset
:param dirname: save directory
"""
if os.path.exists(dirname):
if os.path.exists(dirname) and not exist_ok:
raise ValueError("dir is already exists")
if self.dataframe is None:
raise ValueError("dataframe is None")
os.makedirs(dirname)
if not os.path.exists(dirname):
os.makedirs(dirname)
x_scale_info = {}
y_scale_info = {}
for key, value in self.x_scale_info.items():
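Note: the save change above adds an exist_ok switch that mirrors os.makedirs. A usage sketch (the directory name is illustrative):

    dataset.save("./cache/train_dataset", exist_ok=True)   # no longer raises if the directory exists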
@@ -302,11 +315,12 @@ class predictDataset(Dataset):

def __init__(self, data, x_column, process_fn="minmax_scale", scale_info=None, is_need_STNN=False):

# data = data.astype(np.float32)
if scale_info is None:
scale_info = []

self.dataframe = data
self.x = x_column

if data is None:
self.x_data = None
self.datasize = -1
@@ -348,6 +362,7 @@ def __init__(self, data, x_column, process_fn="minmax_scale", scale_info=None, is_need_STNN=False):

self.distances = None
self.temporal = None
self.scale_info_y = None

def __len__(self):
"""
@@ -367,21 +382,27 @@ def __getitem__(self, index):
return torch.tensor(self.distances[index], dtype=torch.float), torch.tensor(self.x_data[index],
dtype=torch.float)

def rescale(self, x):
def rescale(self, x, y):
"""
rescale the attribute data
:param x: Input attribute data
:return: rescaled attribute data
"""
if self.scale_fn == "minmax_scale":
x = x * (self.scale_info_x[1] - self.scale_info_x[0]) + self.scale_info_x[0]
if x is not None:
x = x * (self.scale_info_x[1] - self.scale_info_x[0]) + self.scale_info_x[0]
elif y is not None and self.scale_info_y is not None:
y = y * (self.scale_info_y["max"] - self.scale_info_y["min"]) + self.scale_info_y["min"]
elif self.scale_fn == "standard_scale":
x = x * np.sqrt(self.scale_info_x[1]) + self.scale_info_x[0]
if x is not None:
x = x * np.sqrt(self.scale_info_x[1]) + self.scale_info_x[0]
elif y is not None and self.scale_info_y is not None:
y = y * np.sqrt(self.scale_info_y["var"]) + self.scale_info_y["mean"]
else:
raise ValueError("invalid process_fn")

return x
return x,y

def minmax_scaler(self, x, min=None, max=None):
"""
@@ -577,9 +598,11 @@ def init_dataset(data, test_ratio,
elif use_model == "gtnnwr":
assert temp_column is not None, "temp_column must be not None in gtnnwr"
train_dataset.distances, val_dataset.distances, test_dataset.distances = _init_gtnnwr_distance(
reference_data[spatial_column + temp_column].values, train_data[spatial_column + temp_column].values,
val_data[spatial_column + temp_column].values, test_data[spatial_column + temp_column].values,
spatial_fun,temporal_fun
[reference_data[spatial_column].values,reference_data[temp_column].values],
[train_data[spatial_column].values, train_data[temp_column].values],
[val_data[spatial_column].values, val_data[temp_column].values],
[test_data[spatial_column].values, test_data[temp_column].values],
spatial_fun, temporal_fun
)
elif use_model == "gnnwr spnn":
train_dataset.distances, val_dataset.distances, test_dataset.distances = _init_gnnwr_spnn_distance(
@@ -611,28 +634,31 @@ def init_dataset(data, test_ratio,
distance_scale = StandardScaler()
temporal_scale = StandardScaler()
# scale distance matrix
train_distance_len = len(train_dataset.distances)
val_distance_len = len(val_dataset.distances)
distances = np.concatenate((train_dataset.distances, val_dataset.distances, test_dataset.distances), axis=0)
distances = train_dataset.distances
distances = distance_scale.fit_transform(distances.reshape(-1, distances.shape[-1])).reshape(distances.shape)

train_dataset.distances = distance_scale.transform(train_dataset.distances.reshape(-1, train_dataset.distances.shape[-1])).reshape(train_dataset.distances.shape)
val_dataset.distances = distance_scale.transform(val_dataset.distances.reshape(-1, val_dataset.distances.shape[-1])).reshape(val_dataset.distances.shape)
test_dataset.distances = distance_scale.transform(test_dataset.distances.reshape(-1, test_dataset.distances.shape[-1])).reshape(test_dataset.distances.shape)

if process_fn == "minmax_scale":
distance_scale_param = {"min": distance_scale.data_min_, "max": distance_scale.data_max_}
else:
distance_scale_param = {"mean": distance_scale.mean_, "var": distance_scale.var_}
train_dataset.distances = distances[:train_distance_len]
val_dataset.distances = distances[train_distance_len:train_distance_len + val_distance_len]
test_dataset.distances = distances[train_distance_len + val_distance_len:]
distance_scale_param = {"mean": distance_scale.mean_, "var": distance_scale.var_}
train_dataset.distances_scale_param = val_dataset.distances_scale_param = test_dataset.distances_scale_param = distance_scale_param
if temp_column is not None:
temporal = np.concatenate((train_dataset.temporal, val_dataset.temporal, test_dataset.temporal), axis=0)

if train_dataset.temporal is not None and val_dataset.temporal is not None and test_dataset.temporal is not None:
temporal = train_dataset.temporal
temporal = temporal_scale.fit_transform(temporal.reshape(-1, temporal.shape[-1])).reshape(temporal.shape)

train_dataset.temporal = temporal_scale.transform(train_dataset.temporal.reshape(-1, train_dataset.temporal.shape[-1])).reshape(train_dataset.temporal.shape)
val_dataset.temporal = temporal_scale.transform(val_dataset.temporal.reshape(-1, val_dataset.temporal.shape[-1])).reshape(val_dataset.temporal.shape)
test_dataset.temporal = temporal_scale.transform(test_dataset.temporal.reshape(-1, test_dataset.temporal.shape[-1])).reshape(test_dataset.temporal.shape)

if process_fn == "minmax_scale":
temporal_scale_param = {"min": temporal_scale.data_min_, "max": temporal_scale.data_max_}
else:
temporal_scale_param = {"mean": temporal_scale.mean_, "var": temporal_scale.var_}
train_dataset.temporal = temporal[:train_distance_len]
val_dataset.temporal = temporal[train_distance_len:train_distance_len + val_distance_len]
test_dataset.temporal = temporal[train_distance_len + val_distance_len:]
train_dataset.temporal_scale_param = val_dataset.temporal_scale_param = test_dataset.temporal_scale_param = temporal_scale_param
# initialize dataloader for train/val/test dataset
# set batch_size for train_dataset as batch_size
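Note: the scaling above now fits the scaler on the training distances only and reuses the fitted statistics for the validation and test sets, instead of fitting on the concatenation of all three splits; this keeps validation/test information out of the scaling parameters. A minimal sketch of the pattern with scikit-learn's StandardScaler (array shapes are illustrative), mirroring the reshape/transform/reshape-back steps above:

    import numpy as np
    from sklearn.preprocessing import StandardScaler

    train_d = np.random.rand(100, 50, 2)   # (samples, reference points, distance features)
    val_d = np.random.rand(20, 50, 2)
    test_d = np.random.rand(30, 50, 2)

    scaler = StandardScaler()
    scaler.fit(train_d.reshape(-1, train_d.shape[-1]))   # statistics from the training split only
    train_d = scaler.transform(train_d.reshape(-1, train_d.shape[-1])).reshape(train_d.shape)
    val_d = scaler.transform(val_d.reshape(-1, val_d.shape[-1])).reshape(val_d.shape)
    test_d = scaler.transform(test_d.reshape(-1, test_d.shape[-1])).reshape(test_d.shape)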
@@ -735,13 +761,14 @@ def init_predict_dataset(data, train_dataset, x_column, spatial_column=None, tem
process_params = [[train_dataset.x_scale_info['mean'], train_dataset.x_scale_info['std']]]
else:
raise ValueError("scale_fn must be minmax_scale or standard_scale")
# print("ProcessParams:",process_params)
if scale_sync:
predict_dataset = use_class(data=data, x_column=x_column, process_fn=process_fn, scale_info=process_params,
is_need_STNN=is_need_STNN)
else:
predict_dataset = use_class(data=data, x_column=x_column, process_fn=process_fn, is_need_STNN=is_need_STNN)

# get the y scale information
predict_dataset.scale_info_y = train_dataset.y_scale_info
# train_data = train_dataset.dataframe
reference_data = train_dataset.reference

27 changes: 21 additions & 6 deletions src/gnnwr/models.py
@@ -138,7 +138,7 @@ def __init__(
self._log_file_name = log_file_name # log file
self._log_level = log_level # log level
self.__istrained = False # whether the model is trained
# TODO: use OLS in scaled data ot original data

self._coefficient = OLS(
train_dataset.scaledDataframe, train_dataset.x, train_dataset.y).params # coefficients of OLS

@@ -316,10 +316,11 @@ def __train(self):
x_true = torch.cat((x_true, coef), 0)
y_true = torch.cat((y_true, label), 0)
weight = self._model(data)

weight_all = torch.cat((weight_all, weight.to(torch.float32)), 0)
output = self._out(weight.mul(coef.to(torch.float32)))
y_pred = torch.cat((y_pred, output), 0)
loss = self._criterion(output, label) # calculate the loss
loss = self._criterion(output, label) # calculate the loss
loss.backward() # back propagation
self._optimizer.step() # update the parameters
if isinstance(data, list):
@@ -419,8 +420,9 @@ def __test(self):
self.__testLoss = test_loss
self.__testr2 = r2_score(label_list, out_list)
self._test_diagnosis = DIAGNOSIS(weight_all, x_data, y_data, y_pred)
return self._test_diagnosis.R2().data

def run(self, max_epoch=1, early_stop=-1):
def run(self, max_epoch=1, early_stop=-1,**kwargs):
"""
train the model and validate the model
@@ -438,6 +440,12 @@ def run(self, max_epoch=1, early_stop=-1):
show_detailed_info : bool
if ``True``, the detailed information will be shown (default: ``True``)
"""
if kwargs.get("print_frequency") is not None:
warnings.warn("The parameter print_frequency is deprecated, the information will be shown in tqdm")
if kwargs.get("show_detailed_info") is not None:
warnings.warn("The parameter show_detailed_info is deprecated, the information will be shown in tqdm")
# model selection method
model_selection = kwargs.get("model_selection", "val")
self.__istrained = True
if self._use_gpu:
self._model = nn.DataParallel(module=self._model) # parallel computing
@@ -490,7 +498,11 @@ def run(self, max_epoch=1, early_stop=-1):
if 0 < early_stop < self._noUpdateEpoch: # stop when the model has not been updated for long time
print("Training stop! Model has not been improved for over {} epochs.".format(early_stop))
break
self.load_model(self._modelSavePath + '/' + self._modelName + ".pkl")
torch.save(self._model, self._modelSavePath + '/' + self._modelName + "_last.pkl")
if model_selection == "val":
self.load_model(self._modelSavePath + '/' + self._modelName + ".pkl")
elif model_selection == "last":
self.load_model(self._modelSavePath + '/' + self._modelName + "_last.pkl")
self.result_data = self.getCoefs()
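Note: print_frequency and show_detailed_info now only emit deprecation warnings, and a new model_selection keyword decides which checkpoint is reloaded after training: "val" (the default) restores the checkpoint saved during validation, while "last" restores the final-epoch weights saved to "_last.pkl". A usage sketch (model stands for an already constructed model object from this module):

    model.run(max_epoch=100, early_stop=10, model_selection="last")   # keep the last-epoch weights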

def predict(self, dataset):
@@ -507,6 +519,7 @@ def predict(self, dataset):
dataframe
the Pandas dataframe of the dataset with the predicted result
"""

data = dataset.distances
coef = dataset.x_data
if not self.__istrained:
@@ -520,7 +533,7 @@ def predict(self, dataset):
weight = self._model(data)
result = self._out(weight.mul(coef)).cpu().detach().numpy()
dataset.dataframe['pred_result'] = result
dataset.dataframe['denormalized_pred_result'] = dataset.rescale(result)
_,dataset.dataframe['denormalized_pred_result'] = dataset.rescale(None,result)
dataset.pred_result = result
return dataset.dataframe
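Note: because predictDataset.rescale now takes (x, y), predict denormalises only the predicted dependent variable by passing None for x, using the y scale information copied from the training dataset (scale_info_y, set in init_predict_dataset above). Roughly, with illustrative names:

    _, denormalized = predict_dataset.rescale(None, result)   # x is skipped, only y is rescaled
    predict_dataset.dataframe["denormalized_pred_result"] = denormalized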

@@ -581,7 +594,8 @@ def load_model(self, path, use_dict=False, map_location=None):
self._model = self._model.cpu()
self._out = self._out.cpu()
self._modelSavePath = os.path.dirname(path)
self._modelName = os.path.basename(path).split('/')[-1].split('.')[0]
if self._modelName is None:
self._modelName = os.path.basename(path).split('/')[-1].split('.')[0]
self.__istrained = True
self.result_data = self.getCoefs()

@@ -729,6 +743,7 @@ def reg_result(self, filename=None, model_path=None, use_dict=False, only_return
"""
if model_path is None:
model_path = self._modelSavePath + "/" + self._modelName + ".pkl"

if use_dict:
data = torch.load(model_path, map_location=map_location)
self._model.load_state_dict(data)
2 changes: 1 addition & 1 deletion src/gnnwr/utils.py
@@ -24,7 +24,7 @@ def __init__(self, dataset, xName: list, yName: list):
self.__formula = yName[0] + '~' + '+'.join(xName)
self.__fit = sm.formula.ols(self.__formula, dataset).fit()
self.params = list(self.__fit.params.to_dict().values())
intercept = self.__fit.params[0]
intercept = self.__fit.params.iloc[0]
self.params = self.params[1:]
self.params.append(intercept)
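Note: switching from params[0] to params.iloc[0] silences the pandas FutureWarning about integer keys on a label-indexed Series being treated as positions. A standalone illustration (not gnnwr code):

    import pandas as pd

    params = pd.Series({"Intercept": 1.0, "x1": 2.0, "x2": 3.0})
    intercept = params.iloc[0]   # explicit positional access, no FutureWarning
    # params[0] relies on the deprecated integer-as-position fallback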

