[Fix] Fix error in predict_weight()
Y-nuclear committed Dec 4, 2023
1 parent e44213f commit ea6066f
Showing 4 changed files with 115 additions and 32 deletions.
5 changes: 5 additions & 0 deletions requirements.txt
@@ -4,3 +4,8 @@ scikit_learn>=1.0.2
statsmodels>=0.13.5
torch>=1.8.1
tqdm>=4.63.0

folium~=0.14.0
branca~=0.6.0
scipy~=1.10.1
scikit-learn~=1.2.2
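Note on the new pins: `~=` is pip's compatible-release operator, so each entry tracks patch releases within the stated line. A quick check of what that accepts, sketched with the third-party `packaging` library (an illustration only; it is not one of this project's requirements):

```python
# Sketch: what "folium~=0.14.0" permits, via packaging.specifiers.
from packaging.specifiers import SpecifierSet

spec = SpecifierSet("~=0.14.0")   # equivalent to >=0.14.0, <0.15.0
print("0.14.3" in spec)           # True
print("0.15.0" in spec)           # False
```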
41 changes: 26 additions & 15 deletions src/gnnwr/datasets.py
@@ -117,19 +117,19 @@ def scale(self, scale_fn=None, scale_params=None):
if scale_fn == "minmax_scale":
self.scale_fn = "minmax_scale"
x_scale_params = scale_params[0]
# y_scale_params = scale_params[1]
y_scale_params = scale_params[1]
self.x_scale_info = {"min": x_scale_params.data_min_, "max": x_scale_params.data_max_}
self.x_data = x_scale_params.transform(pd.DataFrame(self.x_data, columns=self.x))
# self.y_scale_info = {"min": y_scale_params.data_min_, "max": y_scale_params.data_max_}
# self.y_data = y_scale_params.transform(pd.DataFrame(self.y_data, columns=self.y))
self.y_scale_info = {"min": y_scale_params.data_min_, "max": y_scale_params.data_max_}
self.y_data = y_scale_params.transform(pd.DataFrame(self.y_data, columns=self.y))
elif scale_fn == "standard_scale":
self.scale_fn = "standard_scale"
x_scale_params = scale_params[0]
# y_scale_params = scale_params[1]
y_scale_params = scale_params[1]
self.x_scale_info = {"mean": x_scale_params.mean_, "var": x_scale_params.var_}
self.x_data = x_scale_params.transform(pd.DataFrame(self.x_data, columns=self.x))
# self.y_scale_info = {"mean": y_scale_params.mean_, "var": y_scale_params.var_}
# self.y_data = y_scale_params.transform(pd.DataFrame(self.y_data, columns=self.y))
self.y_scale_info = {"mean": y_scale_params.mean_, "var": y_scale_params.var_}
self.y_data = y_scale_params.transform(pd.DataFrame(self.y_data, columns=self.y))

self.getScaledDataframe()
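This hunk uncomments the y-side scaling, so `scale()` now transforms the dependent variable with the same fitted parameters as the features. A minimal usage sketch, assuming `scale_params` is a pair of fitted sklearn scalers `[x_scaler, y_scaler]` (the DataFrame and column names below are hypothetical):

```python
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

train_df = pd.DataFrame({"x1": [0.0, 1.0, 2.0], "y": [10.0, 20.0, 30.0]})
x_scaler = MinMaxScaler().fit(train_df[["x1"]])   # exposes data_min_ / data_max_
y_scaler = MinMaxScaler().fit(train_df[["y"]])
# dataset.scale("minmax_scale", [x_scaler, y_scaler])  # y is now scaled as well
```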

@@ -154,17 +154,17 @@ def scale2(self, scale_fn, scale_params):
if scale_fn == "minmax_scale":
self.scale_fn = "minmax_scale"
x_scale_params = scale_params[0]
# y_scale_params = scale_params[1]
y_scale_params = scale_params[1]
# self.x_data = self.x_data * (x_scale_params["max"] - x_scale_params["min"]) + x_scale_params["min"]
self.x_data = (self.x_data - x_scale_params["min"]) / (x_scale_params["max"] - x_scale_params["min"])
# self.y_data = self.y_data * (y_scale_params["max"] - y_scale_params["min"]) + y_scale_params["min"]
self.y_data = (self.y_data - y_scale_params["min"]) / (y_scale_params["max"] - y_scale_params["min"])
elif scale_fn == "standard_scale":
self.scale_fn = "standard_scale"
x_scale_params = scale_params[0]
# y_scale_params = scale_params[1]
y_scale_params = scale_params[1]
# self.x_data = self.x_data * np.sqrt(x_scale_params["var"]) + x_scale_params["mean"]
self.x_data = (self.x_data - x_scale_params['mean']) / np.sqrt(x_scale_params["var"])
# self.y_data = self.y_data * np.sqrt(y_scale_params["var"]) + y_scale_params["mean"]
self.y_data = (self.y_data - y_scale_params['mean']) / np.sqrt(y_scale_params["var"])

self.getScaledDataframe()
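`scale2()` applies stored statistics (plain dicts of min/max or mean/var) rather than fitted scaler objects, and after this change it normalizes y as well as x. The min-max branch reduces to this arithmetic (values below are hypothetical):

```python
import numpy as np

y = np.array([[10.0], [20.0], [30.0]])
info = {"min": np.array([10.0]), "max": np.array([30.0])}
y_scaled = (y - info["min"]) / (info["max"] - info["min"])   # [[0.0], [0.5], [1.0]]
```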

@@ -287,9 +287,11 @@ class predictDataset(Dataset):
:param is_need_STNN: whether STNN is needed
"""

def __init__(self, data, x_column, process_fn="minmax_scale", scale_info=[], is_need_STNN=False):
def __init__(self, data, x_column, process_fn="minmax_scale", scale_info=None, is_need_STNN=False):

# data = data.astype(np.float32)
if scale_info is None:
scale_info = []
self.dataframe = data
self.x = x_column
if data is None:
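The signature change from `scale_info=[]` to `scale_info=None` fixes Python's mutable-default-argument pitfall: a default list is created once, at function definition, and shared across every call. A self-contained illustration (function names are hypothetical):

```python
def buggy(item, acc=[]):        # one list shared by all calls
    acc.append(item)
    return acc

print(buggy(1))   # [1]
print(buggy(2))   # [1, 2]  <- state leaked from the first call

def fixed(item, acc=None):      # the pattern adopted in this commit
    if acc is None:
        acc = []
    acc.append(item)
    return acc

print(fixed(1))   # [1]
print(fixed(2))   # [2]
```

The same fix is applied to `minmax_scaler` and `standard_scaler` below.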
@@ -368,7 +370,7 @@ def rescale(self, x):

return x

def minmax_scaler(self, x, min=[], max=[]):
def minmax_scaler(self, x, min=None, max=None):
"""
min-max scaling function
@@ -377,13 +379,17 @@ def minmax_scaler(self, x, min=[], max=[]):
:param max: maximum value of each attribute
:return: Output attribute data
"""
if max is None:
max = []
if min is None:
min = []
if len(min) == 0:
x = (x - x.min(axis=0)) / (x.max(axis=0) - x.min(axis=0))
else:
x = (x - min) / (max - min)
return x

def standard_scaler(self, x, mean=[], std=[]):
def standard_scaler(self, x, mean=None, std=None):
"""
standard (z-score) scaling function
@@ -392,6 +398,10 @@ def standard_scaler(self, x, mean=[], std=[]):
:param std: standard deviation of each attribute
:return: Output attribute data
"""
if std is None:
std = []
if mean is None:
mean = []
if len(mean) == 0:
x = (x - x.mean(axis=0)) / x.std(axis=0)
else:
@@ -410,7 +420,7 @@ def BasicDistance(x, y):
x = np.float32(x)
y = np.float32(y)
dist = distance.cdist(x, y, 'euclidean')
return dist # np.float32(np.sqrt(np.sum((x[:, np.newaxis, :] - y) ** 2, axis=2)))
return dist
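For reference, `scipy.spatial.distance.cdist` returns the full pairwise distance matrix between two point sets; a small sketch with toy coordinates (values are hypothetical, and the `'cityblock'` line assumes `Manhattan_distance` wraps that metric analogously):

```python
import numpy as np
from scipy.spatial import distance

a = np.array([[0.0, 0.0], [3.0, 4.0]], dtype=np.float32)
b = np.array([[0.0, 0.0]], dtype=np.float32)
print(distance.cdist(a, b, 'euclidean'))   # [[0.], [5.]]
print(distance.cdist(a, b, 'cityblock'))   # [[0.], [7.]]
```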


def Manhattan_distance(x, y):
@@ -425,7 +435,7 @@ def Manhattan_distance(x, y):


def init_dataset(data, test_ratio, valid_ratio, x_column, y_column, spatial_column=None, temp_column=None,
id_column=None, sample_seed=100, process_fn="minmax_scale", batch_size=32, shuffle=True,
id_column=None, sample_seed=42, process_fn="minmax_scale", batch_size=32, shuffle=True,
use_class=baseDataset,
spatial_fun=BasicDistance, temporal_fun=Manhattan_distance, max_val_size=-1, max_test_size=-1,
from_for_cv=0, is_need_STNN=False, Reference=None, simple_distance=True):
@@ -439,6 +449,7 @@ def init_dataset(data, test_ratio, valid_ratio, x_column, y_column, spatial_column,
:param y_column: output attribute column name
:param spatial_column: spatial attribute column name
:param temp_column: temporal attribute column name
:param id_column: id column name
:param sample_seed: random seed
:param process_fn: data pre-process function
:param batch_size: batch size
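A hedged usage sketch of `init_dataset` with the new default seed of 42; the input file, column names, and the three-way return are assumptions based on the signature and parameter names above, not confirmed by this diff:

```python
import pandas as pd
from gnnwr.datasets import init_dataset

df = pd.read_csv("demo_data.csv")                # hypothetical input file
train_set, valid_set, test_set = init_dataset(   # assumed return order
    df, test_ratio=0.15, valid_ratio=0.15,
    x_column=["x1", "x2"], y_column=["y"],
    spatial_column=["lon", "lat"],               # sample_seed now defaults to 42
)
```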
41 changes: 26 additions & 15 deletions src/gnnwr/models.py
@@ -247,7 +247,7 @@ def init_optimizer(self, optimizer, optimizer_params=None):
upepoch = optimizer_params.get("upepoch", 10000)
uprate = (maxlr - minlr) / upepoch * (upepoch // 20)
decayepoch = optimizer_params.get("decayepoch", 20000)
decayrate = optimizer_params.get("decayrate", 0.1)
decayrate = optimizer_params.get("decayrate", 0.95)
stop_change_epoch = optimizer_params.get("stop_change_epoch", 30000)
stop_lr = optimizer_params.get("stop_lr", 0.001)
lamda_lr = lambda epoch: (epoch // (upepoch // 20)) * uprate + minlr if epoch < upepoch else (
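The lambda encodes a staged warmup: the learning rate climbs from `minlr` toward `maxlr` in `upepoch // 20`-sized steps, then (in the branch truncated here) decays by `decayrate` after `decayepoch`. A sketch of wiring the warmup half into PyTorch, with the base LR set to 1.0 so the lambda's value is the effective LR (`minlr`/`maxlr` values are hypothetical, and the post-warmup branch is simplified to a constant):

```python
import torch

model = torch.nn.Linear(2, 1)
opt = torch.optim.SGD(model.parameters(), lr=1.0)   # lambda returns the absolute LR
minlr, maxlr, upepoch = 1e-3, 1e-1, 10000           # hypothetical minlr / maxlr
uprate = (maxlr - minlr) / upepoch * (upepoch // 20)
warmup = lambda e: (e // (upepoch // 20)) * uprate + minlr if e < upepoch else maxlr
sched = torch.optim.lr_scheduler.LambdaLR(opt, lr_lambda=warmup)
for epoch in range(3):
    opt.step()          # training step elided
    sched.step()
```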
@@ -422,6 +422,9 @@ def run(self, max_epoch=1, early_stop=-1, print_frequency=50, show_detailed_info=True):
if ``early_stop`` is ``-1``, the training will not stop until the max epoch
print_frequency : int
the frequency of printing the information (default: ``50``)
show_detailed_info : bool
if ``True``, the detailed information will be shown (default: ``True``)
"""
self.__istrained = True
if self._use_gpu:
@@ -560,7 +563,7 @@ def load_model(self, path, use_dict=False, map_location=None):
path : str
the path of the model
use_dict : bool
whether use dict to load the model (default: ``False``)
whether the function uses a dict to load the model (default: ``False``)
map_location : str
the location of the model (default: ``None``)
the location can be ``"cpu"`` or ``"cuda"``
@@ -635,7 +638,7 @@ def result(self, path=None, use_dict=False, map_location=None):
the path of the model(default: ``None``)
| if ``path`` is ``None``, the model will be loaded from ``self._modelSavePath + "/" + self._modelName + ".pkl"``
use_dict : bool
whether use dict to load the model (default: ``False``)
whether the function uses a dict to load the model (default: ``False``)
| if ``use_dict`` is ``True``, the model will be loaded from ``path`` as dict
map_location : str
the location of the model (default: ``None``)
@@ -664,24 +667,32 @@ def result(self, path=None, use_dict=False, map_location=None):
logging.info("Test Loss: " + str(self.__testLoss) + "; Test R2: " + str(self.__testr2))
# print result
# basic information
print("--------------------Result Table--------------------\n")
print("--------------------Model Information-----------------")
print("Model Name: |", self._modelName)
print("Model Structure: |\n", self._model)
print("Optimizer: |\n", self._optimizer)
print("independent variable: |", self._train_dataset.x)
print("dependent variable: |", self._train_dataset.y)
print("\n----------------------------------------------------\n")
print("Test Loss: ", self.__testLoss, " Test R2: ", self.__testr2)
if self._valid_r2 is not None and self._valid_r2 != float('-inf'):
print("Train R2: {:5f}".format(self._besttrainr2), " Valid R2: ", self._bestr2)
# OLS
print("\nOLS: |", self._weight)
# Diagnostics
print("R2: |", self.__testr2)
print("RMSE: | {:5f}".format(self._test_diagnosis.RMSE().data))
print("AIC: | {:5f}".format(self._test_diagnosis.AIC()))
print("AICc: | {:5f}".format(self._test_diagnosis.AICc()))
print("F1: | {:5f}".format(self._test_diagnosis.F1_GNN().data))
print("\nOLS weight:|", end=" ")
for i in range(len(self._weight)):
print(" {:.5f}".format(self._weight[i]), end=" ")
print("\n")
print("\n--------------------Result Information----------------")
print("Test Loss: | {:>25.5f}".format(self.__testLoss))
print("Test R2 : | {:>25.5f}".format(self.__testr2))
if self._valid_r2 is not None and self._valid_r2 != float('-inf'):
print("Train R2 : | {:>25.5f}".format(self._besttrainr2))
print("Valid R2 : | {:>25.5f}".format(self._valid_r2))
print("RMSE: | {:>30.5f}".format(self._test_diagnosis.RMSE().data))
print("AIC: | {:>30.5f}".format(self._test_diagnosis.AIC()))
print("AICc: | {:>30.5f}".format(self._test_diagnosis.AICc()))
print("F1: | {:>30.5f}".format(self._test_diagnosis.F1_Global().data))
print("F2: | {:>30.5f}".format(self._test_diagnosis.F2_Global().flatten()[0].data))
F3_Local_dict = self._test_diagnosis.F3_Local()[0]
for key in F3_Local_dict:
width = 30 - (len(key) - 4)
print("{}: | {:>{width}.5f}".format(key, F3_Local_dict[key].data, width=width))

def reg_result(self, filename=None, model_path=None, use_dict=False, only_return=False, map_location=None):
"""
60 changes: 58 additions & 2 deletions src/gnnwr/utils.py
@@ -3,7 +3,8 @@
import pandas as pd
import torch
import warnings
import copy
from scipy.stats import f
from scipy.stats import t
import folium
from folium.plugins import HeatMap, MarkerCluster
import branca
@@ -42,6 +43,7 @@ class DIAGNOSIS:
"""

def __init__(self, weight, x_data, y_data, y_pred):

self.__weight = weight
self.__x_data = x_data
self.__y_data = y_data
@@ -61,28 +63,82 @@ def __init__(self, weight, x_data, y_data, y_pred):
gtweight_3d = torch.diag_embed(self.__weight)
hatS_temp = torch.matmul(gtweight_3d,
torch.matmul(torch.inverse(torch.matmul(x_data_tile_t, x_data_tile)), x_data_tile_t))
self.__hat_temp = hatS_temp
hatS = torch.matmul(x_data.view(-1, 1, x_data.size(1)), hatS_temp)
hatS = hatS.view(-1, self.__n)
self.__hat = hatS
self.__S = torch.trace(self.__hat)
self.f3_dict = None
self.f3_dict_2 = None

def hat(self):
"""
:return: hat matrix
"""
return self.__hat

def F1_GNN(self):
def F1_Global(self):
"""
:return: F1-test
"""
k1 = self.__n - 2 * torch.trace(self.__hat) + \
torch.trace(torch.mm(self.__hat.transpose(-2, -1), self.__hat))

k2 = self.__n - self.__k - 1
rss_olr = torch.sum(
(torch.mean(self.__y_data) - torch.mm(self.__ols_hat, self.__y_data)) ** 2)
F_value = self.__ssr / k1 / (rss_olr / k2)
# p_value = f.sf(F_value, k1, k2)
return F_value
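With S the GNNWR hat matrix assembled in `__init__` and H the OLS hat matrix, the returned statistic is, as the code computes it (`self.__ssr` is presumably the GNNWR residual sum of squares, set in the elided part of `__init__`):

```math
F_1 = \frac{\mathrm{RSS}_{gnnwr}/\delta_1}{\mathrm{RSS}_{ols}/(n-k-1)},
\qquad
\delta_1 = n - 2\,\mathrm{tr}(S) + \mathrm{tr}(S^{\top}S),
\qquad
\mathrm{RSS}_{ols} = \sum_i \bigl(\bar{y} - (Hy)_i\bigr)^2
```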

def F2_Global(self):
"""
:return: F2-test
"""
# A = (I - H) - (I - S)^T*(I - S)
A = (torch.eye(self.__n) - self.__ols_hat) - torch.mm(
(torch.eye(self.__n) - self.__hat).transpose(-2, -1),
(torch.eye(self.__n) - self.__hat))
v1 = torch.trace(A)
# DSS = y^T*A*y
DSS = torch.mm(self.__y_data.transpose(-2, -1), torch.mm(A, self.__y_data))
k2 = self.__n - self.__k - 1
rss_olr = torch.sum(
(torch.mean(self.__y_data) - torch.mm(self.__ols_hat, self.__y_data)) ** 2)

return DSS / v1 / (rss_olr / k2)
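In the same notation, the second global test is:

```math
F_2 = \frac{\mathrm{DSS}/\nu_1}{\mathrm{RSS}_{ols}/(n-k-1)},
\qquad
\mathrm{DSS} = y^{\top} A\, y,
\qquad
A = (I - H) - (I - S)^{\top}(I - S),
\qquad
\nu_1 = \mathrm{tr}(A)
```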

def F3_Local(self):
"""
:return: F3-test of each variable
"""

ek_dict = {}
self.f3_dict = {}
self.f3_dict_2 = {}
for i in range(self.__x_data.size(1)):
ek_zeros = torch.zeros([self.__x_data.size(1)])
ek_zeros[i] = 1
ek_dict['ek' + str(i)] = torch.reshape(torch.reshape(torch.tile(ek_zeros.clone().detach(), [self.__n]),
[self.__n, -1]),
[-1, 1, self.__x_data.size(1)])
hatB = torch.matmul(ek_dict['ek' + str(i)], self.__hat_temp)
hatB = torch.reshape(hatB, [-1, self.__n])

J_n = torch.ones([self.__n, self.__n]) / self.__n
L = torch.matmul(hatB.transpose(-2, -1), torch.matmul(torch.eye(self.__n) - J_n, hatB))

vk2 = 1 / self.__n * torch.matmul(self.__y_data.transpose(-2, -1), torch.matmul(L, self.__y_data))
trace_L = torch.trace(1 / self.__n * L)
f3 = torch.squeeze(vk2 / trace_L / (self.__ssr / self.__n))
self.f3_dict['f3_param_' + str(i)] = f3

bk = torch.matmul(hatB, self.__y_data)
vk2_2 = 1 / self.__n * torch.sum((bk - torch.mean(bk)) ** 2)
f3_2 = torch.squeeze(vk2_2 / trace_L / (self.__ssr / self.__n))
self.f3_dict_2['f3_param_' + str(i)] = f3_2
return self.f3_dict, self.f3_dict_2
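For each variable k, with B_k the local-coefficient hat matrix picked out by the unit vector e_k and J_n the n-by-n averaging matrix (all entries 1/n), the two returned dicts hold the same statistic computed two equivalent ways: once as a quadratic form in y, once from the variance of the local coefficients b_k = B_k y:

```math
F_3^{(k)} = \frac{V_k^2\,/\,\mathrm{tr}(L_k/n)}{\mathrm{RSS}/n},
\qquad
V_k^2 = \tfrac{1}{n}\, y^{\top} L_k\, y,
\qquad
L_k = B_k^{\top}(I - J_n)\,B_k
```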

def AIC(self):
"""
:return: AIC
