From e7b9e92d5f0a63d55d9612a04a755d8f41d0a107 Mon Sep 17 00:00:00 2001
From: Yzy <2154597198@qq.com>
Date: Thu, 9 Jan 2025 12:46:56 +0800
Subject: [PATCH] [Modify] Replace __test() with __evaluate() and fix the R2
 of train and valid

---
 src/gnnwr/models.py | 70 ++++++++++++++++++++++++++++++---------------
 1 file changed, 47 insertions(+), 23 deletions(-)

diff --git a/src/gnnwr/models.py b/src/gnnwr/models.py
index 5c2f4a9..c3acc10 100644
--- a/src/gnnwr/models.py
+++ b/src/gnnwr/models.py
@@ -393,7 +393,7 @@ def __valid(self):
         else:
             self._noUpdateEpoch += 1
 
-    def __test(self):
+    def __evaluate(self, dataset):
         """
         test the network
         """
@@ -401,7 +401,7 @@ def __test(self):
         test_loss = 0
         out_list = torch.tensor([],dtype=torch.float32,device=self._device)
         label_list = torch.tensor([],dtype=torch.float32,device=self._device)
-        data_loader = self._test_dataset.dataloader
+        data_loader = dataset.dataloader  # dataloader of the dataset being evaluated
         x_data = torch.tensor([],dtype=torch.float32,device=self._device)
         y_data = torch.tensor([],dtype=torch.float32,device=self._device)
         y_pred = torch.tensor([],dtype=torch.float32,device=self._device)
@@ -424,11 +424,10 @@
                 test_loss += loss.item() * data[0].size(0)
             else:
                 test_loss += loss.item() * data.size(0)  # accumulate the loss
-        test_loss /= len(self._test_dataset)
-        self.__testLoss = test_loss
-        self.__testr2 = 1 - torch.sum((out_list - label_list) ** 2) / torch.sum((label_list - torch.mean(label_list)) ** 2)
-        self._test_diagnosis = DIAGNOSIS(weight_all, x_data, y_data, y_pred)
-        return self._test_diagnosis.R2().data
+
+        test_loss /= len(dataset)
+
+        return test_loss, DIAGNOSIS(weight_all, x_data, y_data, y_pred)
 
     def run(self, max_epoch=1, early_stop=-1,**kwargs):
         """
@@ -602,10 +601,10 @@ def load_model(self, path, use_dict=False, map_location=None):
             the location can be ``"cpu"`` or ``"cuda"``
         """
         if use_dict:
-            data = torch.load(path, map_location=map_location)
+            data = torch.load(path, map_location=map_location, weights_only=False)
             self._model.load_state_dict(data)
         else:
-            self._model = torch.load(path, map_location=map_location)
+            self._model = torch.load(path, map_location=map_location, weights_only=False)
         if self._use_gpu:
             self._model = self._model.cuda()
             self._out = self._out.cuda()
@@ -631,9 +630,9 @@ def gpumodel_to_cpu(self, path, save_path, use_model=True):
             whether use dict to load the model (default: ``True``)
         """
         if use_model:
-            data = torch.load(path, map_location='cpu').state_dict()
+            data = torch.load(path, map_location='cpu', weights_only=False).state_dict()
         else:
-            data = torch.load(path, map_location='cpu')
+            data = torch.load(path, map_location='cpu', weights_only=False)
         new_state_dict = OrderedDict()
         for k, v in data.items():
             name = k[7:]  # remove module.
@@ -688,10 +687,10 @@ def result(self, path=None, use_dict=False, map_location=None):
         if path is None:
             path = self._modelSavePath + "/" + self._modelName + ".pkl"
         if use_dict:
-            data = torch.load(path, map_location=map_location)
+            data = torch.load(path, map_location=map_location, weights_only=False)
             self._model.load_state_dict(data)
         else:
-            self._model = torch.load(path, map_location=map_location)
+            self._model = torch.load(path, map_location=map_location, weights_only=False)
         if self._use_gpu:
             self._model = nn.DataParallel(module=self._model)  # parallel computing
             self._model = self._model.cuda()
@@ -700,7 +699,13 @@ def result(self, path=None, use_dict=False, map_location=None):
             self._model = self._model.cpu()
             self._out = self._out.cpu()
         with torch.no_grad():
-            self.__test()
+            _, self._train_diagnosis = self.__evaluate(self._train_dataset)
+            self._trainr2 = self._train_diagnosis.R2().data
+            _, self._valid_diagnosis = self.__evaluate(self._valid_dataset)
+            self._validr2 = self._valid_diagnosis.R2().data
+            self.__testLoss, self._test_diagnosis = self.__evaluate(self._test_dataset)
+            self.__testr2 = self._test_diagnosis.R2().data
+
         logging.info("Test Loss: " + str(self.__testLoss) + "; Test R2: " + str(self.__testr2))
 
         # print result
@@ -719,9 +724,8 @@ def result(self, path=None, use_dict=False, map_location=None):
         print("\n--------------------Result Information----------------")
         print("Test Loss: | {:>25.5f}".format(self.__testLoss))
         print("Test R2 : | {:>25.5f}".format(self.__testr2))
-        if self._besttrainr2 is not None and self._besttrainr2 != float('-inf'):
-            print("Train R2 : | {:>25.5f}".format(self._besttrainr2))
-            print("Valid R2 : | {:>25.5f}".format(self._bestr2))
+        print("Train R2 : | {:>25.5f}".format(self._trainr2))
+        print("Valid R2 : | {:>25.5f}".format(self._validr2))
         print("RMSE: | {:>30.5f}".format(self._test_diagnosis.RMSE().data))
         print("AIC: | {:>30.5f}".format(self._test_diagnosis.AIC()))
         print("AICc: | {:>30.5f}".format(self._test_diagnosis.AICc()))
@@ -763,21 +767,23 @@ def reg_result(self, filename=None, model_path=None, use_dict=False, only_return
             model_path = self._modelSavePath + "/" + self._modelName + ".pkl"
 
         if use_dict:
-            data = torch.load(model_path, map_location=map_location)
+            data = torch.load(model_path, map_location=map_location, weights_only=False)
             self._model.load_state_dict(data)
         else:
-            self._model = torch.load(model_path, map_location=map_location)
+            self._model = torch.load(model_path, map_location=map_location, weights_only=False)
         if self._use_gpu:
             self._model = nn.DataParallel(module=self._model)
-            self._model = self._model.cuda()
-            self._out = self._out.cuda()
+            self._model, self._out = self._model.cuda(), self._out.cuda()
         else:
-            self._model = self._model.cpu()
-            self._out = self._out.cpu()
+            self._model, self._out = self._model.cpu(), self._out.cpu()
+
         device = torch.device('cuda') if self._use_gpu else torch.device('cpu')
         result = torch.tensor([]).to(torch.float32).to(device)
+        train_data_size = valid_data_size = 0
+
         with torch.no_grad():
+            # calculate the result of train dataset
             for data, coef, label, data_index in self._train_dataset.dataloader:
                 data, coef, label, data_index = data.to(device), coef.to(device), label.to(device), data_index.to(
                     device)
@@ -785,6 +791,8 @@ def reg_result(self, filename=None, model_path=None, use_dict=False, only_return
                 coefficient = self._model(data).mul(torch.tensor(self._coefficient).to(torch.float32).to(device))
                 output = torch.cat((coefficient, output, data_index), dim=1)
                 result = torch.cat((result, output), 0)
+            train_data_size = len(result)
+            # calculate the result of valid dataset
             for data, coef, label, data_index in self._valid_dataset.dataloader:
                 data, coef, label, data_index = data.to(device), coef.to(device), label.to(device), data_index.to(
                     device)
@@ -792,6 +800,8 @@ def reg_result(self, filename=None, model_path=None, use_dict=False, only_return
                 coefficient = self._model(data).mul(torch.tensor(self._coefficient).to(torch.float32).to(device))
                 output = torch.cat((coefficient, output, data_index), dim=1)
                 result = torch.cat((result, output), 0)
+            valid_data_size = len(result) - train_data_size
+            # calculate the result of test dataset
             for data, coef, label, data_index in self._test_dataset.dataloader:
                 data, coef, label, data_index = data.to(device), coef.to(device), label.to(device), data_index.to(
                     device)
@@ -799,6 +809,7 @@ def reg_result(self, filename=None, model_path=None, use_dict=False, only_return
                 coefficient = self._model(data).mul(torch.tensor(self._coefficient).to(torch.float32).to(device))
                 output = torch.cat((coefficient, output, data_index), dim=1)
                 result = torch.cat((result, output), 0)
+
         result = result.cpu().detach().numpy()
         columns = list(self._train_dataset.x)
         for i in range(len(columns)):
@@ -808,8 +819,21 @@ def reg_result(self, filename=None, model_path=None, use_dict=False, only_return
         result = pd.DataFrame(result, columns=columns)
         result[self._train_dataset.id] = result[self._train_dataset.id].astype(np.int32)
         result["Pred_" + self._train_dataset.y[0]] = result["Pred_" + self._train_dataset.y[0]].astype(np.float32)
+
+        # mark which dataset each row belongs to (train / valid / test)
+        result["dataset_belong"] = 'test'
+        result.loc[:train_data_size - 1, "dataset_belong"] = 'train'
+        result.loc[train_data_size:train_data_size + valid_data_size - 1, "dataset_belong"] = 'valid'
+
+        # denormalize pred result
+        if self._train_dataset.y_scale_info:
+            _, result['denormalized_pred_result'] = self._train_dataset.rescale(None, result)
+        else:
+            result['denormalized_pred_result'] = result["Pred_" + self._train_dataset.y[0]]
+
         if only_return:
             return result
+
         if filename is not None:
             result.to_csv(filename, index=False)
         else: