From d1291a8e034d7fe26938b86688f78c577da3a5f0 Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Fri, 3 Jan 2025 01:25:17 +0000 Subject: [PATCH 01/11] edit comment --- python/graphstorm/config/argument.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/config/argument.py b/python/graphstorm/config/argument.py index ddb933fc13..53999f5551 100644 --- a/python/graphstorm/config/argument.py +++ b/python/graphstorm/config/argument.py @@ -1752,14 +1752,14 @@ def log_report_frequency(self): ###################### Model training related ###################### @property def decoder_bias(self): - """ Node decoder bias. decoder_bias must be a boolean. Default is False. + """ Decoder bias. decoder_bias must be a boolean. Default is False. """ # pylint: disable=no-member if hasattr(self, "_decoder_bias"): assert self._decoder_bias in [True, False], \ "decoder_bias should be in [True, False]" return self._decoder_bias - # By default, node decoder bias is False + # By default, decoder bias is False return False @property From 57e2518c9e8963e3099cf4022415688e40cae5ee Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Fri, 3 Jan 2025 21:28:07 +0000 Subject: [PATCH 02/11] adding stuff --- python/graphstorm/config/argument.py | 6 +++--- python/graphstorm/model/edge_decoder.py | 26 ++++++++++++++++++++----- python/graphstorm/model/node_decoder.py | 8 ++++---- 3 files changed, 28 insertions(+), 12 deletions(-) diff --git a/python/graphstorm/config/argument.py b/python/graphstorm/config/argument.py index 53999f5551..9efcc1082e 100644 --- a/python/graphstorm/config/argument.py +++ b/python/graphstorm/config/argument.py @@ -1752,15 +1752,15 @@ def log_report_frequency(self): ###################### Model training related ###################### @property def decoder_bias(self): - """ Decoder bias. decoder_bias must be a boolean. Default is False. + """ Decoder bias. decoder_bias must be a boolean. Default is True. """ # pylint: disable=no-member if hasattr(self, "_decoder_bias"): assert self._decoder_bias in [True, False], \ "decoder_bias should be in [True, False]" return self._decoder_bias - # By default, decoder bias is False - return False + # By default, decoder bias is True + return True @property def dropout(self): diff --git a/python/graphstorm/model/edge_decoder.py b/python/graphstorm/model/edge_decoder.py index 5065283991..7bd3cd7259 100644 --- a/python/graphstorm/model/edge_decoder.py +++ b/python/graphstorm/model/edge_decoder.py @@ -132,6 +132,8 @@ class DenseBiDecoder(GSEdgeDecoder): norm: str Normalization methods. Not used, but reserved for complex DenseBiDecoder child class implementation. Default: None. + use_bias: bool + Whether the edge decoder uses a bias parameter. Default: True. """ def __init__(self, in_units, @@ -141,7 +143,8 @@ def __init__(self, num_basis=2, dropout_rate=0.0, regression=False, - norm=None): + norm=None, + use_bias=True): super().__init__() self.in_units = in_units @@ -157,6 +160,7 @@ def __init__(self, assert isinstance(target_etype, tuple) and len(target_etype) == 3, \ "Target etype must be a tuple of a canonical etype." 
self.target_etype = target_etype + self.use_bias = use_bias self._init_model() @@ -171,7 +175,7 @@ def _init_model(self): self.dropout = nn.Dropout(self.dropout) self.basis_para = nn.Parameter( th.randn(self.num_basis, self.in_units, self.in_units)) - self.combine_basis = nn.Linear(self.num_basis, basis_out, bias=False) + self.combine_basis = nn.Linear(self.num_basis, basis_out, bias=self.use_bias) self.reset_parameters() if self.regression: @@ -331,13 +335,16 @@ class EdgeRegression(GSEdgeDecoder): norm: str, optional Normalization methods. Not used, but reserved for complex edge regression. implementation. Default: None. + use_bias: bool + Whether the edge decoder uses a bias parameter. Default: True. """ def __init__(self, h_dim, target_etype, out_dim=1, dropout=0, - norm=None): + norm=None, + use_bias=True): super(EdgeRegression, self).__init__() self._h_dim = h_dim self._out_dim = out_dim @@ -349,6 +356,7 @@ def __init__(self, "Target etype must be a tuple of a canonical etype," \ f"e.g., (src_ntype, etype, dst_ntype), but got {target_etype}." self._target_etype = target_etype + self._use_bias = use_bias self._init_model() @@ -360,7 +368,7 @@ def _init_model(self): if self._norm is not None: logging.warning("Embedding normalization (batch norm or layer norm) " "is not supported in EdgeRegression") - self.linear = nn.Linear(h_dim * 2, h_dim, bias=True) + self.linear = nn.Linear(h_dim * 2, h_dim, bias=self._use_bias) self.relu = nn.ReLU() self.dropout = nn.Dropout(self._dropout) self.regression_head = nn.Linear(h_dim, out_dim, bias=True) @@ -500,6 +508,8 @@ class MLPEdgeDecoder(GSEdgeDecoder): norm: str Normalization methods. Not used, but reserved for complex MLPEdgeDecoder child class implementation. Default: None. + use_bias: bool + Whether the edge decoder uses a bias parameter. Default: True. """ def __init__(self, h_dim, @@ -510,7 +520,8 @@ def __init__(self, dropout=0, regression=False, num_ffn_layers=0, - norm=None): + norm=None, + use_bias=True): super(MLPEdgeDecoder, self).__init__() self.h_dim = h_dim self.multilabel = multilabel @@ -526,6 +537,7 @@ def __init__(self, assert isinstance(target_etype, tuple) and len(target_etype) == 3, \ "Target etype must be a tuple of a canonical etype." self.target_etype = target_etype + self.use_bias = use_bias self._init_model() @@ -543,6 +555,8 @@ def _init_model(self): # Here we assume the source and destination nodes have the same dimension. self.decoder = nn.Parameter(th.randn(self.h_dim * 2, self.out_dim)) + if self.use_bias: + self.bias = nn.Parameter(th.randn(self.out_dim)) assert self.num_hidden_layers == 1, "More than one layers not supported" nn.init.xavier_uniform_(self.decoder, gain=nn.init.calculate_gain('relu')) @@ -574,6 +588,8 @@ def _compute_logits(self, g, h): if self.num_ffn_layers > 0: h = self.ngnn_mlp(h) out = th.matmul(h, self.decoder) + if self.use_bias: + out = out + self.bias return out # pylint: disable=unused-argument diff --git a/python/graphstorm/model/node_decoder.py b/python/graphstorm/model/node_decoder.py index 48bd95d6fa..4c2f9fd4c9 100644 --- a/python/graphstorm/model/node_decoder.py +++ b/python/graphstorm/model/node_decoder.py @@ -39,7 +39,7 @@ class EntityClassifier(GSLayer): Normalization methods. Not used, but reserved for complex node classifier implementation. Default: None. use_bias: bool - Whether the node decoder uses a bias parameter. Default: False. + Whether the node decoder uses a bias parameter. Default: True. 
""" def __init__(self, in_dim, @@ -47,7 +47,7 @@ def __init__(self, multilabel, dropout=0, norm=None, - use_bias=False): + use_bias=True): super(EntityClassifier, self).__init__() self._in_dim = in_dim self._num_classes = num_classes @@ -170,14 +170,14 @@ class EntityRegression(GSLayer): Normalization methods. Not used, but reserved for complex node regression implementation. Default: None. use_bias: bool - Whether the node decoder uses a bias parameter. Default: False. + Whether the node decoder uses a bias parameter. Default: True. """ def __init__(self, h_dim, dropout=0, out_dim=1, norm=None, - use_bias=False): + use_bias=True): super(EntityRegression, self).__init__() self._h_dim = h_dim self._out_dim = out_dim From 4b8cf4538feb745ea90a837b0ca7f63797753536 Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Fri, 3 Jan 2025 21:58:14 +0000 Subject: [PATCH 03/11] adding bias --- python/graphstorm/model/edge_decoder.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/python/graphstorm/model/edge_decoder.py b/python/graphstorm/model/edge_decoder.py index 7bd3cd7259..bb33b59345 100644 --- a/python/graphstorm/model/edge_decoder.py +++ b/python/graphstorm/model/edge_decoder.py @@ -556,7 +556,7 @@ def _init_model(self): # Here we assume the source and destination nodes have the same dimension. self.decoder = nn.Parameter(th.randn(self.h_dim * 2, self.out_dim)) if self.use_bias: - self.bias = nn.Parameter(th.randn(self.out_dim)) + self.bias = nn.Parameter(th.zeros(self.out_dim)) assert self.num_hidden_layers == 1, "More than one layers not supported" nn.init.xavier_uniform_(self.decoder, gain=nn.init.calculate_gain('relu')) @@ -722,6 +722,8 @@ class MLPEFeatEdgeDecoder(MLPEdgeDecoder): norm: str Normalization methods. Not used, but reserved for complex MLPEFeatEdgeDecoder child class implementation. Default: None. + use_bias: bool + Whether the edge decoder uses a bias parameter. Default: True. 
""" def __init__(self, h_dim, @@ -732,7 +734,8 @@ def __init__(self, dropout=0, regression=False, num_ffn_layers=0, - norm=None): + norm=None, + use_bias=True): self.feat_dim = feat_dim super(MLPEFeatEdgeDecoder, self).__init__(h_dim=h_dim, out_dim=out_dim, @@ -741,7 +744,8 @@ def __init__(self, dropout=dropout, regression=regression, num_ffn_layers=num_ffn_layers, - norm=norm) + norm=norm, + use_bias=use_bias) def _init_model(self): """ Init decoder model @@ -763,6 +767,8 @@ def _init_model(self): # combine output of nn_decoder and feat_decoder self.combine_decoder = nn.Parameter(th.randn(self.h_dim * 2, self.h_dim)) self.decoder = nn.Parameter(th.randn(self.h_dim, self.out_dim)) + if self.use_bias: + self.bias = nn.Parameter(th.zeros(self.out_dim)) self.dropout = nn.Dropout(self.dropout) self.nn_decoder_norm = None @@ -835,6 +841,8 @@ def _compute_logits(self, g, h, e_h): combine_h = self.combine_norm(combine_h) combine_h = self.relu(combine_h) out = th.matmul(combine_h, self.decoder) + if self.use_bias: + out = out + self.bias return out From 29e384a90c027bbb003f641f3c0eca759e3ebf64 Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Fri, 3 Jan 2025 22:10:32 +0000 Subject: [PATCH 04/11] link up with gsf --- python/graphstorm/gsf.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/python/graphstorm/gsf.py b/python/graphstorm/gsf.py index fa1ed4189c..9202025e8a 100644 --- a/python/graphstorm/gsf.py +++ b/python/graphstorm/gsf.py @@ -439,7 +439,8 @@ def create_builtin_reconstruct_efeat_decoder(g, decoder_input_dim, config, train decoder = EdgeRegression(decoder_input_dim, target_etype=target_etype, out_dim=feat_dim, - dropout=dropout) + dropout=dropout, + use_bias=config.decoder_bias) loss_func = RegressionLossFunc() return decoder, loss_func @@ -617,14 +618,16 @@ def create_builtin_edge_decoder(g, decoder_input_dim, config, train_task): dropout_rate=dropout, regression=False, target_etype=target_etype, - norm=config.decoder_norm) + norm=config.decoder_norm, + use_bias=config.decoder_bias) elif decoder_type == "MLPDecoder": decoder = MLPEdgeDecoder(decoder_input_dim, num_classes, multilabel=config.multilabel, target_etype=target_etype, num_ffn_layers=config.num_ffn_layers_in_decoder, - norm=config.decoder_norm) + norm=config.decoder_norm, + use_bias=config.decoder_bias) elif decoder_type == "MLPEFeatEdgeDecoder": decoder_edge_feat = config.decoder_edge_feat assert decoder_edge_feat is not None, \ @@ -648,7 +651,8 @@ def create_builtin_edge_decoder(g, decoder_input_dim, config, train_task): target_etype=target_etype, dropout=config.dropout, num_ffn_layers=config.num_ffn_layers_in_decoder, - norm=config.decoder_norm) + norm=config.decoder_norm, + use_bias=config.decoder_bias) else: assert False, f"decoder {decoder_type} is not supported." 
@@ -680,7 +684,8 @@ def create_builtin_edge_decoder(g, decoder_input_dim, config, train_task): target_etype=target_etype, dropout_rate=dropout, regression=True, - norm=config.decoder_norm) + norm=config.decoder_norm, + use_bias=config.decoder_bias) elif decoder_type == "MLPDecoder": decoder = MLPEdgeDecoder(decoder_input_dim, 1, @@ -688,7 +693,8 @@ def create_builtin_edge_decoder(g, decoder_input_dim, config, train_task): target_etype=target_etype, regression=True, num_ffn_layers=config.num_ffn_layers_in_decoder, - norm=config.decoder_norm) + norm=config.decoder_norm, + use_bias=config.decoder_bias) elif decoder_type == "MLPEFeatEdgeDecoder": decoder_edge_feat = config.decoder_edge_feat assert decoder_edge_feat is not None, \ @@ -713,7 +719,8 @@ def create_builtin_edge_decoder(g, decoder_input_dim, config, train_task): dropout=config.dropout, regression=True, num_ffn_layers=config.num_ffn_layers_in_decoder, - norm=config.decoder_norm) + norm=config.decoder_norm, + use_bias=config.decoder_bias) else: assert False, "decoder not supported" loss_func = RegressionLossFunc() From 8a8a388005ec687a8919e485257fbca7c18e3577 Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Wed, 8 Jan 2025 20:48:19 +0000 Subject: [PATCH 05/11] tests --- python/graphstorm/model/__init__.py | 3 +- tests/unit-tests/test_decoder.py | 278 +++++++++++++++++++++++++++- 2 files changed, 279 insertions(+), 2 deletions(-) diff --git a/python/graphstorm/model/__init__.py b/python/graphstorm/model/__init__.py index 6630a58b8c..1702114209 100644 --- a/python/graphstorm/model/__init__.py +++ b/python/graphstorm/model/__init__.py @@ -61,7 +61,8 @@ LinkPredictWeightedRotatEDecoder, LinkPredictTransEDecoder, LinkPredictContrastiveTransEDecoder, - LinkPredictWeightedTransEDecoder) + LinkPredictWeightedTransEDecoder, + EdgeRegression) from .gnn_encoder_base import GraphConvEncoder diff --git a/tests/unit-tests/test_decoder.py b/tests/unit-tests/test_decoder.py index 0a033fc2dc..55bbc2b6f4 100644 --- a/tests/unit-tests/test_decoder.py +++ b/tests/unit-tests/test_decoder.py @@ -30,7 +30,10 @@ LinkPredictRotatEDecoder, LinkPredictContrastiveRotatEDecoder, LinkPredictTransEDecoder, - LinkPredictContrastiveTransEDecoder) + LinkPredictContrastiveTransEDecoder, + DenseBiDecoder, + EdgeRegression, + MLPEdgeDecoder) from graphstorm.dataloading import (BUILTIN_LP_UNIFORM_NEG_SAMPLER, BUILTIN_LP_JOINT_NEG_SAMPLER) from graphstorm.eval.utils import (calc_distmult_pos_score, @@ -983,6 +986,264 @@ def test_MLPEFeatEdgeDecoder(h_dim, feat_dim, out_dim, num_ffn_layers): pred = out.argmax(dim=1) assert_almost_equal(prediction.cpu().numpy(), pred.cpu().numpy()) +@pytest.mark.parametrize("in_units", [16, 64]) +@pytest.mark.parametrize("num_classes", [4, 8]) +def test_DenseBiDecoder(in_units, num_classes): + + u = th.tensor([0, 0]) + v = th.tensor([1, 2]) + edge_type = ("n0", "r0", "n1") + g = dgl.heterograph({ + edge_type: (u, v) + }) + + h = { + "n0": th.ones(g.num_nodes("n0"), in_units), + "n1": th.ones(g.num_nodes("n1"), in_units) + } + + # Test bias doesn't exist on combine_basis nn.Linear + decoder = DenseBiDecoder( + in_units=in_units, + num_classes=num_classes, + multilabel=False, + target_etype=edge_type, + use_bias=False + ) + assert not decoder.combine_basis.bias + + # Test classification by tricking decoder + decoder = DenseBiDecoder( + in_units=in_units, + num_classes=num_classes, + multilabel=False, + target_etype=edge_type, + use_bias=True + ) + + assert decoder.in_dims == in_units + assert decoder.out_dims == num_classes + assert not 
hasattr(decoder, "regression_head") + assert decoder.use_bias + + decoder.eval() + with th.no_grad(): + INCREMENT_VALUE = 10 # Trick the decoder to predict a specific class + + # Test classification when bias = 0 + TARGET_CLASS = 2 + th.nn.init.ones_(decoder.basis_para) + th.nn.init.ones_(decoder.combine_basis.weight) + th.nn.init.zeros_(decoder.combine_basis.bias) + decoder.combine_basis.weight[TARGET_CLASS][0] += INCREMENT_VALUE # Trick decoder + + prediction = decoder.predict(g, h) + assert th.all(prediction == TARGET_CLASS) + + # Test classification with nonzero bias + TARGET_CLASS = 3 + th.nn.init.ones_(decoder.basis_para) + th.nn.init.ones_(decoder.combine_basis.weight) + th.nn.init.zeros_(decoder.combine_basis.bias) + decoder.combine_basis.bias[TARGET_CLASS] += INCREMENT_VALUE # Trick decoder + + prediction = decoder.predict(g, h) + assert th.all(prediction == TARGET_CLASS) + +@pytest.mark.parametrize("in_dim", [16, 64]) +@pytest.mark.parametrize("out_dim", [1, 8]) +def test_EdgeRegression(in_dim, out_dim): + + u = th.tensor([0, 0]) + v = th.tensor([1, 2]) + edge_type = ("n0", "r0", "n1") + g = dgl.heterograph({ + edge_type: (u, v) + }) + + h = { + "n0": th.ones(g.num_nodes("n0"), in_dim), + "n1": th.ones(g.num_nodes("n1"), in_dim) + } + + # Test bias doesn't exist on linear layer + decoder = EdgeRegression( + h_dim=in_dim, + out_dim=out_dim, + target_etype=edge_type, + use_bias=False + ) + + try: + th.nn.init.zeros_(decoder.linear.bias) + except AttributeError: + pass + else: + raise AssertionError('Expected no bias.') + + # Test cases when bias exists (zero and nonzero) + decoder = EdgeRegression( + h_dim=in_dim, + out_dim=out_dim, + target_etype=edge_type, + use_bias=True + ) + + assert decoder.in_dims == in_dim + assert decoder.out_dims == out_dim + + decoder.eval() + with th.no_grad(): + th.nn.init.eye_(decoder.linear.weight) + th.nn.init.eye_(decoder.regression_head.weight) + th.nn.init.zeros_(decoder.linear.bias) + th.nn.init.zeros_(decoder.regression_head.bias) + + # Test regression output, should be all 1s because of identity matrix weights and 0 bias. + prediction = decoder.predict(g, h) + assert th.all(prediction == 1) + + # Test non-zero bias, should be all equal to TEST_BIAS_VALUE+1. 
+ TEST_BIAS_VALUE = 7 + th.nn.init.constant_(decoder.linear.bias, TEST_BIAS_VALUE) + th.nn.init.eye_(decoder.linear.weight) + th.nn.init.eye_(decoder.regression_head.weight) + th.nn.init.zeros_(decoder.regression_head.bias) + + prediction = decoder.predict(g, h) + assert th.all(prediction == TEST_BIAS_VALUE+1) + +@pytest.mark.parametrize("in_dim", [16, 64]) +@pytest.mark.parametrize("out_dim", [4, 8]) +@pytest.mark.parametrize("num_ffn_layers", [0, 2]) +def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): + + u = th.tensor([0, 0]) + v = th.tensor([1, 2]) + edge_type = ("n0", "r0", "n1") + g = dgl.heterograph({ + edge_type: (u, v) + }) + + h = { + "n0": th.ones(g.num_nodes("n0"), in_dim), + "n1": th.ones(g.num_nodes("n1"), in_dim) + } + + # Test classification + # Test bias doesn't exist on decoder + decoder = MLPEdgeDecoder( + h_dim=in_dim, + out_dim=out_dim, + multilabel=False, + target_etype=edge_type, + num_ffn_layers=num_ffn_layers, + use_bias=False + ) + assert not hasattr(decoder, "bias") + assert not hasattr(decoder, "regression_head") + + # Test classification by tricking decoder + decoder = MLPEdgeDecoder( + h_dim=in_dim, + out_dim=out_dim, + multilabel=False, + target_etype=edge_type, + num_ffn_layers=num_ffn_layers, + use_bias=True + ) + + assert decoder.in_dims == in_dim + assert decoder.out_dims == out_dim + assert hasattr(decoder, "bias") + assert not hasattr(decoder, "regression_head") + assert decoder.use_bias + + decoder.eval() + with th.no_grad(): + INCREMENT_VALUE = 10 # Trick the decoder to predict a specific class + + # Test classification when bias = 0 + TARGET_CLASS = 2 + # Set up MLP for testing + for layer in decoder.ngnn_mlp.ngnn_gnn: + th.nn.init.eye_(layer) + th.nn.init.eye_(decoder.decoder) + th.nn.init.zeros_(decoder.bias) + decoder.decoder[0][TARGET_CLASS] += INCREMENT_VALUE # Trick decoder + + prediction = decoder.predict(g, h) + assert th.all(prediction == TARGET_CLASS) + + # Test classification with nonzero bias + TARGET_CLASS = 3 + # Set up MLP for testing + for layer in decoder.ngnn_mlp.ngnn_gnn: + th.nn.init.eye_(layer) + th.nn.init.eye_(decoder.decoder) + th.nn.init.zeros_(decoder.bias) + decoder.bias[TARGET_CLASS] += INCREMENT_VALUE # Trick decoder + + prediction = decoder.predict(g, h) + assert th.all(prediction == TARGET_CLASS) + + + # Test regression + # Test bias doesn't exist on decoder + decoder = MLPEdgeDecoder( + h_dim=in_dim, + out_dim=out_dim, + multilabel=False, + target_etype=edge_type, + num_ffn_layers=num_ffn_layers, + use_bias=False, + regression=True + ) + assert not hasattr(decoder, "bias") + assert hasattr(decoder, "regression_head") + + decoder = MLPEdgeDecoder( + h_dim=in_dim, + out_dim=out_dim, + multilabel=False, + target_etype=edge_type, + num_ffn_layers=num_ffn_layers, + use_bias=True, + regression=True + ) + + assert decoder.in_dims == in_dim + assert decoder.out_dims == 1 + assert hasattr(decoder, "bias") + assert hasattr(decoder, "regression_head") + assert decoder.use_bias + + decoder.eval() + with th.no_grad(): + # Test regression output, should be all 1s because of identity matrix weights and 1s tensor input. + # Set up MLP for testing + for layer in decoder.ngnn_mlp.ngnn_gnn: + th.nn.init.eye_(layer) + th.nn.init.eye_(decoder.decoder) + th.nn.init.zeros_(decoder.bias) + th.nn.init.eye_(decoder.regression_head.weight) + th.nn.init.zeros_(decoder.regression_head.bias) + # Test regression output, should be all 1s because of identity matrix weights and 0 bias. 
+ prediction = decoder.predict(g, h) + assert th.all(prediction == 1) + + # Test non-zero bias, should be all equal to TEST_BIAS_VALUE+1 because input is 1s. + # Set up MLP for testing + for layer in decoder.ngnn_mlp.ngnn_gnn: + th.nn.init.eye_(layer) + th.nn.init.eye_(decoder.decoder) + TEST_BIAS_VALUE = 6 + th.nn.init.eye_(decoder.regression_head.weight) + th.nn.init.zeros_(decoder.regression_head.bias) + th.nn.init.constant_(decoder.bias, TEST_BIAS_VALUE) + prediction = decoder.predict(g, h) + assert th.all(prediction == TEST_BIAS_VALUE+1) + @pytest.mark.parametrize("in_dim", [16, 64]) @pytest.mark.parametrize("out_dim", [1, 8]) def test_EntityRegression(in_dim, out_dim): @@ -1086,3 +1347,18 @@ def test_EntityClassifier(in_dim, num_classes): test_MLPEFeatEdgeDecoder(16,8,2,0) test_MLPEFeatEdgeDecoder(16,32,2,2) + + test_DenseBiDecoder(16, 4) + test_DenseBiDecoder(16, 8) + test_DenseBiDecoder(64, 4) + test_DenseBiDecoder(64, 8) + + test_EdgeRegression(16, 1) + test_EdgeRegression(16, 8) + test_EdgeRegression(64, 1) + test_EdgeRegression(64, 8) + + test_MLPEdgeDecoder(16, 4, 0) + test_MLPEdgeDecoder(16, 8, 2) + test_MLPEdgeDecoder(64, 4, 2) + test_MLPEdgeDecoder(64, 8, 0) From 67cf8f4f6e33a28bc730242c4bbd7248bf8dfb3d Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Wed, 8 Jan 2025 22:06:39 +0000 Subject: [PATCH 06/11] adding test --- python/graphstorm/model/edge_decoder.py | 6 +- tests/unit-tests/test_decoder.py | 134 +++++++++++++++++++++++- 2 files changed, 137 insertions(+), 3 deletions(-) diff --git a/python/graphstorm/model/edge_decoder.py b/python/graphstorm/model/edge_decoder.py index bb33b59345..18dfb612fb 100644 --- a/python/graphstorm/model/edge_decoder.py +++ b/python/graphstorm/model/edge_decoder.py @@ -806,10 +806,12 @@ def _compute_logits(self, g, h, e_h): The minibatch graph h: dict of Tensors The dictionary containing the embeddings + e_h: dict of Tensor + The input edge embeddings in the format of {(src_ntype, etype, dst_ntype): emb}. Returns ------- - th.Tensor - Output of forward + out: Tensor + Output of forward. """ assert e_h is not None, "edge feature is required" with g.local_scope(): diff --git a/tests/unit-tests/test_decoder.py b/tests/unit-tests/test_decoder.py index 55bbc2b6f4..4137f84bca 100644 --- a/tests/unit-tests/test_decoder.py +++ b/tests/unit-tests/test_decoder.py @@ -1228,7 +1228,6 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): th.nn.init.zeros_(decoder.bias) th.nn.init.eye_(decoder.regression_head.weight) th.nn.init.zeros_(decoder.regression_head.bias) - # Test regression output, should be all 1s because of identity matrix weights and 0 bias. 
prediction = decoder.predict(g, h) assert th.all(prediction == 1) @@ -1244,6 +1243,139 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): prediction = decoder.predict(g, h) assert th.all(prediction == TEST_BIAS_VALUE+1) +@pytest.mark.parametrize("in_dim", [16, 64]) +@pytest.mark.parametrize("out_dim", [4, 8]) +@pytest.mark.parametrize("feat_dim", [8, 32]) +@pytest.mark.parametrize("num_ffn_layers", [0, 2]) +def test_MLPEFeatEdgeDecoder_hardcoded(in_dim, out_dim, feat_dim, num_ffn_layers): + + def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): + th.nn.init.eye_(decoder.nn_decoder) + th.nn.init.eye_(decoder.feat_decoder) + for layer in decoder.ngnn_mlp.ngnn_gnn: + th.nn.init.eye_(layer) + th.nn.init.eye_(decoder.combine_decoder) + th.nn.init.eye_(decoder.decoder) + + u = th.tensor([0, 0]) + v = th.tensor([1, 2]) + edge_type = ("n0", "r0", "n1") + g = dgl.heterograph({ + edge_type: (u, v) + }) + + h = { + "n0": th.ones(g.num_nodes("n0"), in_dim), + "n1": th.ones(g.num_nodes("n1"), in_dim) + } + + efeat = {edge_type: th.ones(g.num_edges(edge_type), feat_dim)} + + # Test classification + # Test bias doesn't exist on decoder + decoder = MLPEFeatEdgeDecoder( + h_dim=in_dim, + feat_dim=feat_dim, + out_dim=out_dim, + multilabel=False, + target_etype=edge_type, + num_ffn_layers=num_ffn_layers, + use_bias=False + ) + assert not hasattr(decoder, "bias") + assert not hasattr(decoder, "regression_head") + + # Test classification by tricking decoder + decoder = MLPEFeatEdgeDecoder( + h_dim=in_dim, + feat_dim=feat_dim, + out_dim=out_dim, + multilabel=False, + target_etype=edge_type, + num_ffn_layers=num_ffn_layers, + use_bias=True + ) + + assert decoder.in_dims == in_dim + assert decoder.out_dims == out_dim + assert hasattr(decoder, "bias") + assert not hasattr(decoder, "regression_head") + assert decoder.use_bias + + decoder.eval() + with th.no_grad(): + INCREMENT_VALUE = 10 # Trick the decoder to predict a specific class + + # Test classification when bias = 0 + TARGET_CLASS = 2 + prepareMLPEFeatEdgeDecoder(decoder) + decoder.decoder[0][TARGET_CLASS] += INCREMENT_VALUE # Trick decoder + + prediction = decoder.predict(g, h, efeat) + assert th.all(prediction == TARGET_CLASS) + + # Test classification with nonzero bias + TARGET_CLASS = 3 + prepareMLPEFeatEdgeDecoder(decoder) + th.nn.init.zeros_(decoder.bias) + decoder.bias[TARGET_CLASS] += INCREMENT_VALUE # Trick decoder + + prediction = decoder.predict(g, h, efeat) + assert th.all(prediction == TARGET_CLASS) + + + # Test regression + # Test bias doesn't exist on decoder + decoder = MLPEFeatEdgeDecoder( + h_dim=in_dim, + feat_dim=feat_dim, + out_dim=out_dim, + multilabel=False, + target_etype=edge_type, + num_ffn_layers=num_ffn_layers, + use_bias=False, + regression=True + ) + assert not hasattr(decoder, "bias") + assert hasattr(decoder, "regression_head") + + decoder = MLPEFeatEdgeDecoder( + h_dim=in_dim, + feat_dim=feat_dim, + out_dim=out_dim, + multilabel=False, + target_etype=edge_type, + num_ffn_layers=num_ffn_layers, + use_bias=True, + regression=True + ) + + assert decoder.in_dims == in_dim + assert decoder.out_dims == 1 + assert hasattr(decoder, "bias") + assert hasattr(decoder, "regression_head") + assert decoder.use_bias + + decoder.eval() + with th.no_grad(): + # Test regression output, should be all 1s because of identity matrix weights and 1s tensor input. 
+ prepareMLPEFeatEdgeDecoder(decoder) + th.nn.init.zeros_(decoder.bias) + th.nn.init.eye_(decoder.regression_head.weight) + th.nn.init.zeros_(decoder.regression_head.bias) + prediction = decoder.predict(g, h, efeat) + assert th.all(prediction == 1) + + # Test non-zero bias, should be all equal to TEST_BIAS_VALUE+1 because input is 1s. + # Set up MLP for testing + prepareMLPEFeatEdgeDecoder(decoder) + TEST_BIAS_VALUE = 6 + th.nn.init.eye_(decoder.regression_head.weight) + th.nn.init.zeros_(decoder.regression_head.bias) + th.nn.init.constant_(decoder.bias, TEST_BIAS_VALUE) + prediction = decoder.predict(g, h, efeat) + assert th.all(prediction == TEST_BIAS_VALUE+1) + @pytest.mark.parametrize("in_dim", [16, 64]) @pytest.mark.parametrize("out_dim", [1, 8]) def test_EntityRegression(in_dim, out_dim): From af31487ff8eb508023e186fc1c84182fb3809f15 Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Wed, 8 Jan 2025 22:09:31 +0000 Subject: [PATCH 07/11] adding tests to main --- tests/unit-tests/test_decoder.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/unit-tests/test_decoder.py b/tests/unit-tests/test_decoder.py index 4137f84bca..d62811faa4 100644 --- a/tests/unit-tests/test_decoder.py +++ b/tests/unit-tests/test_decoder.py @@ -1480,6 +1480,11 @@ def test_EntityClassifier(in_dim, num_classes): test_MLPEFeatEdgeDecoder(16,8,2,0) test_MLPEFeatEdgeDecoder(16,32,2,2) + test_MLPEFeatEdgeDecoder_hardcoded(16,4,8,0) + test_MLPEFeatEdgeDecoder_hardcoded(16,8,32,0) + test_MLPEFeatEdgeDecoder_hardcoded(64,8,8,2) + test_MLPEFeatEdgeDecoder_hardcoded(64,4,32,2) + test_DenseBiDecoder(16, 4) test_DenseBiDecoder(16, 8) test_DenseBiDecoder(64, 4) From addf5e0caf4a997f01d6c45133c3d919c36e2255 Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Wed, 8 Jan 2025 22:27:10 +0000 Subject: [PATCH 08/11] fix tests --- tests/unit-tests/test_decoder.py | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/tests/unit-tests/test_decoder.py b/tests/unit-tests/test_decoder.py index d62811faa4..3028d8330f 100644 --- a/tests/unit-tests/test_decoder.py +++ b/tests/unit-tests/test_decoder.py @@ -1024,7 +1024,6 @@ def test_DenseBiDecoder(in_units, num_classes): assert decoder.in_dims == in_units assert decoder.out_dims == num_classes assert not hasattr(decoder, "regression_head") - assert decoder.use_bias decoder.eval() with th.no_grad(): @@ -1074,12 +1073,7 @@ def test_EdgeRegression(in_dim, out_dim): use_bias=False ) - try: - th.nn.init.zeros_(decoder.linear.bias) - except AttributeError: - pass - else: - raise AssertionError('Expected no bias.') + assert not decoder.linear.bias # Test cases when bias exists (zero and nonzero) decoder = EdgeRegression( @@ -1169,7 +1163,6 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): for layer in decoder.ngnn_mlp.ngnn_gnn: th.nn.init.eye_(layer) th.nn.init.eye_(decoder.decoder) - th.nn.init.zeros_(decoder.bias) decoder.decoder[0][TARGET_CLASS] += INCREMENT_VALUE # Trick decoder prediction = decoder.predict(g, h) @@ -1181,7 +1174,6 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): for layer in decoder.ngnn_mlp.ngnn_gnn: th.nn.init.eye_(layer) th.nn.init.eye_(decoder.decoder) - th.nn.init.zeros_(decoder.bias) decoder.bias[TARGET_CLASS] += INCREMENT_VALUE # Trick decoder prediction = decoder.predict(g, h) @@ -1225,7 +1217,6 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): for layer in decoder.ngnn_mlp.ngnn_gnn: th.nn.init.eye_(layer) th.nn.init.eye_(decoder.decoder) - th.nn.init.zeros_(decoder.bias) 
th.nn.init.eye_(decoder.regression_head.weight) th.nn.init.zeros_(decoder.regression_head.bias) prediction = decoder.predict(g, h) @@ -1317,7 +1308,6 @@ def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): # Test classification with nonzero bias TARGET_CLASS = 3 prepareMLPEFeatEdgeDecoder(decoder) - th.nn.init.zeros_(decoder.bias) decoder.bias[TARGET_CLASS] += INCREMENT_VALUE # Trick decoder prediction = decoder.predict(g, h, efeat) @@ -1360,7 +1350,6 @@ def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): with th.no_grad(): # Test regression output, should be all 1s because of identity matrix weights and 1s tensor input. prepareMLPEFeatEdgeDecoder(decoder) - th.nn.init.zeros_(decoder.bias) th.nn.init.eye_(decoder.regression_head.weight) th.nn.init.zeros_(decoder.regression_head.bias) prediction = decoder.predict(g, h, efeat) From 49c17af517f413b5874f1588f256ad4ddc485cb8 Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Fri, 10 Jan 2025 03:45:47 +0000 Subject: [PATCH 09/11] address comments --- python/graphstorm/model/edge_decoder.py | 16 ++- python/graphstorm/model/node_decoder.py | 6 + tests/unit-tests/data_utils.py | 26 ++++ tests/unit-tests/test_decoder.py | 167 ++++++++++++------------ 4 files changed, 126 insertions(+), 89 deletions(-) diff --git a/python/graphstorm/model/edge_decoder.py b/python/graphstorm/model/edge_decoder.py index 18dfb612fb..b2ece2150d 100644 --- a/python/graphstorm/model/edge_decoder.py +++ b/python/graphstorm/model/edge_decoder.py @@ -134,6 +134,9 @@ class DenseBiDecoder(GSEdgeDecoder): implementation. Default: None. use_bias: bool Whether the edge decoder uses a bias parameter. Default: True. + + .. versionchanged:: 0.4.0 + Add a new argument "use_bias" so users can control whether decoders have bias. """ def __init__(self, in_units, @@ -337,6 +340,9 @@ class EdgeRegression(GSEdgeDecoder): implementation. Default: None. use_bias: bool Whether the edge decoder uses a bias parameter. Default: True. + + .. versionchanged:: 0.4.0 + Add a new argument "use_bias" so users can control whether decoders have bias. """ def __init__(self, h_dim, @@ -510,6 +516,9 @@ class MLPEdgeDecoder(GSEdgeDecoder): implementation. Default: None. use_bias: bool Whether the edge decoder uses a bias parameter. Default: True. + + .. versionchanged:: 0.4.0 + Add a new argument "use_bias" so users can control whether decoders have bias. """ def __init__(self, h_dim, @@ -575,8 +584,8 @@ def _compute_logits(self, g, h): The dictionary containing the embeddings Returns ------- - th.Tensor - Output of forward + out + Output of forward. """ with g.local_scope(): u, v = g.edges(etype=self.target_etype) @@ -724,6 +733,9 @@ class MLPEFeatEdgeDecoder(MLPEdgeDecoder): class implementation. Default: None. use_bias: bool Whether the edge decoder uses a bias parameter. Default: True. + + .. versionchanged:: 0.4.0 + Add a new argument "use_bias" so users can control whether decoders have bias. """ def __init__(self, h_dim, diff --git a/python/graphstorm/model/node_decoder.py b/python/graphstorm/model/node_decoder.py index 4c2f9fd4c9..1e735f664b 100644 --- a/python/graphstorm/model/node_decoder.py +++ b/python/graphstorm/model/node_decoder.py @@ -40,6 +40,9 @@ class EntityClassifier(GSLayer): implementation. Default: None. use_bias: bool Whether the node decoder uses a bias parameter. Default: True. + + .. versionchanged:: 0.4.0 + Add a new argument "use_bias" so users can control whether decoders have bias. 
""" def __init__(self, in_dim, @@ -171,6 +174,9 @@ class EntityRegression(GSLayer): implementation. Default: None. use_bias: bool Whether the node decoder uses a bias parameter. Default: True. + + .. versionchanged:: 0.4.0 + Add a new argument "use_bias" so users can control whether decoders have bias. """ def __init__(self, h_dim, diff --git a/tests/unit-tests/data_utils.py b/tests/unit-tests/data_utils.py index 00b2b6be27..8648102975 100644 --- a/tests/unit-tests/data_utils.py +++ b/tests/unit-tests/data_utils.py @@ -46,6 +46,32 @@ def generate_mask(idx, length): th_mask = th.tensor(mask, dtype=th.bool) return th_mask +def generate_dummy_constant_graph(in_units): + """ + Generate a dummy heterogeneous graph to test edge decoder. + + Return + ------- + g: a heterogeneous graph. + + h: node embeddings. + + edge_type: graph schema ("n0", "r0", "n1") + """ + u = th.tensor([0, 0]) + v = th.tensor([1, 2]) + edge_type = ("n0", "r0", "n1") + g = dgl.heterograph({ + edge_type: (u, v) + }) + + h = { + "n0": th.ones(g.num_nodes("n0"), in_units), + "n1": th.ones(g.num_nodes("n1"), in_units) + } + + return g, h, edge_type + def generate_dummy_hetero_graph_for_efeat_gnn(is_random=True): """ generate a dummy heterogeneous graph to test the get_edge_feat_size() method. diff --git a/tests/unit-tests/test_decoder.py b/tests/unit-tests/test_decoder.py index 3028d8330f..ea3895f250 100644 --- a/tests/unit-tests/test_decoder.py +++ b/tests/unit-tests/test_decoder.py @@ -44,7 +44,7 @@ from numpy.testing import assert_equal -from data_utils import generate_dummy_hetero_graph +from data_utils import generate_dummy_hetero_graph, generate_dummy_constant_graph def _check_scores(score, pos_score, neg_scores, etype, num_neg, batch_size): # pos scores @@ -990,80 +990,72 @@ def test_MLPEFeatEdgeDecoder(h_dim, feat_dim, out_dim, num_ffn_layers): @pytest.mark.parametrize("num_classes", [4, 8]) def test_DenseBiDecoder(in_units, num_classes): - u = th.tensor([0, 0]) - v = th.tensor([1, 2]) - edge_type = ("n0", "r0", "n1") - g = dgl.heterograph({ - edge_type: (u, v) - }) + g, h, edge_type = generate_dummy_constant_graph(in_units) - h = { - "n0": th.ones(g.num_nodes("n0"), in_units), - "n1": th.ones(g.num_nodes("n1"), in_units) - } - - # Test bias doesn't exist on combine_basis nn.Linear - decoder = DenseBiDecoder( - in_units=in_units, - num_classes=num_classes, - multilabel=False, - target_etype=edge_type, - use_bias=False - ) - assert not decoder.combine_basis.bias - - # Test classification by tricking decoder - decoder = DenseBiDecoder( - in_units=in_units, - num_classes=num_classes, - multilabel=False, - target_etype=edge_type, - use_bias=True - ) - - assert decoder.in_dims == in_units - assert decoder.out_dims == num_classes - assert not hasattr(decoder, "regression_head") - - decoder.eval() with th.no_grad(): - INCREMENT_VALUE = 10 # Trick the decoder to predict a specific class - - # Test classification when bias = 0 + # We trick the decoder to predict TARGET_CLASS by adding + # INCREMENT_VALUE at TARGET_CLASS' index in the combine_basis weight matrix. + # Because basis matrices are set to identity, the output of the bases are all + # equal. So, by modifying the weight of combine_basis, we trick the decoder + # to force it to predict TARGET_CLASS. 
+ INCREMENT_VALUE = 100 TARGET_CLASS = 2 - th.nn.init.ones_(decoder.basis_para) - th.nn.init.ones_(decoder.combine_basis.weight) - th.nn.init.zeros_(decoder.combine_basis.bias) + decoder = DenseBiDecoder( + in_units=in_units, + num_classes=num_classes, + multilabel=False, + target_etype=edge_type, + use_bias=False + ) + decoder.eval() + + # Test bias doesn't exist on combine_basis nn.Linear + assert decoder.combine_basis.bias is None + + for i in range(decoder.num_basis): + decoder.basis_para[i, :, :] = th.eye(in_units) + th.nn.init.eye_(decoder.combine_basis.weight) decoder.combine_basis.weight[TARGET_CLASS][0] += INCREMENT_VALUE # Trick decoder prediction = decoder.predict(g, h) assert th.all(prediction == TARGET_CLASS) + assert prediction.shape[0] == g.num_edges() + # Test classification with nonzero bias + # Same approach as above, but this time we modify the bias instead of combine_basis + # to force the decoder to predict TARGET_CLASS. TARGET_CLASS = 3 - th.nn.init.ones_(decoder.basis_para) - th.nn.init.ones_(decoder.combine_basis.weight) + decoder = DenseBiDecoder( + in_units=in_units, + num_classes=num_classes, + multilabel=False, + target_etype=edge_type, + use_bias=True + ) + decoder.eval() + + assert decoder.combine_basis.bias is not None + + assert decoder.in_dims == in_units + assert decoder.out_dims == num_classes + assert not hasattr(decoder, "regression_head") + + for i in range(decoder.num_basis): + decoder.basis_para[i, :, :] = th.eye(in_units) + th.nn.init.eye_(decoder.combine_basis.weight) th.nn.init.zeros_(decoder.combine_basis.bias) decoder.combine_basis.bias[TARGET_CLASS] += INCREMENT_VALUE # Trick decoder prediction = decoder.predict(g, h) assert th.all(prediction == TARGET_CLASS) + assert prediction.shape[0] == g.num_edges() @pytest.mark.parametrize("in_dim", [16, 64]) @pytest.mark.parametrize("out_dim", [1, 8]) def test_EdgeRegression(in_dim, out_dim): - u = th.tensor([0, 0]) - v = th.tensor([1, 2]) - edge_type = ("n0", "r0", "n1") - g = dgl.heterograph({ - edge_type: (u, v) - }) - - h = { - "n0": th.ones(g.num_nodes("n0"), in_dim), - "n1": th.ones(g.num_nodes("n1"), in_dim) - } + g, h, edge_type = generate_dummy_constant_graph(in_dim) # Test bias doesn't exist on linear layer decoder = EdgeRegression( @@ -1073,7 +1065,7 @@ def test_EdgeRegression(in_dim, out_dim): use_bias=False ) - assert not decoder.linear.bias + assert decoder.linear.bias is None # Test cases when bias exists (zero and nonzero) decoder = EdgeRegression( @@ -1085,9 +1077,12 @@ def test_EdgeRegression(in_dim, out_dim): assert decoder.in_dims == in_dim assert decoder.out_dims == out_dim + assert decoder.linear.bias is not None decoder.eval() with th.no_grad(): + # By setting weights to identity matrices and bias to 0, we should get all 1s + # in the output because our inputs are all 1s. th.nn.init.eye_(decoder.linear.weight) th.nn.init.eye_(decoder.regression_head.weight) th.nn.init.zeros_(decoder.linear.bias) @@ -1096,8 +1091,9 @@ def test_EdgeRegression(in_dim, out_dim): # Test regression output, should be all 1s because of identity matrix weights and 0 bias. prediction = decoder.predict(g, h) assert th.all(prediction == 1) + assert prediction.shape == (g.num_edges(), out_dim) - # Test non-zero bias, should be all equal to TEST_BIAS_VALUE+1. + # Test non-zero bias, the output should be all equal to TEST_BIAS_VALUE+1. 
TEST_BIAS_VALUE = 7 th.nn.init.constant_(decoder.linear.bias, TEST_BIAS_VALUE) th.nn.init.eye_(decoder.linear.weight) @@ -1106,23 +1102,14 @@ def test_EdgeRegression(in_dim, out_dim): prediction = decoder.predict(g, h) assert th.all(prediction == TEST_BIAS_VALUE+1) + assert prediction.shape == (g.num_edges(), out_dim) @pytest.mark.parametrize("in_dim", [16, 64]) @pytest.mark.parametrize("out_dim", [4, 8]) @pytest.mark.parametrize("num_ffn_layers", [0, 2]) def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): - u = th.tensor([0, 0]) - v = th.tensor([1, 2]) - edge_type = ("n0", "r0", "n1") - g = dgl.heterograph({ - edge_type: (u, v) - }) - - h = { - "n0": th.ones(g.num_nodes("n0"), in_dim), - "n1": th.ones(g.num_nodes("n1"), in_dim) - } + g, h, edge_type = generate_dummy_constant_graph(in_dim) # Test classification # Test bias doesn't exist on decoder @@ -1137,7 +1124,6 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): assert not hasattr(decoder, "bias") assert not hasattr(decoder, "regression_head") - # Test classification by tricking decoder decoder = MLPEdgeDecoder( h_dim=in_dim, out_dim=out_dim, @@ -1155,7 +1141,12 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): decoder.eval() with th.no_grad(): - INCREMENT_VALUE = 10 # Trick the decoder to predict a specific class + # We trick the decoder to predict TARGET_CLASS by adding + # INCREMENT_VALUE at TARGET_CLASS' index in the decoder weight matrix. + # Because all layers of the MLP are set to identity, the outputs will + # be the same as the inputs, all 1s. So, by modifying the weight of the + # decoder, we force it to predict TARGET_CLASS. + INCREMENT_VALUE = 10 # Test classification when bias = 0 TARGET_CLASS = 2 @@ -1167,6 +1158,7 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): prediction = decoder.predict(g, h) assert th.all(prediction == TARGET_CLASS) + assert prediction.shape[0] == g.num_edges() # Test classification with nonzero bias TARGET_CLASS = 3 @@ -1178,6 +1170,7 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): prediction = decoder.predict(g, h) assert th.all(prediction == TARGET_CLASS) + assert prediction.shape[0] == g.num_edges() # Test regression @@ -1221,6 +1214,7 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): th.nn.init.zeros_(decoder.regression_head.bias) prediction = decoder.predict(g, h) assert th.all(prediction == 1) + assert prediction.shape == (g.num_edges(), 1) # Test non-zero bias, should be all equal to TEST_BIAS_VALUE+1 because input is 1s. 
# Set up MLP for testing @@ -1233,12 +1227,13 @@ def test_MLPEdgeDecoder(in_dim, out_dim, num_ffn_layers): th.nn.init.constant_(decoder.bias, TEST_BIAS_VALUE) prediction = decoder.predict(g, h) assert th.all(prediction == TEST_BIAS_VALUE+1) + assert prediction.shape == (g.num_edges(), 1) @pytest.mark.parametrize("in_dim", [16, 64]) @pytest.mark.parametrize("out_dim", [4, 8]) @pytest.mark.parametrize("feat_dim", [8, 32]) @pytest.mark.parametrize("num_ffn_layers", [0, 2]) -def test_MLPEFeatEdgeDecoder_hardcoded(in_dim, out_dim, feat_dim, num_ffn_layers): +def test_MLPEFeatDecoder_Constant_Inputs(in_dim, out_dim, feat_dim, num_ffn_layers): def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): th.nn.init.eye_(decoder.nn_decoder) @@ -1248,17 +1243,7 @@ def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): th.nn.init.eye_(decoder.combine_decoder) th.nn.init.eye_(decoder.decoder) - u = th.tensor([0, 0]) - v = th.tensor([1, 2]) - edge_type = ("n0", "r0", "n1") - g = dgl.heterograph({ - edge_type: (u, v) - }) - - h = { - "n0": th.ones(g.num_nodes("n0"), in_dim), - "n1": th.ones(g.num_nodes("n1"), in_dim) - } + g, h, edge_type = generate_dummy_constant_graph(in_dim) efeat = {edge_type: th.ones(g.num_edges(edge_type), feat_dim)} @@ -1276,7 +1261,6 @@ def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): assert not hasattr(decoder, "bias") assert not hasattr(decoder, "regression_head") - # Test classification by tricking decoder decoder = MLPEFeatEdgeDecoder( h_dim=in_dim, feat_dim=feat_dim, @@ -1295,6 +1279,11 @@ def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): decoder.eval() with th.no_grad(): + # We trick the decoder to predict TARGET_CLASS by adding + # INCREMENT_VALUE at TARGET_CLASS' index in the decoder weight matrix. + # Because all layers of the MLP, edge feature decoder, and nn decoder + # are set to identity, the outputs will be the same as the inputs, all 1s. + # So, by modifying the weight of the decoder, we force it to predict TARGET_CLASS. INCREMENT_VALUE = 10 # Trick the decoder to predict a specific class # Test classification when bias = 0 @@ -1304,6 +1293,7 @@ def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): prediction = decoder.predict(g, h, efeat) assert th.all(prediction == TARGET_CLASS) + assert prediction.shape[0] == g.num_edges() # Test classification with nonzero bias TARGET_CLASS = 3 @@ -1312,6 +1302,7 @@ def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): prediction = decoder.predict(g, h, efeat) assert th.all(prediction == TARGET_CLASS) + assert prediction.shape[0] == g.num_edges() # Test regression @@ -1354,6 +1345,7 @@ def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): th.nn.init.zeros_(decoder.regression_head.bias) prediction = decoder.predict(g, h, efeat) assert th.all(prediction == 1) + assert prediction.shape == (g.num_edges(), 1) # Test non-zero bias, should be all equal to TEST_BIAS_VALUE+1 because input is 1s. 
# Set up MLP for testing @@ -1364,6 +1356,7 @@ def prepareMLPEFeatEdgeDecoder(decoder: MLPEFeatEdgeDecoder): th.nn.init.constant_(decoder.bias, TEST_BIAS_VALUE) prediction = decoder.predict(g, h, efeat) assert th.all(prediction == TEST_BIAS_VALUE+1) + assert prediction.shape == (g.num_edges(), 1) @pytest.mark.parametrize("in_dim", [16, 64]) @pytest.mark.parametrize("out_dim", [1, 8]) @@ -1469,10 +1462,10 @@ def test_EntityClassifier(in_dim, num_classes): test_MLPEFeatEdgeDecoder(16,8,2,0) test_MLPEFeatEdgeDecoder(16,32,2,2) - test_MLPEFeatEdgeDecoder_hardcoded(16,4,8,0) - test_MLPEFeatEdgeDecoder_hardcoded(16,8,32,0) - test_MLPEFeatEdgeDecoder_hardcoded(64,8,8,2) - test_MLPEFeatEdgeDecoder_hardcoded(64,4,32,2) + test_MLPEFeatDecoder_Constant_Inputs(16,4,8,0) + test_MLPEFeatDecoder_Constant_Inputs(16,8,32,0) + test_MLPEFeatDecoder_Constant_Inputs(64,8,8,2) + test_MLPEFeatDecoder_Constant_Inputs(64,4,32,2) test_DenseBiDecoder(16, 4) test_DenseBiDecoder(16, 8) From 8e7e40c975966c9fa3c220ea6615ef25756b38ae Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Fri, 10 Jan 2025 06:26:00 +0000 Subject: [PATCH 10/11] fix test --- python/graphstorm/model/edge_decoder.py | 3 --- tests/unit-tests/test_gsf.py | 5 +++++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/python/graphstorm/model/edge_decoder.py b/python/graphstorm/model/edge_decoder.py index b2ece2150d..8b61ac231f 100644 --- a/python/graphstorm/model/edge_decoder.py +++ b/python/graphstorm/model/edge_decoder.py @@ -340,9 +340,6 @@ class EdgeRegression(GSEdgeDecoder): implementation. Default: None. use_bias: bool Whether the edge decoder uses a bias parameter. Default: True. - - .. versionchanged:: 0.4.0 - Add a new argument "use_bias" so users can control whether decoders have bias. """ def __init__(self, h_dim, diff --git a/tests/unit-tests/test_gsf.py b/tests/unit-tests/test_gsf.py index e692777bc6..f48ae9e2f4 100644 --- a/tests/unit-tests/test_gsf.py +++ b/tests/unit-tests/test_gsf.py @@ -205,6 +205,7 @@ def test_create_builtin_edge_decoder(): "num_ffn_layers_in_decoder": 0, "multilabel_weights": None, "imbalance_class_weights": None, + "decoder_bias": True, } ) decoder, loss_func = create_builtin_edge_decoder(g, decoder_input_dim, config, train_task) @@ -225,6 +226,7 @@ def test_create_builtin_edge_decoder(): "num_ffn_layers_in_decoder": 0, "alpha": None, "gamma": None, + "decoder_bias": True, } ) decoder, loss_func = create_builtin_edge_decoder(g, decoder_input_dim, config, train_task) @@ -246,6 +248,7 @@ def test_create_builtin_edge_decoder(): "num_ffn_layers_in_decoder": 0, "alpha": 0.3, "gamma": 3., + "decoder_bias": True, } ) decoder, loss_func = create_builtin_edge_decoder(g, decoder_input_dim, config, train_task) @@ -263,6 +266,7 @@ def test_create_builtin_edge_decoder(): "decoder_type": "DenseBiDecoder", "num_decoder_basis": 2, "decoder_norm": None, + "decoder_bias": False, } ) decoder, loss_func = create_builtin_edge_decoder(g, decoder_input_dim, config, train_task) @@ -278,6 +282,7 @@ def test_create_builtin_edge_decoder(): "decoder_type": "MLPDecoder", "num_ffn_layers_in_decoder": 0, "decoder_norm": None, + "decoder_bias": False, } ) decoder, loss_func = create_builtin_edge_decoder(g, decoder_input_dim, config, train_task) From e89fc5cef2b0ba9dd59e5c4441758930f3e1ad11 Mon Sep 17 00:00:00 2001 From: Ronald Xu Date: Fri, 10 Jan 2025 07:14:20 +0000 Subject: [PATCH 11/11] address comment --- python/graphstorm/model/edge_decoder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/python/graphstorm/model/edge_decoder.py b/python/graphstorm/model/edge_decoder.py index 8b61ac231f..bbc30a7f3c 100644 --- a/python/graphstorm/model/edge_decoder.py +++ b/python/graphstorm/model/edge_decoder.py @@ -581,7 +581,7 @@ def _compute_logits(self, g, h): The dictionary containing the embeddings Returns ------- - out + out: th.Tensor Output of forward. """ with g.local_scope():
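
A minimal usage sketch (not part of the patch series) of the behavior these patches add, assuming a GraphStorm build with the series applied. The class name, constructor arguments, and attribute paths below are taken from the diffs and unit tests above; the snippet itself is illustrative only.

    from graphstorm.model import EdgeRegression  # re-exported by patch 05

    edge_type = ("n0", "r0", "n1")

    # `use_bias` now defaults to True, matching the new `decoder_bias`
    # default in python/graphstorm/config/argument.py.
    dec = EdgeRegression(h_dim=16, target_etype=edge_type, out_dim=1)
    assert dec.linear.bias is not None

    # Passing use_bias=False drops the bias on the first linear layer; the
    # regression head keeps its own bias, which this series does not touch.
    dec = EdgeRegression(h_dim=16, target_etype=edge_type, out_dim=1,
                         use_bias=False)
    assert dec.linear.bias is None

At the configuration level the same switch is exposed as `decoder_bias` (validated to be a boolean, default True), which gsf.py now forwards to the builtin edge decoders via `use_bias=config.decoder_bias`.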