Merge pull request #245 from ljwolf/lee_bugs

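Fix bugs in the Lee statistics (esda/lee.py) and replace `1 - stats.norm.cdf(...)` with the survival function `stats.norm.sf(...)` when computing p-values (#243)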
pysal · May 26, 2023 · 7f3b9cf
2 parents 165e139 + fe407b2
commit 7f3b9cf
Show file tree

Hide file tree

Showing 17 changed files with 197 additions and 16 deletions.
diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml
@@ -37,6 +37,9 @@
            - ci/310-DEV.yaml
            - ci/310-numba-DEV.yaml
            - ci/311.yaml
+           - ci/311-numba.yaml
+           - ci/311-DEV.yaml
+           - ci/311-numba-DEV.yaml
          include:
            - environment-file: ci/310-numba.yaml
              os: macos-latest

diff --git a/ci/310-DEV.yaml b/ci/310-DEV.yaml
@@ -13,6 +13,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
+  - rtree
   # optional
   - geopandas>=0.12.0
   - shapely>=2
diff --git a/ci/310-numba-DEV.yaml b/ci/310-numba-DEV.yaml
@@ -14,6 +14,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
+  - rtree
   # optional
   - geopandas>=0.12.0
   - shapely>=2
diff --git a/ci/310.yaml b/ci/310.yaml
@@ -14,6 +14,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
+  - rtree
   # optional
   - geopandas>=0.7.0
   - shapely>=2
diff --git a/ci/311-DEV.yaml b/ci/311-DEV.yaml
@@ -0,0 +1,28 @@
+name: test
+channels:
+  - conda-forge
+dependencies:
+  - python=3.11
+  # required
+  - geos
+  - pandas>=1.0
+  - scikit-learn
+  - scipy>=1.0
+  # testing
+  - codecov
+  - matplotlib
+  - pytest
+  - pytest-cov
+  - pytest-xdist
+  - rtree
+  # doc build
+  - nbsphinx
+  - numpydoc
+  - sphinx
+  - sphinxcontrib-bibtex
+  - sphinxcontrib-napoleon
+  - sphinx_bootstrap_theme
+  - pip:
+    - git+https://github.com/pysal/libpysal@main
+    - git+https://github.com/geopandas/geopandas@main
+    - git+https://github.com/toblerity/shapely@main
diff --git a/ci/311-numba-DEV.yaml b/ci/311-numba-DEV.yaml
@@ -0,0 +1,30 @@
+name: test
+channels:
+  - conda-forge
+dependencies:
+  - python=3.11
+  # required
+  - geos
+  - pandas>=1.0
+  - scikit-learn
+  - scipy>=1.0
+  - numba
+  # testing
+  - codecov
+  - matplotlib
+  - pytest
+  - pytest-cov
+  - pytest-xdist
+  - rtree
+  # optional
+  # doc build
+  - nbsphinx
+  - numpydoc
+  - sphinx
+  - sphinxcontrib-bibtex
+  - sphinxcontrib-napoleon
+  - sphinx_bootstrap_theme
+  - pip:
+    - git+https://github.com/pysal/libpysal@main
+    - git+https://github.com/geopandas/geopandas@main
+    - git+https://github.com/toblerity/shapely@main
diff --git a/ci/311-numba.yaml b/ci/311-numba.yaml
@@ -0,0 +1,28 @@
+name: test
+channels:
+  - conda-forge
+dependencies:
+  - python=3.11
+  # required
+  - libpysal
+  - pandas>=1.0
+  - scikit-learn
+  - scipy>=1.0
+  - numba
+  # testing
+  - codecov
+  - matplotlib
+  - pytest
+  - pytest-cov
+  - pytest-xdist
+  - rtree
+  # optional
+  - geopandas>=0.7.0
+  - shapely>=2.0
+  # doc build
+  - nbsphinx
+  - numpydoc
+  - sphinx
+  - sphinxcontrib-bibtex
+  - sphinxcontrib-napoleon
+  - sphinx_bootstrap_theme
diff --git a/ci/311.yaml b/ci/311.yaml
@@ -14,6 +14,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
+  - rtree
   # optional
   - geopandas>=0.7.0
   - shapely>=2

diff --git a/ci/38-numba.yaml b/ci/38-numba.yaml
@@ -15,6 +15,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
+  - rtree
   # optional
   - geopandas>=0.7.0
   - shapely>=2
diff --git a/ci/38.yaml b/ci/38.yaml
@@ -14,6 +14,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
+  - rtree
   # optional
   - geopandas>=0.7.0
   - shapely>=2
diff --git a/ci/39-numba.yaml b/ci/39-numba.yaml
@@ -15,6 +15,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
+  - rtree
   # optional
   - geopandas>=0.7.0
   - shapely>=2
diff --git a/ci/39.yaml b/ci/39.yaml
@@ -14,6 +14,7 @@ dependencies:
   - pytest
   - pytest-cov
   - pytest-xdist
+  - rtree
   # optional
   - geopandas>=0.7.0
   - shapely>=2
diff --git a/esda/geary.py b/esda/geary.py
@@ -127,8 +127,8 @@ def __init__(self, y, w, transformation="r", permutations=999):
         self.z_norm = de / self.seC_norm
         self.z_rand = de / self.seC_rand
         if de > 0:
-            self.p_norm = 1 - stats.norm.cdf(self.z_norm)
-            self.p_rand = 1 - stats.norm.cdf(self.z_rand)
+            self.p_norm = stats.norm.sf(self.z_norm)
+            self.p_rand = stats.norm.sf(self.z_rand)
         else:
             self.p_norm = stats.norm.cdf(self.z_norm)
             self.p_rand = stats.norm.cdf(self.z_rand)
@@ -147,7 +147,7 @@ def __init__(self, y, w, transformation="r", permutations=999):
             self.seC_sim = np.array(sim).std()
             self.VC_sim = self.seC_sim**2
             self.z_sim = (self.C - self.EC_sim) / self.seC_sim
-            self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))
+            self.p_z_sim = stats.norm.sf(np.abs(self.z_sim))
 
     @property
     def _statistic(self):

diff --git a/esda/getisord.py b/esda/getisord.py
@@ -412,7 +412,7 @@ def __init__(
         self.permutations = permutations
         self.star = star
         self.calc()
-        self.p_norm = 1 - stats.norm.cdf(np.abs(self.Zs))
+        self.p_norm = stats.norm.sf(np.abs(self.Zs))
         if permutations:
             self.p_sim, self.rGs = _crand_plus(
                 y,
@@ -432,7 +432,7 @@ def __init__(
                 self.seG_sim = sim.std(axis=0)
                 self.VG_sim = self.seG_sim * self.seG_sim
                 self.z_sim = (self.Gs - self.EG_sim) / self.seG_sim
-                self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))
+                self.p_z_sim = stats.norm.sf(np.abs(self.z_sim))
 
     def __crand(self, keep_simulations):
         y = self.y

diff --git a/esda/lee.py b/esda/lee.py
@@ -189,7 +189,7 @@ def fit(self, x, y):
         Z = numpy.column_stack((x, y))
 
         standard_connectivity = sparse.csc_matrix(
-            self.connectivity / self.connectivity.sum(axis=1)
+            self.connectivity / self.connectivity.sum(axis=1).reshape(-1, 1)
         )
 
         n, _ = x.shape
@@ -221,11 +221,11 @@ def fit(self, x, y):
                 random_neighbor_y = y[random_neighbors]
 
                 self.reference_distribution_[i] = (
-                    (weight * random_neighbor_y - y.mean()).sum(axis=1).squeeze()
-                )
+                    (weight * random_neighbor_y).sum(axis=1) - y.mean()
+                ).squeeze()
                 self.reference_distribution_[i] *= (
-                    (weight * random_neighbor_x - x.mean()).sum(axis=1).squeeze()
-                )
+                    (weight * random_neighbor_x).sum(axis=1) - x.mean()
+                ).squeeze()
             above = self.reference_distribution_ >= self.associations_.reshape(-1, 1)
             larger = above.sum(axis=1)
             extreme = numpy.minimum(larger, self.permutations - larger)

diff --git a/esda/moran.py b/esda/moran.py
@@ -168,8 +168,8 @@ def __init__(
         self.z_rand = (self.I - self.EI) / self.seI_rand
 
         if self.z_norm > 0:
-            self.p_norm = 1 - stats.norm.cdf(self.z_norm)
-            self.p_rand = 1 - stats.norm.cdf(self.z_rand)
+            self.p_norm = stats.norm.sf(self.z_norm)
+            self.p_rand = stats.norm.sf(self.z_rand)
         else:
             self.p_norm = stats.norm.cdf(self.z_norm)
             self.p_rand = stats.norm.cdf(self.z_rand)
@@ -193,7 +193,7 @@ def __init__(
             self.VI_sim = self.seI_sim**2
             self.z_sim = (self.I - self.EI_sim) / self.seI_sim
             if self.z_sim > 0:
-                self.p_z_sim = 1 - stats.norm.cdf(self.z_sim)
+                self.p_z_sim = stats.norm.sf(self.z_sim)
             else:
                 self.p_z_sim = stats.norm.cdf(self.z_sim)
 
@@ -424,7 +424,7 @@ def __init__(self, x, y, w, transformation="r", permutations=PERMUTATIONS):
             self.VI_sim = self.seI_sim**2
             self.z_sim = (self.I - self.EI_sim) / self.seI_sim
             if self.z_sim > 0:
-                self.p_z_sim = 1 - stats.norm.cdf(self.z_sim)
+                self.p_z_sim = stats.norm.sf(self.z_sim)
             else:
                 self.p_z_sim = stats.norm.cdf(self.z_sim)
 
@@ -1056,7 +1056,7 @@ def __init__(
                 self.seI_sim = self.sim.std(axis=0)
                 self.VI_sim = self.seI_sim * self.seI_sim
                 self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
-                self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))
+                self.p_z_sim = stats.norm.sf(np.abs(self.z_sim))
             else:
                 self.sim = self.rlisas = None
                 self.EI_sim = np.nan
@@ -1361,7 +1361,7 @@ def __init__(
                 self.seI_sim = sim.std(axis=0)
                 self.VI_sim = self.seI_sim * self.seI_sim
                 self.z_sim = (self.Is - self.EI_sim) / self.seI_sim
-                self.p_z_sim = 1 - stats.norm.cdf(np.abs(self.z_sim))
+                self.p_z_sim = stats.norm.sf(np.abs(self.z_sim))
 
     def __calc(self, w, zx, zy):
         zly = slag(w, zy)

diff --git a/esda/tests/test_lee.py b/esda/tests/test_lee.py
@@ -0,0 +1,84 @@
+import unittest
+import libpysal
+import geopandas
+from libpysal.common import pandas, RTOL, ATOL
+from .. import lee
+import numpy
+
+
+PANDAS_EXTINCT = pandas is None
+
+class Lee_Tester(unittest.TestCase):
+    def setUp(self):
+        self.data = geopandas.read_file(libpysal.examples.get_path("columbus.shp"))
+        self.w = libpysal.weights.Queen.from_dataframe(self.data)
+        self.w.transform = 'r'
+        self.x = self.data[['HOVAL']].values
+        self.y = self.data[['CRIME']].values
+
+    def test_global(self):
+        numpy.random.seed(2478879)
+        result = lee.Spatial_Pearson(connectivity=self.w.sparse).fit(self.x, self.y)
+        known = numpy.array([[ 0.30136527, -0.23625603],
+                             [-0.23625603,  0.53512008]])
+        numpy.testing.assert_allclose(known,result.association_, rtol=RTOL, atol=ATOL)
+        numpy.testing.assert_array_equal(result.reference_distribution_.shape, (999,2,2))
+        first_rep = numpy.array([[ 0.22803705, -0.08053692],
+                                 [-0.08053692,  0.18897318]])
+
+        second_rep = numpy.array([[ 0.14179274, -0.06962692],
+                                  [-0.06962692,  0.13688337]])
+        numpy.testing.assert_allclose(first_rep, result.reference_distribution_[0],
+                                      rtol=RTOL, atol=ATOL)
+        numpy.testing.assert_allclose(second_rep, result.reference_distribution_[1],
+                                      rtol=RTOL, atol=ATOL)
+
+        known_significance = numpy.array([[0.125, 0.026],
+                                          [0.026, 0.001]])
+        numpy.testing.assert_allclose(known_significance, result.significance_, 
+                                      rtol=RTOL, atol=ATOL)
+
+    def test_local(self):
+        numpy.random.seed(2478879)
+        result = lee.Spatial_Pearson_Local(connectivity=self.w.sparse).fit(self.x, self.y)
+        known_locals = numpy.array([ 0.10246023, -0.24169198, -0.1308714 ,  
+                                     0.00895543, -0.16080899, -0.00950808, 
+                                     -0.14615398, -0.0627634 ,  0.00661232, 
+                                     -0.42354628, -0.73121006,  0.02060548,  
+                                     0.05187356,  0.06515283, -0.64400723,
+                                    -0.37489818, -2.06573667, -0.10931854,  
+                                    0.50823848, -0.06338637, -0.10559429,  
+                                    0.03282849, -0.86618915, -0.62333825, 
+                                    -0.40910044,-0.41866868, -0.00702983, 
+                                    -0.4246288 , -0.52142507, -0.22481772,
+                                    0.1931263 , -1.39355214,  0.02036755,  
+                                    0.22896308, -0.00240854, -0.30405211, 
+                                    -0.66950406, -0.21481868, -0.60320158, 
+                                    -0.38117303, -0.45584563,  0.32019362, 
+                                    -0.02818729, -0.02214172,  0.05587915,
+                                    0.0295999 , -0.78818135,  0.16854472,  
+                                    0.2378127 ])
+        numpy.testing.assert_allclose(known_locals, result.associations_, 
+                                      rtol=RTOL, atol=ATOL)
+        significances = numpy.array([0.154, 0.291, 0.358, 0.231, 0.146, 
+                                     0.335, 0.325, 0.388, 0.244, 0.111, 
+                                     0.019, 0.165, 0.136, 0.073, 0.014, 
+                                     0.029, 0.002, 0.376, 0.003, 0.265, 
+                                     0.449, 0.121, 0.072, 0.006, 0.036, 
+                                     0.06 , 0.355, 0.01 , 0.017, 0.168, 
+                                     0.022, 0.003, 0.217, 0.016, 0.337, 
+                                     0.137, 0.015, 0.128, 0.11 , 0.09 , 
+                                     0.168, 0.031, 0.457, 0.44 , 0.141,
+                                     0.249, 0.158, 0.018, 0.031])
+        numpy.testing.assert_allclose(significances, result.significance_,
+                                      rtol=RTOL, atol=ATOL)
+
+suite = unittest.TestSuite()
+test_classes = [Lee_Tester] 
+for i in test_classes:
+    a = unittest.TestLoader().loadTestsFromTestCase(i)
+    suite.addTest(a)
+
+if __name__ == '__main__':
+    runner = unittest.TextTestRunner()
+    runner.run(suite)