Skip to content

Commit

Permalink
Use a naive sparse histogram.
Browse files Browse the repository at this point in the history
  • Loading branch information
delucchi-cmu committed Dec 20, 2024
1 parent 0cd372a commit 56e9c90
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 50 deletions.
76 changes: 45 additions & 31 deletions src/hats/pixel_math/sparse_histogram.py
Original file line number Diff line number Diff line change
@@ -1,50 +1,37 @@
"""Sparse 1-D histogram of healpix pixel counts."""

import numpy as np
from scipy.sparse import csc_array, load_npz, save_npz, sparray

import hats.pixel_math.healpix_shim as hp


class SparseHistogram:
"""Wrapper around scipy's sparse array."""
"""Wrapper around a naive sparse array, that is just non-zero indexes and counts."""

def __init__(self, sparse_array):
if not isinstance(sparse_array, sparray):
raise ValueError("The sparse array must be a scipy sparse array.")
if sparse_array.format != "csc":
raise ValueError("The sparse array must be a Compressed Sparse Column array.")
self.sparse_array = sparse_array

def add(self, other):
"""Add in another sparse histogram, updating this wrapper's array.
Args:
other (SparseHistogram): the wrapper containing the addend
"""
if not isinstance(other, SparseHistogram):
raise ValueError("Both addends should be SparseHistogram.")
if self.sparse_array.shape != other.sparse_array.shape:
raise ValueError(
"The histogram partials have incompatible sizes due to different healpix orders."
)
self.sparse_array += other.sparse_array
def __init__(self, indexes, counts, order):
if len(indexes) != len(counts):
raise ValueError("indexes and counts must be same length")

Check warning on line 13 in src/hats/pixel_math/sparse_histogram.py

View check run for this annotation

Codecov / codecov/patch

src/hats/pixel_math/sparse_histogram.py#L13

Added line #L13 was not covered by tests
self.indexes = indexes
self.counts = counts
self.order = order

def to_array(self):
"""Convert the sparse array to a dense numpy array.
Returns:
dense 1-d numpy array.
"""
return self.sparse_array.toarray()[0]
dense = np.zeros(hp.order2npix(self.order), dtype=np.int64)
dense[self.indexes] = self.counts
return dense

def to_file(self, file_name):
"""Persist the sparse array to disk.
NB: this saves as a sparse array, and so will likely have lower space requirements
than saving the corresponding dense 1-d numpy array.
"""
save_npz(file_name, self.sparse_array)
np.savez(file_name, indexes=self.indexes, counts=self.counts, order=self.order)

def to_dense_file(self, file_name):
"""Persist the DENSE array to disk as a numpy array."""
Expand All @@ -61,8 +48,7 @@ def make_empty(cls, healpix_order=10):
Returns:
new sparse histogram
"""
histo = csc_array((1, hp.order2npix(healpix_order)), dtype=np.int64)
return cls(histo)
return cls([], [], healpix_order)

@classmethod
def make_from_counts(cls, indexes, counts_at_indexes, healpix_order=10):
Expand All @@ -86,9 +72,7 @@ def make_from_counts(cls, indexes, counts_at_indexes, healpix_order=10):
Returns:
new sparse histogram
"""
row = np.array(np.zeros(len(indexes), dtype=np.int64))
histo = csc_array((counts_at_indexes, (row, indexes)), shape=(1, hp.order2npix(healpix_order)))
return cls(histo)
return cls(indexes, counts_at_indexes, healpix_order)

@classmethod
def from_file(cls, file_name):
Expand All @@ -97,5 +81,35 @@ def from_file(cls, file_name):
Returns:
new sparse histogram
"""
histo = load_npz(file_name)
return cls(histo)
npzfile = np.load(file_name)
return cls(npzfile["indexes"], npzfile["counts"], npzfile["order"])


class HistogramAggregator:
    """Accumulate many SparseHistogram partials into one dense count array.

    The running total is kept as a dense int64 numpy array with one slot per
    healpix pixel at the aggregator's order.
    """

    def __init__(self, order):
        """Create an aggregator whose running total starts at all zeros.

        Args:
            order: healpix order; the dense total has ``hp.order2npix(order)`` slots.
        """
        self.order = order
        pixel_count = hp.order2npix(order)
        self.full_histogram = np.zeros(pixel_count, dtype=np.int64)

    def add(self, other):
        """Fold the counts of a sparse histogram into the running total.

        Args:
            other (SparseHistogram): the partial histogram to add in.

        Raises:
            ValueError: if ``other`` is not a SparseHistogram, or if its
                healpix order differs from this aggregator's order.
        """
        if not isinstance(other, SparseHistogram):
            raise ValueError("Both addends should be SparseHistogram.")
        if self.order != other.order:
            raise ValueError(
                "The histogram partials have incompatible sizes due to different healpix orders."
            )
        # An empty partial contributes nothing, so only touch the dense array
        # when there is at least one index to update.
        # NOTE(review): with fancy-index `+=`, numpy applies the increment only
        # once for an index that is repeated within `other.indexes`. This
        # presumes each partial lists every pixel at most once -- confirm
        # against the code that builds the partials.
        if len(other.indexes) > 0:
            self.full_histogram[other.indexes] += other.counts

    def to_sparse(self):
        """Return a SparseHistogram holding only the non-zero pixels of the total."""
        nonzero_pixels = np.nonzero(self.full_histogram)[0]
        return SparseHistogram(nonzero_pixels, self.full_histogram[nonzero_pixels], self.order)
36 changes: 17 additions & 19 deletions tests/hats/pixel_math/test_sparse_histogram.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,9 @@
import numpy.testing as npt
import pytest
from numpy import frombuffer
from scipy.sparse import csr_array

import hats.pixel_math.healpix_shim as hp
from hats.pixel_math.sparse_histogram import SparseHistogram
from hats.pixel_math.sparse_histogram import HistogramAggregator, SparseHistogram


def test_make_empty():
Expand Down Expand Up @@ -42,39 +41,38 @@ def test_add_same_order():

partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 0)

partial_histogram_left.add(partial_histogram_right)
total_histogram = HistogramAggregator(0)
total_histogram.add(partial_histogram_left)
total_histogram.add(partial_histogram_right)

expected = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 146]
npt.assert_array_equal(partial_histogram_left.to_array(), expected)
npt.assert_array_equal(total_histogram.full_histogram, expected)

sparse = total_histogram.to_sparse()
npt.assert_array_equal(sparse.indexes, [10, 11])
npt.assert_array_equal(sparse.counts, [4, 146])
npt.assert_array_equal(sparse.order, 0)


def test_add_different_order():
"""Test that we can NOT add histograms of different healpix orders."""
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)

partial_histogram_right = SparseHistogram.make_from_counts([10, 11], [4, 15], 1)

total_histogram = HistogramAggregator(0)
total_histogram.add(partial_histogram_left)
with pytest.raises(ValueError, match="partials have incompatible sizes"):
partial_histogram_left.add(partial_histogram_right)
total_histogram.add(partial_histogram_right)


def test_add_different_type():
"""Test that we can NOT add an addend that is not a SparseHistogram."""
partial_histogram_left = SparseHistogram.make_from_counts([11], [131], 0)

total_histogram = HistogramAggregator(0)
total_histogram.add(partial_histogram_left)
with pytest.raises(ValueError, match="addends should be SparseHistogram"):
partial_histogram_left.add(5)
total_histogram.add(5)

with pytest.raises(ValueError, match="addends should be SparseHistogram"):
partial_histogram_left.add([1, 2, 3, 4, 5])


def test_init_bad_inputs():
"""Test that the SparseHistogram type requires a compressed sparse column
as its sole `sparse_array` argument."""
with pytest.raises(ValueError, match="must be a scipy sparse array"):
SparseHistogram(5)

with pytest.raises(ValueError, match="must be a Compressed Sparse Column"):
row_sparse_array = csr_array((1, 12), dtype=np.int64)
SparseHistogram(row_sparse_array)
total_histogram.add(([1, 2, 3, 4, 5], [1, 2, 3, 4, 5], 0))

0 comments on commit 56e9c90

Please sign in to comment.