Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Account for mask byte in chunk size calculation #211

Merged
merged 1 commit into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 16 additions & 14 deletions xee/ext.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,8 @@
'double': np.float64,
}

# While this documentation says that the limit is 10 MB...
# https://developers.google.com/earth-engine/guides/usage#request_payload_size
# actual byte limit seems to depend on other factors. This has been found via
# trial & error.
# Earth Engine image:computePixels request is limited to 48 MB
# https://developers.google.com/earth-engine/reference/rest/v1/projects.image/computePixels
REQUEST_BYTE_LIMIT = 2**20 * 48 # 48 MBs

# Xee uses the ee.ImageCollection.toList function for slicing into an
Expand All @@ -80,10 +78,12 @@
_TO_LIST_WARNING_LIMIT = 10000


# Used in ext_test.py.
def _check_request_limit(chunks: Dict[str, int], dtype_size: int, limit: int):
"""Checks that the actual number of bytes does not exceed the limit."""
index, width, height = chunks['index'], chunks['width'], chunks['height']
actual_bytes = index * width * height * dtype_size
# Add one for the mask byte (Earth Engine bytes-per-pixel accounting).
actual_bytes = index * width * height * (dtype_size + 1)
if actual_bytes > limit:
raise ValueError(
f'`chunks="auto"` failed! Actual bytes {actual_bytes!r} exceeds limit'
Expand All @@ -105,7 +105,7 @@ class EarthEngineStore(common.AbstractDataStore):
# "Safe" default chunks that won't exceed the request limit.
PREFERRED_CHUNKS: Dict[str, int] = {
'index': 48,
'width': 512,
'width': 256,
'height': 256,
}

Expand Down Expand Up @@ -352,20 +352,22 @@ def _auto_chunks(
# height and width follow round numbers (powers of two) and allocate the
# remaining bytes available for the index length. To illustrate this logic,
# let's follow through with an example where:
# request_byte_limit = 2 ** 20 * 10 # = 10 MBs
# request_byte_limit = 2 ** 20 * 48 # = 48 MBs
# dtype_bytes = 8
log_total = np.log2(request_byte_limit) # e.g.=23.32...
log_dtype = np.log2(dtype_bytes) # e.g.=3

log_total = np.log2(request_byte_limit) # e.g.=25.58...
# Add one for the mask byte (Earth Engine bytes-per-pixel accounting).
log_dtype = np.log2(dtype_bytes + 1) # e.g.=3.16...
log_limit = 10 * (log_total // 10) # e.g.=20
log_index = log_total - log_limit # e.g.=3.32...
log_index = log_total - log_limit # e.g.=5.58...

# Motivation: How do we divide a number N into the closest sum of two ints?
d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=17/2=8.5
wd, ht = np.ceil(d), np.floor(d) # e.g. wd=9, ht=8
d = (log_limit - np.ceil(log_dtype)) / 2 # e.g.=16/2=8.0
wd, ht = np.ceil(d), np.floor(d) # e.g. wd=8, ht=8

# Put back to byte space, then round to the nearest integer number of bytes.
index = int(np.rint(2**log_index)) # e.g.=10
width = int(np.rint(2**wd)) # e.g.=512
index = int(np.rint(2**log_index)) # e.g.=48
width = int(np.rint(2**wd)) # e.g.=256
height = int(np.rint(2**ht)) # e.g.=256

return {'index': index, 'width': width, 'height': height}
Expand Down
44 changes: 31 additions & 13 deletions xee/ext_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,32 @@ class EEStoreStandardDatatypesTest(parameterized.TestCase):
dict(
testcase_name='int8',
dtype=np.dtype('int8'),
expected_chunks={'index': 48, 'width': 1024, 'height': 1024},
expected_chunks={'index': 48, 'width': 1024, 'height': 512},
),
dict(
testcase_name='int32',
dtype=np.dtype('int32'),
expected_chunks={'index': 48, 'width': 512, 'height': 512},
expected_chunks={'index': 48, 'width': 512, 'height': 256},
),
dict(
testcase_name='int64',
dtype=np.dtype('int64'),
expected_chunks={'index': 48, 'width': 512, 'height': 256},
expected_chunks={'index': 48, 'width': 256, 'height': 256},
),
dict(
testcase_name='float32',
dtype=np.dtype('float32'),
expected_chunks={'index': 48, 'width': 512, 'height': 512},
expected_chunks={'index': 48, 'width': 512, 'height': 256},
),
dict(
testcase_name='float64',
dtype=np.dtype('float64'),
expected_chunks={'index': 48, 'width': 512, 'height': 256},
expected_chunks={'index': 48, 'width': 256, 'height': 256},
),
dict(
testcase_name='complex64',
dtype=np.dtype('complex64'),
expected_chunks={'index': 48, 'width': 512, 'height': 256},
expected_chunks={'index': 48, 'width': 256, 'height': 256},
),
)
def test_auto_chunks__handles_standard_dtypes(self, dtype, expected_chunks):
Expand All @@ -49,7 +49,7 @@ def test_auto_chunks__handles_standard_dtypes(self, dtype, expected_chunks):
)


class EEStoreTest(absltest.TestCase):
class EEStoreTest(parameterized.TestCase):

def test_auto_chunks__handles_range_of_dtype_sizes(self):
dt = 0
Expand All @@ -59,18 +59,36 @@ def test_auto_chunks__handles_range_of_dtype_sizes(self):
except ValueError:
self.fail(f'Could not handle data type size {dt}.')

def test_auto_chunks__is_optimal_for_powers_of_two(self):
for p in range(10):
dt = 2**p
chunks = xee.EarthEngineStore._auto_chunks(dt)
def test_auto_chunks__matches_observed_values(self):
observed_results = {
1: 50331648,
2: 37748736,
4: 31457280,
8: 28311552,
16: 26738688,
32: 25952256,
64: 25559040,
128: 25362432,
256: 25264128,
512: 25214976,
}

for dtype_bytes, expected_bytes in observed_results.items():
chunks = xee.EarthEngineStore._auto_chunks(dtype_bytes)
actual_bytes = np.prod(list(chunks.values())) * (
dtype_bytes + 1
) # added +1 to account for the mask byte
self.assertEqual(
xee.REQUEST_BYTE_LIMIT, np.prod(list(chunks.values())) * dt
expected_bytes,
actual_bytes,
f'dtype_bytes: {dtype_bytes}, Expected: {expected_bytes}, '
f'Actual: {actual_bytes}, Chunks: {chunks}',
)

def test_exceeding_byte_limit__raises_error(self):
dtype_size = 8
# does not fail
chunks = {'index': 48, 'width': 512, 'height': 256}
chunks = {'index': 48, 'width': 256, 'height': 256}
ext._check_request_limit(chunks, dtype_size, xee.REQUEST_BYTE_LIMIT)

# fails
Expand Down
Loading