Skip to content

Commit

Permalink
AutoShotWrap grid override (#338)
Browse files Browse the repository at this point in the history
* Added AutoShopWrap grid override.  This allows ingestion with gun indexed by unwrapping shot_point for shot_lines.

* Add documentation to cover segy_to_mdio AutoShotWrap grid override.

* Resolve merge conflicts.

* Update Dockerfile to align with nox pipeline checks.

* Merge poetry.lock.

* Fix ingestion tests.

* Fix issues with tests.

* Switch to poetry.lock from main to resolve conflict.

* Add some more explainination on ShotGumGeometryType..

* Linting updates.

* Remove unused code.

* Update .devcontainer/Dockerfile

* Update tests/integration/conftest.py

* update long text syntax

* Fix linting.

---------

Co-authored-by: Altay Sansal <[email protected]>
  • Loading branch information
markspec and tasansal authored Mar 10, 2024
1 parent f19762c commit 40ed5e0
Show file tree
Hide file tree
Showing 5 changed files with 261 additions and 11 deletions.
10 changes: 5 additions & 5 deletions .devcontainer/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
ARG PYTHON_VERSION=3.11
ARG PYTHON_VERSION=3.12
ARG LINUX_DISTRO=bookworm

FROM mcr.microsoft.com/devcontainers/python:1-${PYTHON_VERSION}-${LINUX_DISTRO}

# Install git for nox pre-commit
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
git \
&& rm -rf /var/lib/apt/lists/*
&& apt-get install -y --no-install-recommends \
git \
&& rm -rf /var/lib/apt/lists/*

# Poetry
ARG POETRY_VERSION="1.6.1"
ARG POETRY_VERSION="1.8.2"
RUN if [ "${POETRY_VERSION}" != "none" ]; then bash -c "umask 0002 && pip3 install poetry==${POETRY_VERSION}"; fi

# Nox
Expand Down
29 changes: 28 additions & 1 deletion src/mdio/converters/segy.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ def grid_density_qc(grid: Grid, num_traces: int) -> None:

logger.debug(f"Dimensions: {dims}")
logger.debug(f"num_traces = {num_traces}")
logger.debug(f"grid_traces = {grid_traces}")
logger.debug(f"sparsity = {grid_traces / num_traces}")

grid_sparsity_ratio_limit = os.getenv("MDIO__GRID__SPARSITY_RATIO_LIMIT", 10)
try:
Expand Down Expand Up @@ -270,7 +272,7 @@ def segy_to_mdio(
... mdio_path_or_buffer="s3://bucket/shot_file.mdio",
... index_bytes=(17, 137, 13),
... index_lengths=(4, 2, 4),
... index_names=("shot", "cable", "channel"),
... index_names=("shot_point", "cable", "channel"),
... chunksize=(8, 2, 128, 1024),
... grid_overrides={"ChannelWrap": True, "ChannelsPerCable": 800},
... )
Expand All @@ -283,6 +285,31 @@ def segy_to_mdio(
>>> grid_overrides={"AutoChannelWrap": True,
"AutoChannelTraceQC": 1000000}
For ingestion of pre-stack streamer data where the user needs to
access/index *common-channel gathers* (single gun) then the following
strategy can be used to densely ingest while indexing on gun number:
>>> segy_to_mdio(
... segy_path="prefix/shot_file.segy",
... mdio_path_or_buffer="s3://bucket/shot_file.mdio",
... index_bytes=(133, 171, 17, 137, 13),
... index_lengths=(2, 2, 4, 2, 4),
... index_names=("shot_line", "gun", "shot_point", "cable", "channel"),
... chunksize=(1, 1, 8, 1, 128, 1024),
... grid_overrides={
... "AutoShotWrap": True,
... "AutoChannelWrap": True,
... "AutoChannelTraceQC": 1000000
... },
... )
For AutoShotWrap and AutoChannelWrap to work, the user must provide
"shot_line", "gun", "shot_point", "cable", "channel". For improved
common-channel performance consider modifying the chunksize to be
(1, 1, 32, 1, 32, 2048) for good common-shot and common-channel
performance or (1, 1, 128, 1, 1, 2048) for common-channel
performance.
For cases with no well-defined trace header for indexing a NonBinned
grid override is provided.This creates the index and attributes an
incrementing integer to the trace for the index based on first in first
Expand Down
126 changes: 125 additions & 1 deletion src/mdio/segy/geometry.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,28 @@ class StreamerShotGeometryType(Enum):
C = auto()


class ShotGunGeometryType(Enum):
r"""Shot geometry template types for multi-gun acquisition.
For shot lines with multiple guns, we can have two configurations for
numbering shot_point. The desired index is to have the shot point index
for a given gun to be dense and unique (configuration A). Typically the
shot_point is unique for the line and therefore is not dense for each
gun (configuration B).
Configuration A:
Gun 1 -> 1------------------20
Gun 2 -> 1------------------20
Configuration B:
Gun 1 -> 1------------------39
Gun 2 -> 2------------------40
"""
A = auto()
B = auto()


def analyze_streamer_headers(
index_headers: dict[str, npt.NDArray],
) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray, StreamerShotGeometryType]:
Expand Down Expand Up @@ -91,6 +113,7 @@ def analyze_streamer_headers(

# Check channel numbers do not overlap for case B
geom_type = StreamerShotGeometryType.B

for idx1, cable1 in enumerate(unique_cables):
min_val1 = cable_chan_min[idx1]
max_val1 = cable_chan_max[idx1]
Expand Down Expand Up @@ -124,13 +147,65 @@ def analyze_streamer_headers(
return unique_cables, cable_chan_min, cable_chan_max, geom_type


def analyze_shotlines_for_guns(
index_headers: dict[str, npt.NDArray],
) -> tuple[npt.NDArray, npt.NDArray, ShotGunGeometryType]:
"""Check input headers for SEG-Y input to help determine geometry of shots and guns.
This function reads in trace_qc_count headers and finds the unique gun values.
The function then checks to ensure shot numbers are dense.
Args:
index_headers: numpy array with index headers
Returns:
tuple of unique_shot_lines, unique_guns_in_shot_line, geom_type
"""
# Find unique cable ids
unique_shot_lines = np.sort(np.unique(index_headers["shot_line"]))
unique_guns = np.sort(np.unique(index_headers["gun"]))
logger.info(f"unique_shot_lines: {unique_shot_lines}")
logger.info(f"unique_guns: {unique_guns}")

# Find channel min and max values for each cable
# unique_guns_in_shot_line = np.empty(unique_shot_lines.shape)
unique_guns_in_shot_line = dict()

geom_type = ShotGunGeometryType.B
# Check shot numbers are still unique if div/num_guns
for shot_line in unique_shot_lines:
shot_line_mask = index_headers["shot_line"] == shot_line
shot_current_sl = index_headers["shot_point"][shot_line_mask]
gun_current_sl = index_headers["gun"][shot_line_mask]

unique_guns_sl = np.sort(np.unique(gun_current_sl))
num_guns_sl = unique_guns_sl.shape[0]
# unique_guns_in_shot_line[idx] = list(unique_guns_sl)
unique_guns_in_shot_line[str(shot_line)] = list(unique_guns_sl)

for gun in unique_guns_sl:
gun_mask = gun_current_sl == gun
shots_current_sl_gun = shot_current_sl[gun_mask]
num_shots_sl = np.unique(shots_current_sl_gun).shape[0]
mod_shots = np.floor(shots_current_sl_gun / num_guns_sl)
if len(np.unique(mod_shots)) != num_shots_sl:
msg = (
f"Shot line {shot_line} has {num_shots_sl} when using div by "
f"{num_guns_sl} (num_guns) has {np.unique(mod_shots)} unique mod shots."
)
logger.info(msg)
geom_type = ShotGunGeometryType.A
return unique_shot_lines, unique_guns_in_shot_line, geom_type
return unique_shot_lines, unique_guns_in_shot_line, geom_type


def create_counter(
depth: int,
total_depth: int,
unique_headers: dict[str, npt.NDArray],
header_names: list[str],
):
"""Helper funtion to create dictionary tree for counting trace key for auto index."""
"""Helper function to create dictionary tree for counting trace key for auto index."""
if depth == total_depth:
return 0

Expand Down Expand Up @@ -490,6 +565,54 @@ def transform(
return index_headers


class AutoShotWrap(GridOverrideCommand):
"""Automatically determine ShotGun acquisition type."""

required_keys = {"shot_line", "gun", "shot_point", "cable", "channel"}
required_parameters = None

def validate(
self,
index_headers: dict[str, npt.NDArray],
grid_overrides: dict[str, bool | int],
) -> None:
"""Validate if this transform should run on the type of data."""
self.check_required_keys(index_headers)
self.check_required_params(grid_overrides)

def transform(
self,
index_headers: dict[str, npt.NDArray],
grid_overrides: dict[str, bool | int],
) -> dict[str, npt.NDArray]:
"""Perform the grid transform."""
self.validate(index_headers, grid_overrides)

result = analyze_shotlines_for_guns(index_headers)
unique_shot_lines, unique_guns_in_shot_line, geom_type = result
logger.info(f"Ingesting dataset as shot type: {geom_type.name}")

# TODO: Add strict=True and remove noqa when min Python is 3.10
max_num_guns = 1
for shot_line in unique_shot_lines:
logger.info(
f"shot_line: {shot_line} has guns: {unique_guns_in_shot_line[str(shot_line)]}"
)
num_guns = len(unique_guns_in_shot_line[str(shot_line)])
if num_guns > max_num_guns:
max_num_guns = num_guns

# This might be slow and potentially could be improved with a rewrite
# to prevent so many lookups
if geom_type == ShotGunGeometryType.B:
for shot_line in unique_shot_lines:
shot_line_idxs = np.where(index_headers["shot_line"][:] == shot_line)
index_headers["shot_point"][shot_line_idxs] = np.floor(
index_headers["shot_point"][shot_line_idxs] / max_num_guns
)
return index_headers


class GridOverrider:
"""Executor for grid overrides.
Expand All @@ -503,6 +626,7 @@ def __init__(self):
"""Define allowed overrides and parameters here."""
self.commands = {
"AutoChannelWrap": AutoChannelWrap(),
"AutoShotWrap": AutoShotWrap(),
"CalculateCable": CalculateCable(),
"ChannelWrap": ChannelWrap(),
"NonBinned": NonBinned(),
Expand Down
17 changes: 16 additions & 1 deletion tests/integration/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ def create_segy_mock_4d(
shots: list,
cables: list,
receivers_per_cable: list,
guns: list | None = None,
chan_header_type: StreamerShotGeometryType = StreamerShotGeometryType.A,
index_receivers: bool = True,
) -> str:
Expand Down Expand Up @@ -55,20 +56,30 @@ def create_segy_mock_4d(
index_receivers = False

shot_headers = np.hstack([np.repeat(shot, total_chan) for shot in shots])

gun_per_shot = []
for shot in shots:
gun_per_shot.append(guns[(shot % len(guns))])
gun_headers = np.hstack([np.repeat(gun, total_chan) for gun in gun_per_shot])

cable_headers = np.tile(cable_headers, shot_count)
channel_headers = np.tile(channel_headers, shot_count)

with segyio.create(segy_file, spec) as f:
for trc_idx in range(trace_count):
shot = shot_headers[trc_idx]
gun = gun_headers[trc_idx]
cable = cable_headers[trc_idx]
channel = channel_headers[trc_idx]
source_line = 1

# offset is byte location 37 - offset 4 bytes
# fldr is byte location 9 - shot 4 byte
# ep is byte location 17 - shot 4 byte
# stae is byte location 137 - cable 2 byte
# tracf is byte location 13 - channel 4 byte
# grnors is byte location 171 - gun 2 bytes
# styp is byte location 133 - source_line 2 bytes

if index_receivers:
f.header[trc_idx].update(
Expand All @@ -77,6 +88,8 @@ def create_segy_mock_4d(
ep=shot,
stae=cable,
tracf=channel,
grnors=gun,
styp=source_line,
)
else:
f.header[trc_idx].update(
Expand All @@ -98,7 +111,8 @@ def create_segy_mock_4d(
def segy_mock_4d_shots(fake_segy_tmp: str) -> dict[StreamerShotGeometryType, str]:
"""Generate mock 4D shot SEG-Y files."""
num_samples = 25
shots = [2, 3, 5]
shots = [2, 3, 5, 6, 7, 8, 9]
guns = [1, 2]
cables = [0, 101, 201, 301]
receivers_per_cable = [1, 5, 7, 5]

Expand All @@ -112,6 +126,7 @@ def segy_mock_4d_shots(fake_segy_tmp: str) -> dict[StreamerShotGeometryType, str
cables=cables,
receivers_per_cable=receivers_per_cable,
chan_header_type=chan_header_type,
guns=guns,
)

return segy_paths
Loading

0 comments on commit 40ed5e0

Please sign in to comment.