Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-37038: Wait for exposures for some ingests. #62

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions src/enqueue.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,13 @@ def enqueue_objects(objects):
for o in objects:
if regexp.search(o):
info = Info.from_path(o)
pipe.lpush(f"QUEUE:{info.bucket}", o)
logger.info("Enqueued %s to %s", o, info.bucket)
if info.needs_exposure():
if not r.hexists("EXPSEEN", info.exp_id):
pipe.lpush(f"EXPWAIT:{info.exp_id}", o)
logger.info("Wait for exposure %s: %s", info.exp_id, o)
else:
pipe.lpush(f"QUEUE:{info.bucket}", o)
logger.info("Enqueued %s to %s", o, info.bucket)
info_list.append(info)
pipe.execute()
return info_list
Expand Down
13 changes: 13 additions & 0 deletions src/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.

import logging
import re
from dataclasses import dataclass
from typing import Self

Expand Down Expand Up @@ -74,6 +75,12 @@ def from_path(url: str) -> Self:
else:
return ExposureInfo(url)

def needs_exposure(self) -> bool:
    """Whether ingest of this file should wait until its exposure is seen.

    The base class never waits; subclasses override where needed.
    """
    return False

def is_raw(self) -> bool:
    """Whether this file is a raw exposure file; the base class says no."""
    return False


@dataclass
class ExposureInfo(Info):
Expand Down Expand Up @@ -123,6 +130,12 @@ def __init__(self, path):
logger.exception("Unable to parse: %s", path)
raise

def needs_exposure(self) -> bool:
    """Guider files wait for their exposure before being enqueued for ingest."""
    guider_suffix = "_guider.fits"
    return self.filename.endswith(guider_suffix)

def is_raw(self) -> bool:
    """A raw file's name contains a digit immediately before ".fits"."""
    return re.search(r"\d\.fits", self.filename) is not None


@dataclass
class LfaInfo(Info):
Expand Down
70 changes: 56 additions & 14 deletions src/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,13 @@
import os
import socket
import time
from typing import Callable

import astropy.io.fits
import requests
from lsst.daf.butler import Butler
from lsst.daf.butler import Butler, DatasetRef
from lsst.obs.base import DefineVisitsTask, RawIngestTask
from lsst.obs.lsst import ingest_guider
from lsst.resources import ResourcePath

from info import Info
Expand Down Expand Up @@ -82,9 +84,17 @@ def on_success(datasets):
logger.debug("%s", info)
with r.pipeline() as pipe:
pipe.lrem(worker_queue, 0, info.path)
if info.is_raw():
if pipe.hsetnx("EXPSEEN", info.exp_id, str(time.time())):
pipe.hexpire("EXPSEEN", 48 * 3600, info.exp_id)
pipe.hset(f"FILE:{info.path}", "ingest_time", str(time.time()))
pipe.hincrby(f"INGEST:{info.bucket}:{info.instrument}", f"{info.obs_day}", 1)
pipe.execute()
if info.is_raw():
wait_queue = f"EXPWAIT:{info.exp_id}"
if r.llen(wait_queue) > 0:
while r.lmove(wait_queue, worker_queue, "RIGHT", "LEFT"):
pass
if webhook_uri:
resp = requests.post(webhook_uri, json=info.__dict__, timeout=0.5)
logger.info("Webhook response: %s", resp)
Expand Down Expand Up @@ -186,8 +196,39 @@ def record_groups(resources: list[ResourcePath]) -> None:
pipe.execute()


def ingest_batch(
    resources: list[ResourcePath], ingest_func: Callable[[list[ResourcePath]], list[DatasetRef]]
) -> list[DatasetRef]:
    """Ingest a batch of resources, falling back to per-file ingest on failure.

    If the whole-batch call raises, each resource is retried individually.
    A resource that also fails on its own is logged and removed from the
    worker queue so it does not block further processing.
    """
    try:
        # Fast path: one call for the whole batch.
        return list(ingest_func(resources))
    except Exception:
        logger.exception("Error while ingesting %s, retrying one by one", resources)
    ingested = []
    for res in resources:
        try:
            ingested.extend(ingest_func([res]))
        except Exception:
            logger.exception("Error while ingesting %s", res)
            # Drop the failed file from this worker's queue.
            r.lrem(worker_queue, 0, Info.from_path(res.geturl()).path)
    return ingested


def guider_ingest(resources: list[ResourcePath]) -> list[DatasetRef]:
    """Ingest guider FITS files via the dedicated guider ingest path.

    Passes ``transfer="direct"`` and routes the shared success/failure
    callbacks, mirroring the handlers used by the regular raw ingester.
    """
    # No ``global butler`` needed: the module-level name is only read here,
    # never rebound, so the declaration was redundant.
    return ingest_guider(
        butler,
        resources,
        transfer="direct",
        on_success=on_success,
        on_ingest_failure=on_ingest_failure,
        on_metadata_failure=on_metadata_failure,
    )


def main():
"""Ingest FITS files from a Redis queue."""
global butler
logger.info("Initializing Butler from %s", butler_repo)
butler = Butler(butler_repo, writeable=True)
ingest_config = RawIngestTask.ConfigClass()
Expand Down Expand Up @@ -219,19 +260,20 @@ def main():
record_groups(resources)

logger.info("Ingesting %s", resources)
refs = None
try:
refs = ingester.run(resources)
except Exception:
logger.exception("Error while ingesting %s, retrying one by one", resources)
refs = []
for resource in resources:
try:
refs.extend(ingester.run([resource]))
except Exception:
logger.exception("Error while ingesting %s", resource)
info = Info.from_path(resource.geturl())
r.lrem(worker_queue, 0, info.path)

normal_resources = []
guider_resources = []
for resource in resources:
if resource.basename().endswith("_guider.fits"):
guider_resources.append(resource)
else:
normal_resources.append(resource)

refs = []
if guider_resources:
refs.extend(ingest_batch(guider_resources, guider_ingest))
if normal_resources:
refs.extend(ingest_batch(normal_resources, ingester.run))

# Define visits if we ingested anything
if not is_lfa and refs:
Expand Down
Loading