From 4b6e0217e9acd66e7699712560bb0c70502c730a Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Mon, 14 Oct 2024 15:08:29 -0700 Subject: [PATCH 01/10] Implement code to fill in missing columns --- python/lsst/consdb/hinfo_fix_missing.py | 255 ++++++++++++++++++++++++ 1 file changed, 255 insertions(+) create mode 100644 python/lsst/consdb/hinfo_fix_missing.py diff --git a/python/lsst/consdb/hinfo_fix_missing.py b/python/lsst/consdb/hinfo_fix_missing.py new file mode 100644 index 00000000..1b8ef714 --- /dev/null +++ b/python/lsst/consdb/hinfo_fix_missing.py @@ -0,0 +1,255 @@ +import re +from math import radians, sin +from typing import Any + +import sqlalchemy as sa +from astropy.coordinates import AltAz, EarthLocation, SkyCoord +from astropy.time import Time +from sqlalchemy.dialects import postgresql +from sqlalchemy.orm import Session + +from .utils import setup_logging, setup_postgres + + +class Fixer: + """A collection of functions that try to patch up missing data columns.""" + + def __init__(self, exposure_rec: dict[str, Any], logger=None): + """Calls all fixup methods. + + Given an `exposure_rec` dictionary, apply all fixup methods to the + dictionary in one go. Call this to apply the fixup methods. To add + more fixup methods, just add them to this class, make sure the + method name starts with "fix_", and make sure the signature matches: + def fix_whatever(self, exposure_rec: dict[str, Any]) + The method should return a dictionary with keys to update. + + """ + self.logger = logger if logger is not None else setup_logging("hinfo_fix_missing") + self.updates: dict[str, Any] + self.updates = {} + for attr_name in dir(self): + if attr_name.startswith("fix_"): + attr = getattr(self, attr_name) + self.updates = {**self.updates, **attr(exposure_rec)} + + def fix_telescope_position(self, exposure_rec: dict[str, Any]) -> dict[str, Any]: + """Attempts to fill in missing sky position columns. + + It relies on s_ra and s_dec being present, and does + calculations to obtain altitude, azimuth and zenith + distance for the beginning, middle, and end of the + exposure as well as airmass. + + It modifies the `exposure_rec` dictionary and returns nothing. + + Parameters + ---------- + exposure_rec : `dict[str, Any]` + The exposure record dictionary, (almost) ready to + be copied into consolidated database exposure + table. + + Returns + ------- + dict[str, Any] + Fixed columns in the original `exposure_rec` dictionary. + """ + + if exposure_rec["airmass"] is not None: + # No need to do calculations, it's already done. + return dict() + + if exposure_rec["s_ra"] is None or exposure_rec["s_dec"] is None: + # Bail out because we don't have enough info. + return dict() + + # Convert from RA and Dec + s_ra, s_dec = map(lambda x: float(exposure_rec[x]), ("s_ra", "s_dec")) + location = EarthLocation.of_site("LSST") + obstimes = Time( + [ + exposure_rec["obs_start_mjd"], + exposure_rec["exp_midpt_mjd"], + exposure_rec["obs_end_mjd"], + ], + format="mjd", + scale="tai", + ) + coord = SkyCoord(s_ra, s_dec, unit="deg") + altaz = coord.transform_to(AltAz(obstime=obstimes, location=location)) + + # Get altaz calculations + altitude_start, altitude, altitude_end = altaz.alt.deg + azimuth_start, azimuth, azimuth_end = altaz.az.deg + zenith_distance_start, zenith_distance, zenith_distance_end = altaz.zen.deg + + # Use K&Y 1989 model to compute airmass from altitude + airmass = None + if altitude >= 0 and altitude <= 90: + airmass = 1 / (sin(radians(altitude)) + 0.50572 * (altitude + 6.07995) ** -1.6364) + + # Load them into the update dictionary. + update = {} + calculations = { + "altitude_start": altitude_start, + "altitude": altitude, + "altitude_end": altitude_end, + "azimuth_start": azimuth_start, + "azimuth": azimuth, + "azimuth_end": azimuth_end, + "zenith_distance_start": zenith_distance_start, + "zenith_distance": zenith_distance, + "zenith_distance_end": zenith_distance_end, + "airmass": airmass, + } + for k, v in calculations.items(): + if exposure_rec[k] is None and v is not None: + update[k] = v + self.logger.debug(f"Inferring column: {k}") + return update + + def fix_band(self, exposure_rec: dict[str, Any]) -> dict[str, Any]: + """Tries to identify band, if not provided. + + This function relies on the physical_filter column to + indicate the band. It uses two formats: + * u_01 => u band, seen in LSSTComCam + * SDSSr_65mm~empty => r band, seen in LATISS + + It modifies the `exposure_rec` dictionary and returns + nothing. + + Parameters + ---------- + exposure_rec : `dict[str, Any]` + The exposure record dictionary, (almost) ready to + be copied into consolidated database exposure + table. + + Returns + ------- + dict[str, Any] + Fixed columns in the original `exposure_rec` dictionary. + """ + if exposure_rec["band"] is not None: + # Band already set + return dict() + + if exposure_rec["physical_filter"] is None: + # Can't infer band from physical_filter + return dict() + + for band in "ugrizy": + if f"SDSS{band}" in exposure_rec["physical_filter"]: + exposure_rec["band"] = band + self.logger.debug("Inferring column: band") + return {"band": band} + + if re.fullmatch(f"{band}_[0-9]+", exposure_rec["physical_filter"]): + self.logger.debug("Inferring column: band") + exposure_rec["band"] = band + return {"band": band} + + return dict() + + def fix_dark_time(self, exposure_rec: dict[str, Any]) -> dict[str, Any]: + """Tries to fill in missing dark_time information from the record. + + The interval from exposure start time to end time seems to be + a really good proxy for dark_time. If that's not available, we + fall back on the exposure time as an estimate. + + Parameters + ---------- + exposure_rec : `dict[str, Any]` + The exposure record dictionary, (almost) ready to + be copied into consolidated database exposure + table. + + Returns + ------- + dict[str, Any] + Fixed columns in the original `exposure_rec` dictionary. + """ + if exposure_rec["dark_time"] is not None: + return dict() + + if exposure_rec["obs_start_mjd"] is not None and exposure_rec["obs_end_mjd"] is not None: + self.logger.debug("Inferring column: dark_time") + return {"dark_time": 86400 * (exposure_rec["obs_end_mjd"] - exposure_rec["obs_start_mjd"])} + + if exposure_rec["exp_time"] is None: + return dict() + + # Fall back to exposure time + self.logger.debug("Inferring column: dark_time") + return {"dark_time": exposure_rec["exp_time"]} + + +def fixup(schema: str, day_obs: int | None = None, seq_num: int | None = None) -> None: + """Fixes a specified row in the exposure table. + + The row specified by `day_obs` and `seq_num` will be modified + if any columns can be inferred from the existing data. If + `day_obs` and `seq_num` are None, then the entire table + will be recalculated. + + Parameters + ---------- + day_obs : int | None + The day of the observation to modify, or None to operate + on the whole table. + + seq_num : int | None + The image sequence number to modify, or None to operate + on the whole table. + """ + engine = setup_postgres() + session = Session(bind=engine) + + # Set up a database query + md = sa.MetaData(schema=f"cdb_{schema}") + md.reflect(engine) + exposure_table = md.tables[f"cdb_{schema}.exposure"] + stmt = sa.select(exposure_table) + if day_obs is not None and seq_num is not None: + stmt = stmt.where( + exposure_table.c.day_obs == day_obs, + exposure_table.c.seq_num == seq_num, + ) + + # Run and process the query + rows = session.execute(stmt) + for row in rows: + # Try to re-calculate any missing columns + exposure_rec = {col.name: getattr(row, col.name) for col in exposure_table.columns} + updates = Fixer(exposure_rec).updates + + if len(updates) > 0: + # There are values to update, so send them to the DB + + stmt = ( + sa.update(exposure_table) + .where( + exposure_table.c.day_obs == row.day_obs, + exposure_table.c.seq_num == row.seq_num, + ) + .values(updates) + ) + session.execute(stmt) + session.commit() + + +if __name__ == "__main__": + import sys + + schema = sys.argv[1] + day_obs = None + seq_num = None + + if len(sys.argv) > 2: + day_obs = int(sys.argv[2]) + seq_num = int(sys.argv[3]) + + fixup(schema, day_obs, seq_num) From c151184ec2447f33e24f22ffeccdea8067f96b88 Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Mon, 14 Oct 2024 15:13:26 -0700 Subject: [PATCH 02/10] Add fixes for missing data as part of hinfo's process --- python/lsst/consdb/hinfo.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/lsst/consdb/hinfo.py b/python/lsst/consdb/hinfo.py index 42492208..bbfee304 100644 --- a/python/lsst/consdb/hinfo.py +++ b/python/lsst/consdb/hinfo.py @@ -20,6 +20,7 @@ from sqlalchemy import MetaData, Table from sqlalchemy.dialects.postgresql import insert +from .hinfo_fix_missing import Fixer from .utils import setup_logging, setup_postgres if TYPE_CHECKING: @@ -361,6 +362,9 @@ def process_resource(resource: ResourcePath, instrument_dict: dict, update: bool for field, keyword in OI_MAPPING.items(): exposure_rec[field] = process_column(keyword, obs_info) + # Add missing data as best we can + exposure_rec.update(Fixer(exposure_rec)) + stmt = insert(instrument_obj.exposure_table).values(exposure_rec) if update: stmt = stmt.on_conflict_do_update(index_elements=["exposure_id"], set_=exposure_rec) From 4be5b3e8b6bf94c551392c072bba2df086096c33 Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Mon, 14 Oct 2024 15:16:03 -0700 Subject: [PATCH 03/10] Remove unused import --- python/lsst/consdb/hinfo_fix_missing.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/lsst/consdb/hinfo_fix_missing.py b/python/lsst/consdb/hinfo_fix_missing.py index 1b8ef714..607b39f0 100644 --- a/python/lsst/consdb/hinfo_fix_missing.py +++ b/python/lsst/consdb/hinfo_fix_missing.py @@ -5,7 +5,6 @@ import sqlalchemy as sa from astropy.coordinates import AltAz, EarthLocation, SkyCoord from astropy.time import Time -from sqlalchemy.dialects import postgresql from sqlalchemy.orm import Session from .utils import setup_logging, setup_postgres From 88c0a9d2ddb5ff94606e197ba434acbd9331e88f Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Mon, 14 Oct 2024 15:32:12 -0700 Subject: [PATCH 04/10] Fix typo --- python/lsst/consdb/hinfo.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/consdb/hinfo.py b/python/lsst/consdb/hinfo.py index bbfee304..5b37fe08 100644 --- a/python/lsst/consdb/hinfo.py +++ b/python/lsst/consdb/hinfo.py @@ -363,7 +363,7 @@ def process_resource(resource: ResourcePath, instrument_dict: dict, update: bool exposure_rec[field] = process_column(keyword, obs_info) # Add missing data as best we can - exposure_rec.update(Fixer(exposure_rec)) + exposure_rec.update(Fixer(exposure_rec).updates) stmt = insert(instrument_obj.exposure_table).values(exposure_rec) if update: From cd5003105f1fd741a0b2a2b999a3cf192034d006 Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Mon, 14 Oct 2024 16:07:26 -0700 Subject: [PATCH 05/10] Add basic sanity check on RA and dec --- python/lsst/consdb/hinfo_fix_missing.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/lsst/consdb/hinfo_fix_missing.py b/python/lsst/consdb/hinfo_fix_missing.py index 607b39f0..f3d8989a 100644 --- a/python/lsst/consdb/hinfo_fix_missing.py +++ b/python/lsst/consdb/hinfo_fix_missing.py @@ -63,6 +63,10 @@ def fix_telescope_position(self, exposure_rec: dict[str, Any]) -> dict[str, Any] # Bail out because we don't have enough info. return dict() + if exposure_rec["s_ra"] == 0. and exposure_rec["s_dec"] == 0.: + # Bail out because ra and dec don't appear to be valid. + return dict() + # Convert from RA and Dec s_ra, s_dec = map(lambda x: float(exposure_rec[x]), ("s_ra", "s_dec")) location = EarthLocation.of_site("LSST") From eb1317d812e37df73c20b5098d71fb3b9a87801a Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Mon, 14 Oct 2024 16:12:11 -0700 Subject: [PATCH 06/10] Fix formatting --- python/lsst/consdb/hinfo_fix_missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/consdb/hinfo_fix_missing.py b/python/lsst/consdb/hinfo_fix_missing.py index f3d8989a..73b5a4b0 100644 --- a/python/lsst/consdb/hinfo_fix_missing.py +++ b/python/lsst/consdb/hinfo_fix_missing.py @@ -63,7 +63,7 @@ def fix_telescope_position(self, exposure_rec: dict[str, Any]) -> dict[str, Any] # Bail out because we don't have enough info. return dict() - if exposure_rec["s_ra"] == 0. and exposure_rec["s_dec"] == 0.: + if exposure_rec["s_ra"] == 0.0 and exposure_rec["s_dec"] == 0.0: # Bail out because ra and dec don't appear to be valid. return dict() From 63f94a08c877cea979cda0835fa98dbb2608dc14 Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Thu, 17 Oct 2024 08:45:30 -0700 Subject: [PATCH 07/10] Clarify variable names/magic numbers --- python/lsst/consdb/hinfo_fix_missing.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/python/lsst/consdb/hinfo_fix_missing.py b/python/lsst/consdb/hinfo_fix_missing.py index 73b5a4b0..7f0cc0a3 100644 --- a/python/lsst/consdb/hinfo_fix_missing.py +++ b/python/lsst/consdb/hinfo_fix_missing.py @@ -90,7 +90,10 @@ def fix_telescope_position(self, exposure_rec: dict[str, Any]) -> dict[str, Any] # Use K&Y 1989 model to compute airmass from altitude airmass = None if altitude >= 0 and altitude <= 90: - airmass = 1 / (sin(radians(altitude)) + 0.50572 * (altitude + 6.07995) ** -1.6364) + a_ky89 = 0.50572 + b_ky89 = 6.07995 + c_ky89 = 1.6364 + airmass = 1 / (sin(radians(altitude)) + a_ky89 * (altitude + b_ky89) ** -c_ky89) # Load them into the update dictionary. update = {} @@ -106,10 +109,10 @@ def fix_telescope_position(self, exposure_rec: dict[str, Any]) -> dict[str, Any] "zenith_distance_end": zenith_distance_end, "airmass": airmass, } - for k, v in calculations.items(): - if exposure_rec[k] is None and v is not None: - update[k] = v - self.logger.debug(f"Inferring column: {k}") + for key, value in calculations.items(): + if exposure_rec[key] is None and value is not None: + update[key] = value + self.logger.debug(f"Inferring column: {key}") return update def fix_band(self, exposure_rec: dict[str, Any]) -> dict[str, Any]: @@ -180,7 +183,10 @@ def fix_dark_time(self, exposure_rec: dict[str, Any]) -> dict[str, Any]: if exposure_rec["obs_start_mjd"] is not None and exposure_rec["obs_end_mjd"] is not None: self.logger.debug("Inferring column: dark_time") - return {"dark_time": 86400 * (exposure_rec["obs_end_mjd"] - exposure_rec["obs_start_mjd"])} + seconds_per_day = 86400 + return { + "dark_time": seconds_per_day * (exposure_rec["obs_end_mjd"] - exposure_rec["obs_start_mjd"]) + } if exposure_rec["exp_time"] is None: return dict() From 9aa667948e5948daf8520f0cb82c164b245e2d1a Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Mon, 21 Oct 2024 08:36:13 -0700 Subject: [PATCH 08/10] Cleaner syntax for dictionary update Co-authored-by: Kian-Tat Lim --- python/lsst/consdb/hinfo_fix_missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/consdb/hinfo_fix_missing.py b/python/lsst/consdb/hinfo_fix_missing.py index 7f0cc0a3..678982ad 100644 --- a/python/lsst/consdb/hinfo_fix_missing.py +++ b/python/lsst/consdb/hinfo_fix_missing.py @@ -30,7 +30,7 @@ def fix_whatever(self, exposure_rec: dict[str, Any]) for attr_name in dir(self): if attr_name.startswith("fix_"): attr = getattr(self, attr_name) - self.updates = {**self.updates, **attr(exposure_rec)} + self.updates.update(attr(exposure_rec)) def fix_telescope_position(self, exposure_rec: dict[str, Any]) -> dict[str, Any]: """Attempts to fill in missing sky position columns. From 1fd9c6f6f503df08aec4389d9479d42081afdb8a Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Mon, 21 Oct 2024 08:37:17 -0700 Subject: [PATCH 09/10] Site name LSST -> Rubin Co-authored-by: Kian-Tat Lim --- python/lsst/consdb/hinfo_fix_missing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/lsst/consdb/hinfo_fix_missing.py b/python/lsst/consdb/hinfo_fix_missing.py index 678982ad..987cc116 100644 --- a/python/lsst/consdb/hinfo_fix_missing.py +++ b/python/lsst/consdb/hinfo_fix_missing.py @@ -69,7 +69,7 @@ def fix_telescope_position(self, exposure_rec: dict[str, Any]) -> dict[str, Any] # Convert from RA and Dec s_ra, s_dec = map(lambda x: float(exposure_rec[x]), ("s_ra", "s_dec")) - location = EarthLocation.of_site("LSST") + location = EarthLocation.of_site("Rubin") obstimes = Time( [ exposure_rec["obs_start_mjd"], From 76b305e60b4cf015e148e741dafd52d11f63838f Mon Sep 17 00:00:00 2001 From: Brian Brondel Date: Thu, 14 Nov 2024 11:12:58 -0300 Subject: [PATCH 10/10] Invoke fixup from hinfo --- python/lsst/consdb/hinfo.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/python/lsst/consdb/hinfo.py b/python/lsst/consdb/hinfo.py index 5b37fe08..e7b2165f 100644 --- a/python/lsst/consdb/hinfo.py +++ b/python/lsst/consdb/hinfo.py @@ -345,6 +345,8 @@ def process_resource(resource: ResourcePath, instrument_dict: dict, update: bool info = dict() content = yaml.safe_load(resource.read()) + fixup_columns: list[str] = [] + for header in content["PRIMARY"]: info[header["keyword"]] = header["value"] instrument_obj = instrument_dict[info["CONTRLLR"]] @@ -352,8 +354,12 @@ def process_resource(resource: ResourcePath, instrument_dict: dict, update: bool info["translator"] = instrument_obj.translator for column, column_def in KW_MAPPING.items(): exposure_rec[column] = process_column(column_def, info) + if exposure_rec[column] is None: + fixup_columns.append(column) for column, column_def in instrument_obj.instrument_mapping.items(): exposure_rec[column] = process_column(column_def, info) + if exposure_rec[column] is None: + fixup_columns.append(column) obs_info_obj = ObservationInfo(info, translator_class=instrument_obj.translator) obs_info = dict() @@ -361,9 +367,11 @@ def process_resource(resource: ResourcePath, instrument_dict: dict, update: bool obs_info[keyword] = getattr(obs_info_obj, keyword) for field, keyword in OI_MAPPING.items(): exposure_rec[field] = process_column(keyword, obs_info) + if exposure_rec[field] is None: + fixup_columns.append(column) # Add missing data as best we can - exposure_rec.update(Fixer(exposure_rec).updates) + exposure_rec.update(Fixer(info, obs_info, fixup_columns).updates) stmt = insert(instrument_obj.exposure_table).values(exposure_rec) if update: