Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DM-47535: Actually save calibrated catalog in ReprocessVisitImage. #81

Merged
merged 4 commits into from
Dec 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 38 additions & 28 deletions python/lsst/drp/tasks/reprocess_visit_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

import lsst.afw.image as afwImage
import lsst.afw.table as afwTable
import lsst.geom
import lsst.meas.algorithms
import lsst.meas.deblender
import lsst.meas.extensions.photometryKron
Expand Down Expand Up @@ -238,32 +239,32 @@ class ReprocessVisitImageTask(pipeBase.PipelineTask):
ConfigClass = ReprocessVisitImageConfig
_DefaultName = "reprocessVisitImage"

def __init__(self, sources_schema=None, **kwargs):
def __init__(self, schema=None, **kwargs):
super().__init__(**kwargs)

if sources_schema is None:
sources_schema = afwTable.SourceTable.makeMinimalSchema()
if schema is None:
schema = afwTable.SourceTable.makeMinimalSchema()

afwTable.CoordKey.addErrorFields(sources_schema)
afwTable.CoordKey.addErrorFields(schema)
self.makeSubtask("snap_combine")
self.makeSubtask("repair")
self.makeSubtask("detection", schema=sources_schema)
self.makeSubtask("sky_sources", schema=sources_schema)
self.makeSubtask("deblend", schema=sources_schema)
self.makeSubtask("measurement", schema=sources_schema)
self.makeSubtask("normalized_calibration_flux", schema=sources_schema)
self.makeSubtask("apply_aperture_correction", schema=sources_schema)
self.makeSubtask("catalog_calculation", schema=sources_schema)
self.makeSubtask("set_primary_flags", schema=sources_schema, isSingleFrame=True)
self.makeSubtask("post_calculations", schema=sources_schema)
self.makeSubtask("detection", schema=schema)
self.makeSubtask("sky_sources", schema=schema)
self.makeSubtask("deblend", schema=schema)
self.makeSubtask("measurement", schema=schema)
self.makeSubtask("normalized_calibration_flux", schema=schema)
self.makeSubtask("apply_aperture_correction", schema=schema)
self.makeSubtask("catalog_calculation", schema=schema)
self.makeSubtask("set_primary_flags", schema=schema, isSingleFrame=True)
self.makeSubtask("post_calculations", schema=schema)
self.makeSubtask("compute_summary_stats")

sources_schema.addField(
schema.addField(
"visit",
type="I",
doc="Visit this source appeared on.",
)
sources_schema.addField(
schema.addField(
"detector",
type="U",
doc="Detector this source appeared on.",
Expand All @@ -272,18 +273,18 @@ def __init__(self, sources_schema=None, **kwargs):
# These fields will be propagated from finalizeCharacterization.
# It might be better to get them from the finalized catalog instead
# (if it output a schema), so the docstrings exactly match.
sources_schema.addField(
schema.addField(
"calib_psf_candidate",
type="Flag",
doc="Set if the source was a candidate for PSF determination, "
"as determined from FinalizeCharacterizationTask.",
)
sources_schema.addField(
schema.addField(
"calib_psf_reserved",
type="Flag",
doc="set if source was reserved from PSF determination by FinalizeCharacterizationTask.",
)
sources_schema.addField(
schema.addField(
"calib_psf_used",
type="Flag",
doc="Set if source was used in the PSF determination by FinalizeCharacterizationTask.",
Expand All @@ -294,28 +295,33 @@ def __init__(self, sources_schema=None, **kwargs):
# These fields are only here to satisfy the SDM schema, and will
# be removed from there as they are misleading (because we don't
# propagate this information from gbdes/fgcmcal).
sources_schema.addField(
schema.addField(
"calib_photometry_used",
type="Flag",
doc="Unused; placeholder for SDM schemas.",
)
sources_schema.addField(
schema.addField(
"calib_photometry_reserved",
type="Flag",
doc="Unused; placeholder for SDM schemas.",
)
sources_schema.addField(
schema.addField(
"calib_astrometry_used",
type="Flag",
doc="Unused; placeholder for SDM schemas.",
)
sources_schema.addField(
schema.addField(
"calib_astrometry_reserved",
type="Flag",
doc="Unused; placeholder for SDM schemas.",
)

self.sources_schema = afwTable.SourceCatalog(sources_schema)
# This pre-calibration schema is the one that most methods should use.
self.schema = schema
# The final catalog will have calibrated flux columns, which we add to
# the init-output schema by calibrating our zero-length catalog with an
# arbitrary dummy PhotoCalib.
dummy_photo_calib = afwImage.PhotoCalib(1.0, 0, bbox=lsst.geom.Box2I())
self.sources_schema = dummy_photo_calib.calibrateCatalog(afwTable.SourceCatalog(schema))

def runQuantum(self, butlerQC, inputRefs, outputRefs):
inputs = butlerQC.get(inputRefs)
Expand Down Expand Up @@ -491,7 +497,11 @@ def run(
result.exposure.info.setSummaryStats(
self.compute_summary_stats.run(result.exposure, result.sources_footprints, background)
)
self._apply_photo_calib(result.exposure, result.sources_footprints, photo_calib)
result.sources_footprints = self._apply_photo_calib(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, shoot! Thanks for catching this. I see I didn't have a test that the _flux columns were in the output in test_reprocess_visit_image.py. Can you please add that? Either a test of existence, and/or a check that the brightest PsfFlux_flux value matches self.photo_calib*self.fluxes[-1] (which is the brightest in the input catalog).

Also, while you're at it, please fix _apply_photo_calib() to not re-use sources_footprints, since that might be how I managed to confuse myself. photo_calib.calibrateCatalog not being in-place is different from many of our other catalog operations, but there's a good reason it can't be in-place. But I still screwed it up.

Copy link
Member Author

@TallJimbo TallJimbo Nov 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In thinking about adding at test to drp_tasks (note that this is now already tested in ci_hsc and ci_imsim) I thought, "oh, I'll test that the schema init-output is consistent with the output catalog." And then I realized that the schema is not going to be consistent, because it doesn't have the flux fields, and we don't have a good way to make it consistent.

At this point I'm inclined to just remove the sources_schema connection, as I don't anticipate anything consuming it, but I'd be interested in your thoughts on that.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, nevermind, I bet we do have an easy way to fix it: since the init-output is a catalog with no rows, I think we can just call calibrateCatalog with a dummy PhotoCalib in __init__.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, given that the tests in drp_pipe are all mocks-based, adding a test there is considerably harder than upgrading the ones in ci_hsc and ci_hsc, so that's what I've done - those now check that the schema and the output catalog are consistent, and the existing test on LocalPhotoCalib checks that the flux columns are present. I'm starting a new Jenkins run now to make sure that all works.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, good catch: I'd forgotten about that schema; we probably want a check in this package's tests that the sources_schema produced by __init__ matches the schema of the output sources catalog, since it's not something that needs ci_* to check.

I added that schema connection because when I was working on reprocessVisitImage, I had to add an output schema to finalizeCharacterization to take as an input here, even though Eli had had a similar thought of "this is the final point in the pipeline, nobody's ever going to need the schema".

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't want us to push tests into ci_* that could really be done in this package instead. Those packages take far too long to run, and I think we already have too much testing dependency on them.

Copy link
Member Author

@TallJimbo TallJimbo Dec 1, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I had the same thought, but the tests in drp_tasks don't actually call run; that's completely mocked out. So there's no source catalog here to check.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What? Yes they do! You can put that test at the end of this one!

https://github.com/lsst/drp_tasks/blob/main/tests/test_reprocess_visit_image.py#L186

Copy link
Member Author

@TallJimbo TallJimbo Dec 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah, I saw that the test case was called CalibrateImageTasksTests and didn't look deeper, but now that I have I think that name is a copy-paste bug. Anyhow, yes, I'll put the new test there (and rename the test case).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, tests are in place in drp_tasks.

result.exposure,
result.sources_footprints,
photo_calib,
)
result.sources = result.sources_footprints.asAstropy()

return result
Expand All @@ -516,7 +526,7 @@ def _find_sources(self, exposure, background, calib_sources, id_generator):
sources
Catalog that was detected and measured on the exposure.
"""
table = afwTable.SourceTable.make(self.sources_schema.schema, id_generator.make_table_id_factory())
table = afwTable.SourceTable.make(self.schema, id_generator.make_table_id_factory())

self.repair.run(exposure=exposure)
detections = self.detection.run(table=table, exposure=exposure, background=background)
Expand Down Expand Up @@ -592,11 +602,11 @@ def _apply_photo_calib(self, exposure, sources_footprints, photo_calib):
Star catalog with flux/magnitude columns computed from the
supplied PhotoCalib.
"""
sources_footprints = photo_calib.calibrateCatalog(sources_footprints)
calibrated_sources_footprints = photo_calib.calibrateCatalog(sources_footprints)
exposure.maskedImage = photo_calib.calibrateImage(exposure.maskedImage)
identity = afwImage.PhotoCalib(1.0, photo_calib.getCalibrationErr(), bbox=exposure.getBBox())
exposure.setPhotoCalib(identity)
return sources_footprints
return calibrated_sources_footprints


def combine_backgrounds(initial_pvi_background, sky_corr):
Expand Down
9 changes: 8 additions & 1 deletion tests/test_reprocess_visit_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def make_visit_summary(summary=None, psf=None, wcs=None, photo_calib=None, detec
return summary


class CalibrateImageTaskTests(lsst.utils.tests.TestCase):
class ReprocessVisitImageTaskTests(lsst.utils.tests.TestCase):
def setUp(self):
# Different x/y dimensions so they're easy to distinguish in a plot,
# and non-zero minimum, to help catch xy0 errors.
Expand Down Expand Up @@ -211,6 +211,13 @@ def test_run(self):
# Faintest non-sky source should be marked as used.
flux_sorted = result.sources[result.sources.argsort("slot_CalibFlux_instFlux")]
self.assertTrue(flux_sorted[~flux_sorted["sky_source"]]["calib_psf_used"][0])
# Test that the schema init-output agrees with the catalog output.
self.assertEqual(task.sources_schema.schema, result.sources_footprints.schema)
# The flux/instFlux ratio should be the LocalPhotoCalib value.
for record in result.sources_footprints:
self.assertAlmostEqual(
record["base_PsfFlux_flux"] / record["base_PsfFlux_instFlux"], record["base_LocalPhotoCalib"]
)


class ReprocessVisitImageTaskRunQuantumTests(lsst.utils.tests.TestCase):
Expand Down
Loading