[Issue #3271] Setup structure of opportunity attachment transformation (minus file logic) (#3443)

## Summary
Fixes #3271 

### Time to review: __10 mins__

## Changes proposed
Add a subtask to transform the opportunity attachment table

DOES NOT handle the files themselves (next PR will handle that)

## Context for reviewers
The transformation code is bulky and has a lot of repeated patterns that needed to be set up. This PR gets all of that set up and working for the opportunity attachment table, except for the logic for moving the file from the staging table to our s3 bucket (which has a lot of extra fun complexities that will need special handling). Every other column we want in the table is copied over and functionally working with these changes.

The follow-up to this will likely refactor a bit to make the file logic reasonable, but I figured it was best to start with a pattern that looks like the other transformations we've done.

## Additional information
From a clean database, you can run this locally by doing `make console` and then using our factories to generate data:
```py
f.StagingTsynopsisAttachmentFactory.create_batch(size=50)
exit()
```

I then modified `TransformOracleDataTask` to only run the `TransformOpportunity` and new `TransformOpportunityAttachment` classes, which successfully copied the records to my local opportunity_attachment table as expected:

![Screenshot 2025-01-07 at 3 28 25 PM](https://github.com/user-attachments/assets/f7183af3-97fa-4de1-a963-3a6eefea7bb9)
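
If you'd rather not edit the task, roughly the same experiment can be run from the console. This is a sketch, not the exact change I made: it assumes `make console` exposes a `db_session` and that `TransformOracleDataTask` can be constructed directly from it, with the subtasks taking the parent task the same way `run_task` does below.

```py
# Sketch only -- run just the two relevant subtasks against a local database.
from src.data_migration.transformation.transform_oracle_data_task import TransformOracleDataTask
from src.data_migration.transformation.subtask.transform_opportunity import TransformOpportunity
from src.data_migration.transformation.subtask.transform_opportunity_attachment import (
    TransformOpportunityAttachment,
)

task = TransformOracleDataTask(db_session)  # assumption: task accepts a db_session
TransformOpportunity(task).run()
TransformOpportunityAttachment(task).run()
```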
chouinar authored Jan 15, 2025
1 parent e22d602 commit 412d01e
Showing 6 changed files with 334 additions and 31 deletions.
144 changes: 144 additions & 0 deletions api/src/data_migration/transformation/subtask/transform_opportunity_attachment.py
@@ -0,0 +1,144 @@
import logging
from typing import Sequence

import src.data_migration.transformation.transform_constants as transform_constants
import src.data_migration.transformation.transform_util as transform_util
from src.constants.lookup_constants import OpportunityAttachmentType
from src.data_migration.transformation.subtask.abstract_transform_subtask import (
    AbstractTransformSubTask,
)
from src.db.models.opportunity_models import Opportunity, OpportunityAttachment
from src.db.models.staging.attachment import TsynopsisAttachment

logger = logging.getLogger(__name__)


class TransformOpportunityAttachment(AbstractTransformSubTask):

    def transform_records(self) -> None:

        # Fetch staging attachment / our attachment / opportunity groups
        records = self.fetch_with_opportunity(
            TsynopsisAttachment,
            OpportunityAttachment,
            [TsynopsisAttachment.syn_att_id == OpportunityAttachment.attachment_id],
        )

        self.process_opportunity_attachment_group(records)

    def process_opportunity_attachment_group(
        self,
        records: Sequence[
            tuple[TsynopsisAttachment, OpportunityAttachment | None, Opportunity | None]
        ],
    ) -> None:
        for source_attachment, target_attachment, opportunity in records:
            try:
                self.process_opportunity_attachment(
                    source_attachment, target_attachment, opportunity
                )
            except ValueError:
                self.increment(
                    transform_constants.Metrics.TOTAL_ERROR_COUNT,
                    prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
                )
                logger.exception(
                    "Failed to process opportunity attachment",
                    extra=transform_util.get_log_extra_opportunity_attachment(source_attachment),
                )

    def process_opportunity_attachment(
        self,
        source_attachment: TsynopsisAttachment,
        target_attachment: OpportunityAttachment | None,
        opportunity: Opportunity | None,
    ) -> None:

        self.increment(
            transform_constants.Metrics.TOTAL_RECORDS_PROCESSED,
            prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
        )

        extra = transform_util.get_log_extra_opportunity_attachment(source_attachment)
        logger.info("Processing opportunity attachment", extra=extra)

        if source_attachment.is_deleted:
            # TODO - https://github.com/HHS/simpler-grants-gov/issues/3322
            # deletes are more complex because of s3
            # this just handles deleting the DB record at the moment
            self._handle_delete(
                source=source_attachment,
                target=target_attachment,
                record_type=transform_constants.OPPORTUNITY_ATTACHMENT,
                extra=extra,
            )

        elif opportunity is None:
            # This shouldn't be possible as the incoming data has foreign keys, but as a safety net
            # we'll make sure the opportunity actually exists
            raise ValueError(
                "Opportunity attachment cannot be processed as the opportunity for it does not exist"
            )

        else:
            # To avoid incrementing metrics for records we fail to transform, record
            # here whether it's an insert/update and we'll increment after transforming
            is_insert = target_attachment is None

            logger.info("Transforming and upserting opportunity attachment", extra=extra)

            transformed_opportunity_attachment = transform_opportunity_attachment(
                source_attachment, target_attachment
            )

            # TODO - we'll need to handle more with the s3 files here
            if is_insert:
                self.increment(
                    transform_constants.Metrics.TOTAL_RECORDS_INSERTED,
                    prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
                )
                self.db_session.add(transformed_opportunity_attachment)
            else:
                self.increment(
                    transform_constants.Metrics.TOTAL_RECORDS_UPDATED,
                    prefix=transform_constants.OPPORTUNITY_ATTACHMENT,
                )
                self.db_session.merge(transformed_opportunity_attachment)

        logger.info("Processed opportunity attachment", extra=extra)
        source_attachment.transformed_at = self.transform_time


def transform_opportunity_attachment(
    source_attachment: TsynopsisAttachment, incoming_attachment: OpportunityAttachment | None
) -> OpportunityAttachment:

    log_extra = transform_util.get_log_extra_opportunity_attachment(source_attachment)

    if incoming_attachment is None:
        logger.info("Creating new opportunity attachment record", extra=log_extra)

    # We always create a new record here and merge it in the calling function
    # this way if there is any error doing the transformation, we don't modify the existing one.
    target_attachment = OpportunityAttachment(
        attachment_id=source_attachment.syn_att_id,
        opportunity_id=source_attachment.opportunity_id,
        # TODO - we'll eventually remove attachment type, for now just arbitrarily set the value
        opportunity_attachment_type=OpportunityAttachmentType.OTHER,
        # TODO - in https://github.com/HHS/simpler-grants-gov/issues/3322
        # we'll actually handle the file location logic with s3
        file_location="TODO",  # TODO - next PR
        mime_type=source_attachment.mime_type,
        file_name=source_attachment.file_name,
        file_description=source_attachment.file_desc,
        file_size_bytes=source_attachment.file_lob_size,
        created_by=source_attachment.creator_id,
        updated_by=source_attachment.last_upd_id,
        legacy_folder_id=source_attachment.syn_att_folder_id,
    )

    transform_util.transform_update_create_timestamp(
        source_attachment, target_attachment, log_extra=log_extra
    )

    return target_attachment
1 change: 1 addition & 0 deletions api/src/data_migration/transformation/transform_constants.py
@@ -35,6 +35,7 @@
FUNDING_CATEGORY = "funding_category"
FUNDING_INSTRUMENT = "funding_instrument"
AGENCY = "agency"
OPPORTUNITY_ATTACHMENT = "opportunity_attachment"


class Metrics(StrEnum):
9 changes: 9 additions & 0 deletions api/src/data_migration/transformation/transform_oracle_data_task.py
@@ -22,6 +22,9 @@
    TransformFundingInstrument,
)
from src.data_migration.transformation.subtask.transform_opportunity import TransformOpportunity
from src.data_migration.transformation.subtask.transform_opportunity_attachment import (
    TransformOpportunityAttachment,
)
from src.data_migration.transformation.subtask.transform_opportunity_summary import (
    TransformOpportunitySummary,
)
@@ -42,6 +45,9 @@ class TransformOracleDataTaskConfig(PydanticBaseEnvConfig):
    enable_funding_category: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_CATEGORY
    enable_funding_instrument: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_FUNDING_INSTRUMENT
    enable_agency: bool = True  # TRANSFORM_ORACLE_DATA_ENABLE_AGENCY
    enable_opportunity_attachment: bool = (
        False  # TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY_ATTACHMENT
    )


class TransformOracleDataTask(Task):
@@ -85,3 +91,6 @@ def run_task(self) -> None:
        if self.transform_config.enable_agency:
            TransformAgency(self).run()
            TransformAgencyHierarchy(self).run()

        if self.transform_config.enable_opportunity_attachment:
            TransformOpportunityAttachment(self).run()
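
Note for anyone trying this locally: `enable_opportunity_attachment` defaults to `False`, so the new subtask is skipped unless the flag is switched on. A minimal sketch of enabling it, assuming the env-var name in the inline comment above is read by `PydanticBaseEnvConfig` the same way as the other flags:

```py
# Sketch only -- enable the new transform for a local run.
import os

os.environ["TRANSFORM_ORACLE_DATA_ENABLE_OPPORTUNITY_ATTACHMENT"] = "true"

# Assumption: constructing the config re-reads the environment like the other flags.
config = TransformOracleDataTaskConfig()
assert config.enable_opportunity_attachment is True
```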
9 changes: 9 additions & 0 deletions api/src/data_migration/transformation/transform_util.py
@@ -23,6 +23,7 @@
    OpportunityAssistanceListing,
    OpportunitySummary,
)
from src.db.models.staging.attachment import TsynopsisAttachment
from src.db.models.staging.opportunity import Topportunity, TopportunityCfda
from src.db.models.staging.staging_base import StagingBase
from src.util import datetime_util
@@ -535,3 +536,11 @@ def get_log_extra_funding_instrument(source_funding_instrument: SourceFundingIns
"revision_number": getattr(source_funding_instrument, "revision_number", None),
"table_name": source_funding_instrument.__tablename__,
}


def get_log_extra_opportunity_attachment(source_attachment: TsynopsisAttachment) -> dict:
    return {
        "opportunity_id": source_attachment.opportunity_id,
        "syn_att_id": source_attachment.syn_att_id,
        "att_revision_number": source_attachment.att_revision_number,
    }
64 changes: 64 additions & 0 deletions api/tests/src/data_migration/transformation/conftest.py
@@ -14,6 +14,7 @@
    LinkOpportunitySummaryFundingInstrument,
    Opportunity,
    OpportunityAssistanceListing,
    OpportunityAttachment,
    OpportunitySummary,
)
from tests.conftest import BaseTestClass
@@ -330,6 +331,33 @@ def setup_agency(
    return tgroups


def setup_opportunity_attachment(
    create_existing: bool,
    opportunity: Opportunity,
    is_delete: bool = False,
    is_already_processed: bool = False,
    source_values: dict | None = None,
):
    if source_values is None:
        source_values = {}

    synopsis_attachment = f.StagingTsynopsisAttachmentFactory.create(
        opportunity=None,
        opportunity_id=opportunity.opportunity_id,
        is_deleted=is_delete,
        already_transformed=is_already_processed,
        **source_values,
    )

    if create_existing:
        f.OpportunityAttachmentFactory.create(
            attachment_id=synopsis_attachment.syn_att_id,
            opportunity=opportunity,
        )

    return synopsis_attachment


def validate_matching_fields(
    source, destination, fields: list[Tuple[str, str]], expect_all_to_match: bool
):
@@ -760,3 +788,39 @@ def validate_agency(
    validate_matching_fields(
        tgroup_map, agency.agency_contact_info, agency_contact_field_mapping, expect_values_to_match
    )


def validate_opportunity_attachment(
    db_session,
    source_attachment,
    expect_in_db: bool = True,
    expect_values_to_match: bool = True,
):

    opportunity_attachment = (
        db_session.query(OpportunityAttachment)
        .filter(OpportunityAttachment.attachment_id == source_attachment.syn_att_id)
        .one_or_none()
    )

    if not expect_in_db:
        assert opportunity_attachment is None
        return

    assert opportunity_attachment is not None
    validate_matching_fields(
        source_attachment,
        opportunity_attachment,
        [
            ("syn_att_id", "attachment_id"),
            ("opportunity_id", "opportunity_id"),
            ("mime_type", "mime_type"),
            ("file_name", "file_name"),
            ("file_desc", "file_description"),
            ("file_lob_size", "file_size_bytes"),
            ("creator_id", "created_by"),
            ("last_upd_id", "updated_by"),
            ("syn_att_folder_id", "legacy_folder_id"),
        ],
        expect_values_to_match,
    )
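
The new test module itself isn't rendered in this view, so for orientation here is a hypothetical example of how these helpers are meant to be used together. The fixture names and task wiring are assumptions modeled on the other transformation tests (and on the conftest imports above), not a copy of the real test file.

```py
# Hypothetical usage sketch of setup_opportunity_attachment / validate_opportunity_attachment.
def test_transform_opportunity_attachment(db_session, enable_factory_create, transform_task):
    opportunity = f.OpportunityFactory.create()

    inserted = setup_opportunity_attachment(create_existing=False, opportunity=opportunity)
    updated = setup_opportunity_attachment(create_existing=True, opportunity=opportunity)
    deleted = setup_opportunity_attachment(create_existing=True, opportunity=opportunity, is_delete=True)

    # transform_task is assumed to be a fixture providing the parent TransformOracleDataTask
    TransformOpportunityAttachment(transform_task).run()
    db_session.commit()

    validate_opportunity_attachment(db_session, inserted)
    validate_opportunity_attachment(db_session, updated)
    validate_opportunity_attachment(db_session, deleted, expect_in_db=False)
```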
