Skip to content

Commit

Permalink
Merge pull request #5689 from uktrade/great_url_none
Browse files: browse the repository at this point in the history
Great URL can be null
  • Loading branch information
baarkerlounger authored Oct 10, 2024
2 parents 1ecdcdb + 6a0254f commit 4bd89cb
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 39 deletions.
12 changes: 6 additions & 6 deletions datahub/company_activity/tasks/ingest_great_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
logger = logging.getLogger(__name__)
env = environ.Env()
REGION = env('AWS_DEFAULT_REGION', default='eu-west-2')
DATE_FORMAT = '%Y-%m-%d %H:%M:%S.%f'
DATE_FORMAT = '%Y-%m-%dT%H:%M:%S.%fZ'


def ingest_great_data(bucket, file):
Expand Down Expand Up @@ -82,13 +82,13 @@ def json_to_model(self, jsn):
actor = jsn.get('actor', {})
if not actor:
actor_id = None
actor_type = None
else:
actor_id = actor['id'].split(':')[-1]
actor_type = actor['type'].split(':')[-1]
actor_type = actor.get('type', '').split(':')[-1]
actor_blacklisted_reason = str(actor.get('dit:blackListedReason', '') or '')
values = {
'published': obj['published'],
'url': obj.get('url', ''),
'url': str(obj.get('url', '') or ''),

'attributed_to_type': attributed_to_type,
'attributed_to_id': attributed_to_id,
Expand All @@ -112,10 +112,10 @@ def json_to_model(self, jsn):

'actor_type': actor_type,
'actor_id': actor_id,
'actor_dit_email_address': actor.get('dit:emailAddress', None),
'actor_dit_email_address': actor.get('dit:emailAddress', ''),
'actor_dit_is_blacklisted': actor.get('dit:isBlacklisted', None),
'actor_dit_is_whitelisted': actor.get('dit:isWhitelisted', None),
'actor_dit_blacklisted_reason': actor.get('dit:blackListedReason', None),
'actor_dit_blacklisted_reason': actor_blacklisted_reason,
}
Great.objects.update_or_create(
form_id=form_id,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from datahub.company_activity.models import Great, IngestedFile
from datahub.company_activity.tasks.ingest_company_activity import BUCKET, GREAT_PREFIX
from datahub.company_activity.tasks.ingest_great_data import (
GreatIngestionTask, ingest_great_data, REGION,
DATE_FORMAT, GreatIngestionTask, ingest_great_data, REGION,
)
from datahub.company_activity.tests.factories import (
CompanyActivityGreatFactory,
Expand Down Expand Up @@ -109,13 +109,13 @@ def test_skip_unchanged_records(self, test_file_path):
Test that we skip updating records whose published date is older than the last
file ingestion date
"""
yesterday = datetime.now() - timedelta(1)
yesterday = datetime.strftime(datetime.now() - timedelta(1), DATE_FORMAT)
CompanyActivityIngestedFileFactory(created_on=datetime.now())
record = json.dumps(dict(
object={
'id': 'dit:directoryFormsApi:Submission:5249',
'published': yesterday,
'attributedT': {
'attributedTo': {
'type': 'dit:directoryFormsApi:SubmissionAction:gov-notify-email',
'id': 'dit:directoryFormsApi:SubmissionType:export-support-service',
},
Expand Down Expand Up @@ -156,38 +156,9 @@ def test_invalid_country_code(self):
"object": {
"id": "dit:directoryFormsApi:Submission:5249",
"published": "2024-09-19T14:00:34.069Z",
"attributedTo": {
"type": "dit:directoryFormsApi:SubmissionAction:gov-notify-email",
"id": "dit:directoryFormsApi:SubmissionType:export-support-service"
},
"url": "https://kane.net/",
"dit:directoryFormsApi:Submission:Meta": {
"action_name": "gov-notify-email",
"template_id": "76f12003-74e8-4e6b-bbe9-8edc1b8619ae",
"email_address": "[email protected]"
},
"dit:directoryFormsApi:Submission:Data": {
"comment": "Issue why why morning save parent southern.",
"country": "ZZ",
"full_name": "Tina Gray",
"website_url": "https://www.henderson-thomas.info/",
"company_name": "Foster, Murphy and Diaz",
"company_size": "1 - 10",
"phone_number": "12345678",
"terms_agreed": true,
"email_address": "[email protected]",
"opportunities": ["https://white.net/app/tagscategory.php"],
"role_in_company": "test",
"opportunity_urls": "https://www.brown-andrade.com/wp-content/tagfaq.htm"
"country": "ZZ"
}
},
"actor": {
"type": "dit:directoryFormsApi:Submission:Sender",
"id": "dit:directoryFormsApi:Sender:1041",
"dit:emailAddress": "[email protected]",
"dit:isBlacklisted": true,
"dit:isWhitelisted": false,
"dit:blackListedReason": null
}
}
"""
Expand All @@ -201,3 +172,34 @@ def test_invalid_country_code(self):
expected_message = 'Could not match country with iso code: ZZ, ' + \
'for form: 5249'
assert sentry_event['logentry']['message'] == expected_message

@pytest.mark.django_db
@mock_aws
def test_null_url(self, test_file_path):
    """
    Check that a record is ingested both when its URL is explicitly null
    and when the URL key is missing altogether
    """
    null_url_record = """
{
"object": {
"id": "dit:directoryFormsApi:Submission:5249",
"published": "2024-09-19T14:00:34.069Z",
"url": null
}
}
"""
    missing_url_record = """
{
"object": {
"id": "dit:directoryFormsApi:Submission:5250",
"published": "2024-09-19T15:00:34.069Z"
}
}
"""
    baseline = Great.objects.count()
    payloads = (null_url_record, missing_url_record)
    for expected_new, raw in enumerate(payloads, start=1):
        # Each record goes through its own task instance
        GreatIngestionTask().json_to_model(json.loads(raw))
        # Every payload must add exactly one more Great row
        assert Great.objects.count() == baseline + expected_new

0 comments on commit 4bd89cb

Please sign in to comment.