Skip to content

Commit

Permalink
Merge pull request #960 from uktrade/fix/LTD-2115-sanctions-list-fix
Browse files Browse the repository at this point in the history
fix/LTD-2115-sanctions-list-fix
  • Loading branch information
depsiatwal authored Apr 1, 2022
2 parents 67ac1c5 + 843cb34 commit 8e1eec8
Show file tree
Hide file tree
Showing 5 changed files with 156 additions and 76 deletions.
4 changes: 2 additions & 2 deletions api/conf/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -435,7 +435,7 @@
"SANCTION_LIST_SOURCES",
{
"un_sanctions_file": "https://scsanctions.un.org/resources/xml/en/consolidated.xml",
"office_financial_sanctions_file": "https://ofsistorage.blob.core.windows.net/publishlive/2022format/ConList.html",
"uk_sanctions_file": "https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/953470/UK_sanctions_list.ods",
"office_financial_sanctions_file": "https://ofsistorage.blob.core.windows.net/publishlive/2022format/ConList.xml",
"uk_sanctions_file": "https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/1063465/UK_Sanctions_List.ods",
},
)
3 changes: 3 additions & 0 deletions api/external_data/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@ class SanctionDocumentType(Document):
"date": fields.TextField(),
}
),
"title": fields.TextField(),
"Last Updated": fields.TextField(),
"Date Designated": fields.TextField(),
}
)

Expand Down
196 changes: 137 additions & 59 deletions api/external_data/management/commands/ingest_sanctions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import itertools
import logging

from django.conf import settings
from django.core.management.base import BaseCommand
Expand All @@ -11,6 +12,9 @@

from api.external_data import documents
from api.flags.enums import SystemFlags
import hashlib

log = logging.getLogger(__name__)


def get_un_sanctions():
Expand Down Expand Up @@ -44,14 +48,22 @@ def get_uk_sanctions_list():
def parse_ods(book):
    """Yield one dict per data row for every sheet in ``book``.

    ``book`` must expose ``sheet_names()`` and item access by sheet name that
    returns an iterable of rows. Each sheet is read as: two metadata rows,
    then one header row, then data rows. Every data row is zipped with the
    headers and gets an extra ``"sheet"`` key holding the sheet name.

    Sheets that are too short to contain a header row are skipped.
    """
    for sheet_name in book.sheet_names():
        records = iter(book[sheet_name])
        # The first two rows are sheet metadata, not data. A default is passed
        # to next() because a StopIteration escaping a generator body is
        # converted to RuntimeError (PEP 479), which would abort the whole
        # parse on a single short/empty sheet.
        next(records, None)
        next(records, None)
        headers = next(records, None)
        if headers is None:
            continue
        for row in records:
            yield {**dict(zip(headers, row)), "sheet": sheet_name}


def join_fields(data, fields):
    """Join the truthy values of ``fields`` found in ``data`` with spaces.

    Fields that are missing from ``data`` or hold a falsy value (``None``,
    ``""``, ``0``) are skipped. Values are passed through ``str`` so that
    non-string values (for example numeric spreadsheet cells) do not make
    ``str.join`` raise ``TypeError``.
    """
    return " ".join(str(data[field]) for field in fields if data.get(field))


def hash_values(data_values):
    """Return the hex MD5 digest of the concatenated non-``None`` values.

    ``None`` entries are dropped; every other entry is converted with ``str``
    before concatenation, so numeric values (e.g. an integer id) are accepted
    instead of raising ``TypeError``. For all-string input the digest is the
    same as plain concatenation, so previously generated ids are unchanged.

    Values are concatenated without a separator, so ``["ab", "c"]`` and
    ``["a", "bc"]`` produce the same digest.
    """
    data = "".join(str(val) for val in data_values if val is not None)
    # nosec: in this file the digest is only used to build a document id.
    return hashlib.md5(data.encode()).hexdigest()  # nosec


class Command(BaseCommand):
Expand All @@ -66,79 +78,145 @@ def rebuild_index(self):
def handle(self, *args, **options):
    """Run the ingestion: optionally rebuild the index, then load each list.

    When ``options["rebuild"]`` is truthy ``rebuild_index`` is called first.
    The three ``populate_*`` methods are then called in a fixed order:
    United Nations, Office of Financial Sanctions Implementation, UK.
    """
    rebuild_requested = options["rebuild"]
    if rebuild_requested:
        self.rebuild_index()

    # Resolve each step by name right before calling it, one after another.
    for step_name in (
        "populate_united_nations_sanctions",
        "populate_office_financial_sanctions_implementation",
        "populate_uk_sanctions_list",
    ):
        getattr(self, step_name)()

def populate_united_nations_sanctions(self):
    """Index every individual and entity from the UN consolidated list.

    Each record is saved as a ``SanctionDocumentType`` keyed by its
    ``dataid``. A failure on one record is logged and counted, and the loop
    continues; a failure fetching or walking the list as a whole is logged
    and the method returns without raising.
    """
    successful = 0
    failed = 0
    try:
        parsed = get_un_sanctions()
        individuals = parsed["consolidated_list"]["individuals"]["individual"]
        entities = parsed["consolidated_list"]["entities"]["entity"]
        for item in itertools.chain(individuals, entities):
            # Reset per record so a failure before the id is read is not
            # reported against the previous record's id.
            record_id = None
            # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit
            # still stop the command.
            try:
                record_id = item["dataid"]
                item.pop("nationality", None)
                item.pop("title", None)

                address_dicts = item.pop("entity_address", {}) or item.pop("individual_address", {})

                addresses = []
                for address_dict in address_dicts:
                    if address_dict:
                        addresses.append(" ".join([part for part in address_dict.values() if part]))

                document = documents.SanctionDocumentType(
                    meta={"id": record_id},
                    name=join_fields(item, fields=["first_name", "second_name", "third_name"]),
                    address=addresses,
                    flag_uuid=SystemFlags.SANCTION_UN_SC_MATCH,
                    reference=record_id,
                    data=item,
                )
                document.save()
                successful += 1
            except Exception:
                failed += 1
                # log.exception already attaches the traceback.
                log.exception("Error loading un sanction record -> %s", record_id)
        log.info("un sanctions (successful:%s failed:%s)", successful, failed)
    except Exception as exc:
        log.exception("Error loading un sanctions -> %s", exc)

def populate_office_financial_sanctions_implementation(self):
    """Index every target from the OFSI consolidated list.

    Name and address are assembled from the numbered ``name1..6`` and
    ``address1..6`` fields. The document id is ``ofs:<hash>``, where the hash
    covers the record's ``groupid`` and the assembled name. A failure on one
    record is logged and counted, and the loop continues; a failure fetching
    or walking the list as a whole is logged and the method returns without
    raising.
    """
    successful = 0
    failed = 0
    try:
        parsed = get_office_financial_sanctions_implementation()
        for item in parsed["arrayoffinancialsanctionstarget"]["financialsanctionstarget"]:
            # Reset per record so a failure before the id is read is not
            # reported against the previous record's id.
            record_id = None
            # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit
            # still stop the command.
            try:
                record_id = item["groupid"]
                item.pop("nationality", None)
                address = join_fields(
                    item, fields=["address1", "address2", "address3", "address4", "address5", "address6"]
                )
                name = join_fields(item, fields=["name1", "name2", "name3", "name4", "name5", "name6"])
                postcode = normalize_address(item["postcode"])
                # Append the postcode only when the address does not already
                # contain it (compared with spaces stripped).
                if postcode not in normalize_address(address):
                    address += " " + postcode

                # Hash the data that uniquely identifies a record; at the
                # moment that is the group id plus the name.
                unique_id = hash_values([record_id, name])
                document = documents.SanctionDocumentType(
                    meta={"id": f"ofs:{unique_id}"},
                    name=name,
                    address=address,
                    postcode=postcode,
                    flag_uuid=SystemFlags.SANCTION_OFSI_MATCH,
                    reference=record_id,
                    data=item,
                )
                document.save()
                successful += 1
            except Exception:
                failed += 1
                # log.exception already attaches the traceback.
                log.exception("Error loading office financial sanction record -> %s", record_id)
        log.info("office financial sanctions (successful:%s failed:%s)", successful, failed)
    except Exception as exc:
        log.exception("Error loading office financial sanctions -> %s", exc)

def populate_uk_sanctions_list(self):
    """Index every row of the UK sanctions list spreadsheet.

    Name and address are assembled from the ``Name 1..6`` and
    ``Address Line 1..4`` columns. The document id is ``uk:<hash>``, where the
    hash covers the unique id, name, address, postcode and regime name. A
    failure on one row is logged and counted, and the loop continues; a
    failure fetching or iterating the list as a whole is logged and the
    method returns without raising.
    """
    successful = 0
    failed = 0
    try:
        parsed = get_uk_sanctions_list()
        for item in parsed:
            # Reset per row so a failure before the id is read is not
            # reported against the previous row's id.
            record_id = None
            # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit
            # still stop the command.
            try:
                record_id = item["Unique ID"]
                item.pop("nationality", None)
                address = join_fields(
                    item, fields=["Address Line 1", "Address Line 2", "Address Line 3", "Address Line 4"]
                )
                postcode = normalize_address(item["Address Postal Code"])
                # Append the postcode only when the address does not already
                # contain it (compared with spaces stripped).
                if postcode not in normalize_address(address):
                    address += " " + postcode
                name = join_fields(item, fields=["Name 1", "Name 2", "Name 3", "Name 4", "Name 5", "Name 6"])
                unique_id = hash_values([record_id, name, address, postcode, item["Regime Name"]])
                document = documents.SanctionDocumentType(
                    meta={"id": f"uk:{unique_id}"},
                    name=name,
                    address=address,
                    postcode=postcode,
                    flag_uuid=SystemFlags.SANCTION_UK_MATCH,
                    reference=record_id,
                    data=item,
                )
                document.save()
                successful += 1
            except Exception:
                failed += 1
                # log.exception already attaches the traceback.
                log.exception("Error loading uk sanction record -> %s", record_id)
        log.info("uk sanctions (successful:%s failed:%s)", successful, failed)
    except Exception as exc:
        log.exception("Error loading uk sanctions -> %s", exc)


def normalize_address(value):
    """Return ``value`` as a string with all space characters removed.

    Falsy input (``None``, ``""``) and the placeholder ``"unknown"`` (any
    letter case) normalise to ``""``. Integers are stringified first, so
    ``0`` becomes ``"0"``. Any other non-string truthy value (for example a
    float spreadsheet cell) is also stringified rather than raising
    ``AttributeError`` on ``.lower()``.
    """
    # Integers are converted before the falsy check so that 0 survives as "0".
    if isinstance(value, int):
        value = str(value)
    if not value:
        return ""
    if not isinstance(value, str):
        value = str(value)
    if value.lower() == "unknown":
        return ""

    return value.replace(" ", "")
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,11 @@ def test_populate_sanctions(
"IMO number": "N/A",
"Last Updated": "N/A",
"Length of ship": "N/A",
"Name 1": "N/A",
"Name 2": "",
"Name 3": "",
"Name 4": "",
"Name 1": "HAJI",
"Name 2": "KHAIRULLAH",
"Name 4": "HAJI SATTAR",
"Name 5": "",
"Name 6": "N/A",
"Name 6": "MONEY EXCHANGE",
"National Identifier number": "N/A",
"Nationality(/ies)": "N/A",
"OFSI ID": "12703",
Expand All @@ -57,7 +56,7 @@ def test_populate_sanctions(
"Passport number": "N/A",
"Phone number ": "Unknown",
"Position": "N/A",
"Postcode": "Unknown",
"Address Postal Code": "Unknown",
"Previous flags": "N/A",
"Previous owner/operator (s)": "N/A",
"Primary Address Country": "Pakistan",
Expand All @@ -83,8 +82,8 @@ def test_populate_sanctions(
)

mock_get_office_financial_sanctions_implementation.return_value = {
"arrayofconsolidatedlist": {
"consolidatedlist": [
"arrayoffinancialsanctionstarget": {
"financialsanctionstarget": [
{
"address1": None,
"address2": None,
Expand All @@ -109,7 +108,6 @@ def test_populate_sanctions(
"fcoid": "AQD0104",
"flagofvessel": None,
"fulladdress": None,
"fullname": "Haji Agha Abdul Manan",
"furtheridentifiyinginformation": "Pakistan. Review pursuant to Security",
"gender": None,
"groupid": "6897",
Expand All @@ -126,12 +124,12 @@ def test_populate_sanctions(
"lengthofvessel": None,
"listingtype": "UK and UN",
"monthofbirth": None,
"name1": "Abdul Manan",
"name2": None,
"name1": "Haji",
"name2": "Agha",
"name3": None,
"name4": None,
"name5": None,
"name6": "Agha",
"name6": "Abdul Manan",
"nametitle": "Haji",
"nationalidnumber": None,
"nationality": None,
Expand Down Expand Up @@ -231,7 +229,7 @@ def test_populate_sanctions(
self.assertEqual(len(results_three.hits), 2)
self.assertEqual(results_three.hits[0]["name"], "Haji Agha Abdul Manan")
self.assertEqual(results_three.hits[0]["flag_uuid"], "00000000-0000-0000-0000-000000000040")
self.assertEqual(results_three.hits[0]["reference"], "109")
self.assertEqual(results_three.hits[0]["reference"], "6897")

self.assertEqual(results_three.hits[1]["name"], "HAJI KHAIRULLAH HAJI SATTAR MONEY EXCHANGE")
self.assertEqual(results_three.hits[1]["flag_uuid"], "00000000-0000-0000-0000-000000000041")
Expand All @@ -253,8 +251,8 @@ def test_get_office_financial_sanctions_implementation(self):
def test_get_uk_sanctions_list(self):
book = pyexcel.get_book(
bookdict={
"Sheet 1": [["a", "b", "c"], [1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
"Sheet 2": [["x", "y", "z"], [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
"Sheet 1": [[], [], ["a", "b", "c"], [1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
"Sheet 2": [[], [], ["x", "y", "z"], [1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
}
)
with mock.patch.object(pyexcel, "get_book", return_value=book):
Expand Down
1 change: 1 addition & 0 deletions pii-secret-exclude.txt
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,4 @@ api/applications/tests/test_copy_application.py
api/applications/migrations/0010_auto_20200303_1626.py
api/letter_templates/templates/letter_templates/ecju_base.html
api/external_data/tests/denial_invalid.csv
api/external_data/management/commands/tests/test_ingest_un_consolidated_list.py

0 comments on commit 8e1eec8

Please sign in to comment.