Skip to content

Commit

Permalink
Merge pull request #25 from CogStack/generate_location_es
Browse files Browse the repository at this point in the history
CU-33nkxtg add sample scripts with geolocation for ES demos
  • Loading branch information
vladd-bit authored Jul 4, 2024
2 parents 80b5c05 + 6289b60 commit 5d27725
Show file tree
Hide file tree
Showing 3 changed files with 1,993 additions and 1 deletion.
2 changes: 1 addition & 1 deletion nifi/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ RUN apt-get clean autoclean && apt-get autoremove --purge -y
RUN pip3 install --no-cache-dir --upgrade pip && \
pip3 install --no-cache-dir setuptools dvc flask GitPython elasticsearch opensearch-py neo4j eland --ignore-installed PyYAML && \
pip3 install --no-cache-dir pytesseract ipyparallel py7zr cython isort html2text jsoncsv simplejson detect wheel nltk keras bokeh seaborn matplotlib graphviz plotly tqdm && \
pip3 install --no-cache-dir pymssql mysql-connector-python cx-Oracle dataclasses numpy matplotlib pandas dill jsonpickle jsonext psycopg2-binary python-certifi-win32 certifi pyodbc openpyxl
pip3 install --no-cache-dir pymssql mysql-connector-python cx-Oracle dataclasses numpy matplotlib pandas dill jsonpickle jsonext psycopg2-binary python-certifi-win32 certifi pyodbc openpyxl rancoord

# XNAT
RUN pip3 install --no-cache-dir xnat
Expand Down
65 changes: 65 additions & 0 deletions nifi/user-scripts/generate_location.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
import rancoord as rc
import os
import json
import sys
import traceback
from random import randrange

# Name of the log file that is created inside USER_SCRIPT_LOGS_DIR.
LOG_FILE_NAME = "location_gen.log"

# Default name of the output field holding the generated point; it can be
# overridden on the command line with location_name_field=<name>.
LOCATION_NAME_FIELD = "gen_location"

# Settings are passed on the command line as key=value pairs:
#   locations=<comma separated place names>
#   user_script_logs_dir=<directory for the log file>
#   subject_id_field=<name of the id field inside each record's "_source">
#   location_name_field=<name of the generated output field>
# LOCATIONS, USER_SCRIPT_LOGS_DIR and SUBJECT_ID_FIELD_NAME have no default and
# are only defined when the matching argument is supplied.
for arg in sys.argv:
    key, separator, value = arg.partition("=")
    if not separator:
        # Not a key=value pair (e.g. the script path itself, or a bare key):
        # there is no value to read, so skip it instead of failing.
        continue

    if key == "locations":
        LOCATIONS = value
    elif key == "user_script_logs_dir":
        USER_SCRIPT_LOGS_DIR = value
    elif key == "subject_id_field":
        SUBJECT_ID_FIELD_NAME = value
    elif key == "location_name_field":
        LOCATION_NAME_FIELD = value

def poly_creator(city: str):
    """Generate a map polygon for the given city name.

    The name is geocoded with ``rc.nominatim_geocoder`` and the result is
    converted with ``rc.polygon_from_boundingbox``; the returned object is
    whatever that second call produces.
    """
    return rc.polygon_from_boundingbox(rc.nominatim_geocoder(city))

def _write_error_log(log_file_path, message):
    """Append ``message`` to the log file, falling back to stderr.

    stderr is used when the log path could not be determined
    (``log_file_path`` is None) or the file cannot be opened, so the error is
    never lost and stdout stays reserved for the JSON result.
    """
    if log_file_path is not None:
        try:
            # Append mode creates the file when it does not exist yet.
            with open(log_file_path, "a+") as log_file:
                log_file.write("\n" + message)
            return
        except OSError:
            pass
    sys.stderr.write("\n" + message)


def main():
    """Generate one random point per record read from stdin.

    stdin is expected to hold a JSON array of records, each exposing the
    subject id under ``record["_source"][SUBJECT_ID_FIELD_NAME]``. For every
    record one of the places listed in LOCATIONS (comma separated) is picked
    at random and a random coordinate inside its polygon is generated.

    Returns:
        A list of dicts, one per processed record, mapping
        SUBJECT_ID_FIELD_NAME to the subject id and LOCATION_NAME_FIELD to a
        "POINT (<lon> <lat>)" string. If an error occurs the traceback is
        logged and the records produced so far (possibly none) are returned.
    """
    input_stream = sys.stdin.read()

    # Bound before the try block so that the error handler and the return
    # statement always have something to work with, even on early failures.
    output_stream = []
    log_file_path = None

    try:
        log_file_path = os.path.join(USER_SCRIPT_LOGS_DIR, str(LOG_FILE_NAME))
        patients = json.loads(input_stream)

        # One polygon per configured place, geocoded once up front.
        locations = [poly_creator(location) for location in LOCATIONS.split(",")]

        for patient in patients:
            subject_id = patient["_source"][SUBJECT_ID_FIELD_NAME]
            # pick a random location among the ones specified
            polygon = locations[randrange(len(locations))]
            # generate latitude and longitude; one value each is requested
            lat, lon, _ = rc.coordinates_randomizer(polygon=polygon, num_locations=1)

            output_stream.append({
                SUBJECT_ID_FIELD_NAME: subject_id,
                LOCATION_NAME_FIELD: "POINT (" + str(lon[0]) + " " + str(lat[0]) + ")",
            })
    except Exception:
        # Best effort: record the full traceback and still return what was
        # generated. format_exc() returns the text; print_exc() returns None.
        _write_error_log(log_file_path, traceback.format_exc())

    return output_stream

if __name__ == "__main__":
    # Only run when executed as a script, so importing this module does not
    # block on stdin. The result is written to stdout as a single JSON document.
    sys.stdout.write(json.dumps(main()))
Loading

0 comments on commit 5d27725

Please sign in to comment.