Skip to content

Commit

Permalink
Merge pull request #1215 from resource-watch/cit_002
Browse files Browse the repository at this point in the history
update to match new file structure from source
  • Loading branch information
weiqi-tori authored Nov 5, 2024
2 parents e647587 + 601fafe commit 70370d7
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 11 deletions.
3 changes: 2 additions & 1 deletion bio_007_world_database_on_protected_areas/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9
FROM python:3.10
LABEL maintainer="Weiqi Zhou <[email protected]>"
# Note this script was originally developed by Yujing Wu <[email protected]>

Expand All @@ -14,6 +14,7 @@ RUN pip install numpy
RUN pip install pandas
RUN pip install python-rapidjson
RUN pip install geopandas==1.0.1
RUN pip install fiona

# set name
ARG NAME=nrt-script
Expand Down
20 changes: 13 additions & 7 deletions bio_007_world_database_on_protected_areas/contents/src/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import glob
import warnings
import json
import fiona
warnings.simplefilter(action='ignore', category=UserWarning)


Expand Down Expand Up @@ -189,7 +190,7 @@ def fetch_data():
# maximum number of attempts
n_tries = 5
# retrieve the current date
date = datetime.datetime.utcnow()
date = datetime.datetime.now(datetime.timezone.utc)
fetch_exception = None
for i in range(0, n_tries):
try:
Expand Down Expand Up @@ -360,6 +361,10 @@ def processData(gdb, existing_ids):
INPUT gdb: fetched geodatabase with new data (geodatabase)
RETURN all_ids: a list storing all the wdpa_pids in the current dataframe (list of strings)
'''
# retrieve the current date
date = datetime.datetime.now(datetime.timezone.utc)
date_str = date.strftime("%b%Y")

# whether we have reached the last slice
last_slice = False
# the index of the first row we want to import from the geodatabase
Expand All @@ -375,7 +380,7 @@ def processData(gdb, existing_ids):
# deal with the large geometries first
for i in range(0, 100000000):
# import a slice of the geopandas dataframe
gdf = gpd.read_file(gdb, driver='FileGDB', layer = 0, encoding='utf-8', rows = slice(start, end))
gdf = gpd.read_file(gdb, driver='FileGDB', layer =f'WDPA_poly_{date_str}', encoding='utf-8', rows = slice(start, end), engine="fiona")
if '555643543' in gdf['WDPA_PID'].to_list():
# isolate the large polygon
gdf_large = gdf.loc[gdf['WDPA_PID'] =='555643543']
Expand All @@ -394,6 +399,7 @@ def processData(gdb, existing_ids):
end = start
start -= step

# process WDPA_poly
# the index of the first row we want to import from the geodatabase
start = -100
# the number of rows we want to fetch and process each time
Expand All @@ -402,7 +408,7 @@ def processData(gdb, existing_ids):
end = None
for i in range(0, 100000000):
# import a slice of the geopandas dataframe
gdf = gpd.read_file(gdb, driver='FileGDB', layer = 0, encoding='utf-8', rows = slice(start, end))
gdf = gpd.read_file(gdb, driver='FileGDB', layer =f'WDPA_poly_{date_str}', encoding='utf-8', rows = slice(start, end), engine="fiona")
# get rid of the \r\n in the wdpa_pid column
gdf['WDPA_PID'] = [x.split('\r\n')[0] for x in gdf['WDPA_PID']]
# create a new column to store the status_yr column as timestamps
Expand Down Expand Up @@ -445,7 +451,7 @@ def processData(gdb, existing_ids):
start = 0
last_slice = True
else:
# we've processed the whole dataframe
# we've processed the whole poly dataframe
break

return(all_ids)
Expand All @@ -458,7 +464,7 @@ def updateResourceWatch(num_new):
# If there are new entries in the Carto table
if num_new>0:
# Update dataset's last update date on Resource Watch
most_recent_date = datetime.datetime.utcnow()
most_recent_date = datetime.datetime.now(datetime.timezone.utc)
lastUpdateDate(DATASET_ID, most_recent_date)

# Update the dates on layer legends - TO BE ADDED IN FUTURE
Expand All @@ -472,9 +478,9 @@ def check_first_run(existing_ids):
# get current last updated date
dataLastUpdated = json.loads(r.content.decode('utf-8'))['data']['attributes']['dataLastUpdated']
# Check if it's more than 10 days ago
if datetime.datetime.utcnow() - datetime.datetime.strptime(dataLastUpdated, "%Y-%m-%dT%H:%M:%S.%fZ") > datetime.timedelta(days=10):
if datetime.datetime.now(datetime.timezone.utc) - datetime.datetime.strptime(dataLastUpdated, "%Y-%m-%dT%H:%M:%S.%fZ").replace(tzinfo=datetime.timezone.utc) > datetime.timedelta(days=10):
# update last update date
lastUpdateDate(DATASET_ID, datetime.datetime.utcnow())
lastUpdateDate(DATASET_ID, datetime.datetime.now(datetime.timezone.utc))
# set CLEAR_TABLE_FIRST to True
CLEAR_TABLE_FIRST = True
# clear existing_ids
Expand Down
2 changes: 1 addition & 1 deletion bio_007b_nrt_rw0_marine_protected_areas/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM python:3.9
FROM python:3.10
LABEL maintainer="Weiqi Zhou <[email protected]>"
# Note this script was originally developed by Yujing Wu <[email protected]>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,7 @@ def fetch():
'''
# pull the data from the url
n_tries = 5
date = datetime.datetime.utcnow()
date = datetime.datetime.now(datetime.timezone.utc)
fetch_exception = None
for i in range(0, n_tries):
try:
Expand Down Expand Up @@ -355,7 +355,7 @@ def updateResourceWatch(num_new):
# If data has been uploaded to the Carto table
if num_new > 0:
# Update dataset's last update date on Resource Watch
most_recent_date = datetime.datetime.utcnow()
most_recent_date = datetime.datetime.now(datetime.timezone.utc)
lastUpdateDate(DATASET_ID, most_recent_date)

def main():
Expand Down

0 comments on commit 70370d7

Please sign in to comment.