Skip to content

Commit

Permalink
[SYNPY-1358] Correction of timestamp in annotations from manifest file (
Browse files Browse the repository at this point in the history
#1020)

* Fix dates in an annotation manifest file not persisting as dates; previously they were converted to Text-type annotations. The manifest is now time zone aware: if a value does not specify a time zone, it is assumed to be in the time zone of the machine where the code is run. https://docs.python.org/3/library/datetime.html#datetime.datetime.fromisoformat
  • Loading branch information
BryanFauble authored Dec 6, 2023
1 parent aaef885 commit 219fd89
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 45 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ build/*
/venv

.vscode/
CONFIGFILE
96 changes: 79 additions & 17 deletions synapseclient/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,12 +18,13 @@
import sys
import tempfile
import threading
import typing
import urllib.parse as urllib_parse
import uuid
import warnings


UNIX_EPOCH = datetime.datetime(1970, 1, 1, 0, 0)
UNIX_EPOCH = datetime.datetime(1970, 1, 1, 0, 0, tzinfo=datetime.timezone.utc)
ISO_FORMAT = "%Y-%m-%dT%H:%M:%S.000Z"
ISO_FORMAT_MICROS = "%Y-%m-%dT%H:%M:%S.%fZ"
GB = 2**30
Expand Down Expand Up @@ -325,6 +326,25 @@ def is_synapse_id_str(obj):
return None


def datetime_or_none(datetime_str: str) -> typing.Union[datetime.datetime, None]:
    """Attempts to convert a string to a datetime object. Returns None if it fails.

    Some of the expected formats of datetime_str are:

    - 2023-12-04T07:00:00Z
    - 2001-01-01 15:00:00+07:00
    - 2001-01-01 15:00:00-07:00
    - 2023-12-04 07:00:00+00:00
    - 2019-01-01

    A string without a UTC offset (for example ``2019-01-01``) yields a naive
    datetime; a string with an offset or a ``Z`` suffix yields an aware one.

    :param datetime_str: The string to convert to a datetime object

    :return: The datetime object or None if the conversion fails
    """
    # Anything that is not a string can never be parsed; answer None instead of
    # relying on an exception raised by a missing ``replace`` method.
    if not isinstance(datetime_str, str):
        return None

    try:
        # ``fromisoformat`` in older Python versions rejects the "Z" (Zulu)
        # designator, so spell it as an explicit zero offset.
        return datetime.datetime.fromisoformat(datetime_str.replace("Z", "+00:00"))
    except ValueError:
        # Not an ISO formatted date: the caller keeps the value as plain text.
        return None


def is_date(dt):
    """Objects of class datetime.date and datetime.datetime will be recognized as dates

    :param dt: The object to inspect

    :return: True if dt is a datetime.date or datetime.datetime instance, False otherwise
    """
    # datetime.datetime is a subclass of datetime.date, so a single isinstance
    # check covers both types.
    return isinstance(dt, datetime.date)
Expand All @@ -333,9 +353,18 @@ def is_date(dt):
def to_list(value):
    """Convert the value (an iterable or a scalar value) to a list.

    Strings are treated as scalars, not as iterables of characters. Every string
    (the scalar itself, or each string element of an iterable) that can be parsed
    by ``datetime_or_none`` is replaced by the resulting datetime object; all
    other values are kept unchanged.

    :param value: An iterable or a scalar value

    :return: A list holding the (possibly converted) value or values
    """

    def _datetime_if_possible(item):
        # Only strings are candidates for conversion; anything else passes through.
        if not isinstance(item, str):
            return item
        possible_datetime = datetime_or_none(item)
        return item if possible_datetime is None else possible_datetime

    if isinstance(value, collections.abc.Iterable) and not isinstance(value, str):
        # Convert each element individually. Passing the whole iterable to
        # datetime_or_none would always yield None and skip the conversion.
        return [_datetime_if_possible(item) for item in value]
    return [_datetime_if_possible(value)]


def _to_iterable(value):
Expand Down Expand Up @@ -395,26 +424,48 @@ def make_bogus_binary_file(n=1 * KB, filepath=None, printprogress=False):
return normalize_path(filepath)


def to_unix_epoch_time(
    dt: typing.Union[datetime.date, datetime.datetime, str]
) -> int:
    """
    Convert either `datetime.date or datetime.datetime objects <https://docs.python.org/3/library/datetime.html>`_
    (or an ISO formatted string describing one) to UNIX time in milliseconds.

    :param dt: A date, a datetime, or an ISO formatted date/datetime string. A
               trailing "Z" in a string is read as a zero UTC offset. Values
               without timezone information are assumed to be in the timezone
               of the machine running the code.

    :return: Whole milliseconds elapsed since midnight Jan 1, 1970 UTC
    """
    if isinstance(dt, str):
        dt = datetime.datetime.fromisoformat(dt.replace("Z", "+00:00"))

    # datetime.datetime subclasses datetime.date, so the datetime case has to be
    # excluded explicitly to recognise "plain" dates.
    if isinstance(dt, datetime.date) and not isinstance(dt, datetime.datetime):
        # A date carries no time of day: use midnight in the local timezone.
        current_timezone = datetime.datetime.now().astimezone().tzinfo
        moment = datetime.datetime.combine(dt, datetime.time(0, 0, 0, 0)).replace(
            tzinfo=current_timezone
        )
    else:
        # If the datetime is not timezone aware, assume it is in the local timezone.
        # This is required in order for windows to work with the `astimezone` method.
        # NOTE(review): the offset is the one in effect right now; presumably a
        # naive value from a different daylight-saving period gets today's
        # offset - confirm this is acceptable.
        if dt.tzinfo is None:
            current_timezone = datetime.datetime.now().astimezone().tzinfo
            dt = dt.replace(tzinfo=current_timezone)
        moment = dt.astimezone(datetime.timezone.utc)

    # Use exact integer arithmetic: ``int(total_seconds() * 1000)`` can lose a
    # millisecond to floating point rounding (e.g. 1.001 * 1000 < 1001).
    elapsed_micros = (moment - UNIX_EPOCH) // datetime.timedelta(microseconds=1)
    # Truncate toward zero, which is what ``int()`` did for pre-epoch values.
    whole_millis = abs(elapsed_micros) // 1000
    return whole_millis if elapsed_micros >= 0 else -whole_millis


def to_unix_epoch_time_secs(
    dt: typing.Union[datetime.date, datetime.datetime]
) -> float:
    """
    Convert either `datetime.date or datetime.datetime objects <https://docs.python.org/3/library/datetime.html>`_
    to UNIX time in seconds.

    :param dt: A date or a datetime. Values without timezone information are
               assumed to be in the timezone of the machine running the code.

    :return: Seconds (possibly fractional) elapsed since midnight Jan 1, 1970 UTC
    """
    # datetime.datetime subclasses datetime.date, so the datetime case has to be
    # excluded explicitly to recognise "plain" dates.
    if isinstance(dt, datetime.date) and not isinstance(dt, datetime.datetime):
        # A date carries no time of day: use midnight in the local timezone.
        current_timezone = datetime.datetime.now().astimezone().tzinfo
        moment = datetime.datetime.combine(dt, datetime.time(0, 0, 0, 0)).replace(
            tzinfo=current_timezone
        )
    else:
        # If the datetime is not timezone aware, assume it is in the local timezone.
        # This is required in order for windows to work with the `astimezone` method.
        # NOTE(review): the offset is the one in effect right now; presumably a
        # naive value from a different daylight-saving period gets today's
        # offset - confirm this is acceptable.
        if dt.tzinfo is None:
            current_timezone = datetime.datetime.now().astimezone().tzinfo
            dt = dt.replace(tzinfo=current_timezone)
        moment = dt.astimezone(datetime.timezone.utc)
    return (moment - UNIX_EPOCH).total_seconds()


def from_unix_epoch_time_secs(secs):
Expand All @@ -426,12 +477,23 @@ def from_unix_epoch_time_secs(secs):
# so, here's a hack that enables ancient events, such as Chris's birthday to be
# converted from milliseconds since the UNIX epoch to higher level Datetime objects. Ha!
if platform.system() == "Windows" and secs < 0:
mirror_date = datetime.datetime.utcfromtimestamp(abs(secs))
return UNIX_EPOCH - (mirror_date - UNIX_EPOCH)
return datetime.datetime.utcfromtimestamp(secs)
mirror_date = datetime.datetime.utcfromtimestamp(abs(secs)).replace(
tzinfo=datetime.timezone.utc
)

result = (UNIX_EPOCH - (mirror_date - UNIX_EPOCH)).replace(
tzinfo=datetime.timezone.utc
)

return result
datetime_instance = datetime.datetime.utcfromtimestamp(secs).replace(
tzinfo=datetime.timezone.utc
)

return datetime_instance


def from_unix_epoch_time(ms):
def from_unix_epoch_time(ms) -> datetime.datetime:
"""Returns a Datetime object given milliseconds since midnight Jan 1, 1970."""

if isinstance(ms, str):
Expand Down
28 changes: 20 additions & 8 deletions synapseutils/sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .monitor import notifyMe
from synapseclient.entity import is_container
from synapseclient.core import config
from synapseclient.core.utils import id_of, is_url, is_synapse_id_str
from synapseclient.core.utils import id_of, is_url, is_synapse_id_str, datetime_or_none
from synapseclient import File, table
from synapseclient.core.pool_provider import SingleThreadExecutor
from synapseclient.core import utils
Expand Down Expand Up @@ -1059,12 +1059,13 @@ def syncToSynapse(
**Example manifest file**
=============== ======== ======= ======= =========================== ============================
path parent annot1 annot2 used executed
=============== ======== ======= ======= =========================== ============================
/path/file1.txt syn1243 "bar" 3.1415 "syn124;/path/file2.txt" "https://github.org/foo/bar"
/path/file2.txt syn12433 "baz" 2.71 "" "https://github.org/foo/baz"
=============== ======== ======= ======= =========================== ============================
=============== ======== ======= ======= ========================= =========================== ============================
path parent annot1 annot2 collection_date used executed
=============== ======== ======= ======= ========================= =========================== ============================
/path/file1.txt syn1243 "bar" 3.1415 2023-12-04 07:00:00+00:00 "syn124;/path/file2.txt" "https://github.org/foo/bar"
/path/file2.txt syn12433 "baz" 2.71 2001-01-01 15:00:00+07:00 "" "https://github.org/foo/baz"
/path/file3.txt syn12455 "zzz" 3.52 2023-12-04T07:00:00Z "" "https://github.org/foo/zzz"
=============== ======== ======= ======= ========================= =========================== ============================
"""
df = readManifestFile(syn, manifestFile)
Expand Down Expand Up @@ -1115,7 +1116,18 @@ def _manifest_upload(syn, df):

# if a item in the manifest upload is an empty string we do not want to upload that
# as an empty string annotation
file.annotations = {k: v for k, v in annotations.items() if v != ""}
file_annotations = {}

for annotation_key, annotation_value in annotations.items():
if annotation_value is None or annotation_value == "":
continue
possible_datetime = None
if isinstance(annotation_value, str):
possible_datetime = datetime_or_none(annotation_value)
file_annotations[annotation_key] = (
annotation_value if possible_datetime is None else possible_datetime
)
file.annotations = file_annotations

item = _SyncUploadItem(
file,
Expand Down
9 changes: 8 additions & 1 deletion tests/integration/conftest.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import platform
import uuid
import os
import os, time
import sys
import shutil
import tempfile
Expand Down Expand Up @@ -151,3 +151,10 @@ def setup_otel():
)
else:
trace.set_tracer_provider(TracerProvider(sampler=ALWAYS_OFF))


@pytest.fixture(autouse=True)
def set_timezone():
    """Point the ``TZ`` environment variable at UTC before every test.

    The fixture is autouse, so it runs for each test without being requested.
    """
    os.environ["TZ"] = "UTC"
    if platform.system() == "Windows":
        # time.tzset() is skipped on Windows; only the variable is set there.
        return
    # Make the C library re-read TZ so the change takes effect in this process.
    time.tzset()
7 changes: 4 additions & 3 deletions tests/integration/synapseclient/integration_test_Entity.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import datetime
import uuid
import filecmp
import os
Expand Down Expand Up @@ -82,7 +83,7 @@ def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup):
), ("description= %s" % a_file.description)
assert a_file["foo"][0] == "An arbitrary value", "foo= %s" % a_file["foo"][0]
assert a_file["bar"] == [33, 44, 55]
assert a_file["bday"][0] == Datetime(2013, 3, 15)
assert a_file["bday"][0] == Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc)
assert a_file.contentType == "text/flapdoodle", (
"contentType= %s" % a_file.contentType
)
Expand All @@ -107,7 +108,7 @@ def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup):
a_file = syn.store(a_file, forceVersion=False)
assert a_file["foo"][0] == "Another arbitrary chunk of text data"
assert a_file["bar"] == [33, 44, 55]
assert a_file["bday"][0] == Datetime(2013, 3, 15)
assert a_file["bday"][0] == Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc)
assert a_file.new_key[0] == "A newly created value"
assert a_file.path == path
assert a_file.versionNumber == 1, "unexpected version number: " + str(
Expand All @@ -134,7 +135,7 @@ def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup):
link = syn.get(link, followLink=True)
assert link["foo"][0] == "Another arbitrary chunk of text data"
assert link["bar"] == [33, 44, 55]
assert link["bday"][0] == Datetime(2013, 3, 15)
assert link["bday"][0] == Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc)
assert link.new_key[0] == "A newly created value"
assert utils.equal_paths(link.path, path)
assert link.versionNumber == 1, "unexpected version number: " + str(
Expand Down
6 changes: 4 additions & 2 deletions tests/integration/synapseclient/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import tempfile
import time
import uuid
from datetime import datetime
from datetime import datetime, timezone

from pandas.testing import assert_frame_equal
import pytest
Expand Down Expand Up @@ -564,7 +564,9 @@ def test_synapse_integer_columns_with_missing_values_from_dataframe(

@tracer.start_as_current_span("test_tables::test_store_table_datetime")
def test_store_table_datetime(syn, project):
current_datetime = datetime.fromtimestamp(round(time.time(), 3))
current_datetime = datetime.fromtimestamp(round(time.time(), 3)).replace(
tzinfo=timezone.utc
)
schema = syn.store(
Schema("testTable", [Column(name="testerino", columnType="DATE")], project)
)
Expand Down
43 changes: 34 additions & 9 deletions tests/integration/synapseutils/test_synapseutils_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import tempfile
from func_timeout import FunctionTimedOut, func_set_timeout
import pandas as pd
import numpy as np

import pytest

Expand Down Expand Up @@ -33,12 +34,15 @@ def __init__(self):
self.f2 = utils.make_bogus_data_file(n=10)
self.f3 = "https://www.synapse.org"

self.header = "path parent used executed activityName synapseStore foo\n"
self.row1 = '%s %s %s "%s;https://www.example.com" provName bar\n' % (
self.f1,
self.project.id,
self.f2,
self.f3,
self.header = "path parent used executed activityName synapseStore foo date_1 datetime_1 datetime_2 datetime_3\n"
self.row1 = (
'%s %s %s "%s;https://www.example.com" provName bar 2020-01-01 2023-12-04T07:00:00Z 2023-12-05 23:37:02.995000+00:00 2023-12-05 07:00:00+00:00\n'
% (
self.f1,
self.project.id,
self.f2,
self.f3,
)
)
self.row2 = (
'%s %s "syn12" " syn123 ;https://www.example.com" provName2 bar\n'
Expand Down Expand Up @@ -157,9 +161,30 @@ def test_syncToSynapse(test_state):
assert (
orig_anots.shape[1] == new_anots.shape[1]
) # Verify that we have the same number of cols
assert new_anots.equals(
orig_anots.loc[:, new_anots.columns]
), "Annotations different"

assert new_anots.loc[:]["foo"].equals(orig_anots.loc[:]["foo"])
# The dates in the manifest can accept a variety of formats, however we are always writing
# them back in the same expected format. Verify they're converted correctly.
assert new_anots.loc[:]["date_1"].tolist() == [
"2020-01-01 00:00:00+00:00",
np.nan,
np.nan,
]
assert new_anots.loc[:]["datetime_1"].tolist() == [
"2023-12-04 07:00:00+00:00",
np.nan,
np.nan,
]
assert new_anots.loc[:]["datetime_2"].tolist() == [
"2023-12-05 23:37:02.995000+00:00",
np.nan,
np.nan,
]
assert new_anots.loc[:]["datetime_3"].tolist() == [
"2023-12-05 07:00:00+00:00",
np.nan,
np.nan,
]

# Validate that provenance is correct
for provenanceType in ["executed", "used"]:
Expand Down
9 changes: 9 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import logging
import platform
import urllib.request

from unittest import mock
import pytest
import os, time

from synapseclient import Synapse
from synapseclient.core.logging_setup import SILENT_LOGGER_NAME
Expand Down Expand Up @@ -42,6 +44,13 @@ def test_confirm_connections_blocked():
assert _BLOCKED_CONNECTION_MESSAGE == str(cm_ex.value)


@pytest.fixture(autouse=True)
def set_timezone():
    """Point the ``TZ`` environment variable at UTC before every test.

    The fixture is autouse, so it runs for each test without being requested.
    """
    os.environ["TZ"] = "UTC"
    if platform.system() == "Windows":
        # time.tzset() is skipped on Windows; only the variable is set there.
        return
    # Make the C library re-read TZ so the change takes effect in this process.
    time.tzset()


@pytest.fixture(scope="session")
def syn():
"""
Expand Down
11 changes: 6 additions & 5 deletions tests/unit/synapseclient/unit_test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
############################################################

from datetime import datetime as Datetime
import datetime
from math import pi
import time
import uuid
Expand Down Expand Up @@ -155,10 +156,10 @@ def test_round_trip_annotations():
"zoo": [123.1, 456.2, 789.3],
"species": ["Moose"],
"birthdays": [
Datetime(1969, 4, 28),
Datetime(1973, 12, 8),
Datetime(2008, 1, 3),
Datetime(2013, 3, 15),
Datetime(1969, 4, 28, tzinfo=datetime.timezone.utc),
Datetime(1973, 12, 8, tzinfo=datetime.timezone.utc),
Datetime(2008, 1, 3, tzinfo=datetime.timezone.utc),
Datetime(2013, 3, 15, tzinfo=datetime.timezone.utc),
],
"facts": [
True,
Expand Down Expand Up @@ -206,7 +207,7 @@ def test_idempotent_annotations():


def test_submission_status_annotations_round_trip():
april_28_1969 = Datetime(1969, 4, 28)
april_28_1969 = Datetime(1969, 4, 28, tzinfo=datetime.timezone.utc)
a = Annotations(
"syn123",
"7bdb83e9-a50a-46e4-987a-4962559f090f",
Expand Down

0 comments on commit 219fd89

Please sign in to comment.