Skip to content

Commit

Permalink
simplify the annotation interface (#1022)
Browse files Browse the repository at this point in the history
  • Loading branch information
BryanFauble authored Dec 12, 2023
1 parent 84a5369 commit d370b46
Show file tree
Hide file tree
Showing 11 changed files with 184 additions and 137 deletions.
17 changes: 9 additions & 8 deletions synapseclient/api/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from typing import TYPE_CHECKING, Optional
from synapseclient import Synapse
from synapseclient.annotations import _convert_to_annotations_list
from opentelemetry import context

if TYPE_CHECKING:
Expand All @@ -29,16 +30,16 @@ def set_annotations(
"""
annotations_dict = asdict(annotations)

# TODO: Is there a more elegant way to handle this - This is essentially being used
# TODO: to remove any fields that are not expected by the REST API.
filtered_dict = {
k: v for k, v in annotations_dict.items() if v is not None and k != "is_loaded"
}
synapse_annotations = _convert_to_annotations_list(annotations_dict["annotations"])

# TODO: This `restPUT` returns back a dict (or string) - Could we use:
# TODO: https://github.com/konradhalas/dacite to convert the dict to an object?
return Synapse.get_client(synapse_client=synapse_client).restPUT(
f"/entity/{annotations.id}/annotations2",
body=json.dumps(filtered_dict),
body=json.dumps(
{
"id": annotations.id,
"etag": annotations.etag,
"annotations": synapse_annotations,
}
),
opentelemetry_context=opentelemetry_context,
)
4 changes: 0 additions & 4 deletions synapseclient/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
# These are all of the models that are used by the Synapse client.
from synapseclient.models.annotations import (
Annotations,
AnnotationsValue,
AnnotationsValueType,
)
from synapseclient.models.file import File
from synapseclient.models.folder import Folder
Expand All @@ -22,8 +20,6 @@
"Folder",
"Project",
"Annotations",
"AnnotationsValue",
"AnnotationsValueType",
"Table",
"Column",
"ColumnType",
Expand Down
65 changes: 24 additions & 41 deletions synapseclient/models/annotations.py
Original file line number Diff line number Diff line change
@@ -1,44 +1,29 @@
import asyncio

from enum import Enum
from datetime import datetime, date
from dataclasses import dataclass
from typing import Dict, List, Optional, Union
from synapseclient.api import set_annotations
from opentelemetry import trace, context

from synapseclient import Synapse
from synapseclient.annotations import ANNO_TYPE_TO_FUNC


tracer = trace.get_tracer("synapseclient")


class AnnotationsValueType(str, Enum):
"""The acceptable types that an annotation value can be."""

STRING = "STRING"
DOUBLE = "DOUBLE"
LONG = "LONG"
TIMESTAMP_MS = "TIMESTAMP_MS"
BOOLEAN = "BOOLEAN"


@dataclass()
class AnnotationsValue:
"""A specific type of annotation and the values that are of that type."""

# TODO: Currently this is required - However, we do have the ability to make some assumptions based
# TODO: On the value types. For example, if it's a str, datetime, bool, etc...
type: AnnotationsValueType
# TODO: What are all the python types we are going to accept here
value: List[Union[str, bool]]


@dataclass()
class Annotations:
"""Annotations that can be applied to a number of Synapse resources to provide additional information."""

annotations: Dict[str, AnnotationsValue]
""" Additional metadata associated with the object. The key is the name of your
annotations: Dict[
str,
Union[
List[str], List[bool], List[float], List[int], List[date], List[datetime]
],
]
"""Additional metadata associated with the object. The key is the name of your
desired annotations. The value is a list of values that all share one type
(str, bool, float, int, date or datetime); use an empty list to represent no
values for the key.
Expand Down Expand Up @@ -88,13 +73,13 @@ async def get(self):
@classmethod
def convert_from_api_parameters(
self, synapse_annotations: dict
) -> Dict[str, AnnotationsValue]:
"""Convert the annotations from the synapse API to the model."""
# TODO: This is not great logic and needs to be revisited. Ideally the annotations
# TODO: would be returned in the same format by a `.get` and a `.store` call. Currently they are not.
# TODO: This also prevents us from using the annotations returned from a `.get` call to store them again.
# TODO: There is also a difference in how timestamps are transferred - the API expects milliseconds,
# TODO: but in most cases the python client returns datetime.
) -> Dict[str, List[Union[str, bool, float, int, date, datetime]]]:
"""Convert the annotations from the format the synapse rest API works in -
to the format used by this class.
:param synapse_annotations: The annotations from the synapse rest API.
:return: The annotations in python class format.
"""
if synapse_annotations is None:
return None
annotations = {}
Expand All @@ -104,14 +89,12 @@ def convert_from_api_parameters(
else synapse_annotations
)
for key in dict_to_convert:
# TODO: How can we determine which type is being used when it is not provided in the response from the python client.
value = (
dict_to_convert[key]["value"]
if "value" in dict_to_convert[key]
else dict_to_convert[key]
)
annotations[key] = AnnotationsValue(
type=None,
value=value,
)
if isinstance(dict_to_convert[key], dict):
conversion_func = ANNO_TYPE_TO_FUNC[dict_to_convert[key]["type"]]
annotations[key] = [
conversion_func(v) for v in dict_to_convert[key]["value"]
]
else:
annotations[key] = dict_to_convert[key]

return annotations
19 changes: 16 additions & 3 deletions synapseclient/models/file.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import asyncio
from dataclasses import dataclass
from typing import Dict, Union
from datetime import date, datetime
from typing import Dict, List, Union
from opentelemetry import trace, context
from synapseclient.models import AnnotationsValue, Annotations
from synapseclient.models import Annotations

# import uuid

Expand Down Expand Up @@ -77,7 +78,19 @@ class File:
"""An optional replacement for the name of the uploaded file. This is distinct
from the entity name. If omitted the file will retain its original name."""

annotations: Optional[Dict[str, AnnotationsValue]] = None
annotations: Optional[
Dict[
str,
Union[
List[str],
List[bool],
List[float],
List[int],
List[date],
List[datetime],
],
]
] = None
"""Additional metadata associated with the file. The key is the name of your
desired annotations. The value is an object containing a list of values
(use empty list to represent no values for key) and the value type associated with
Expand Down
17 changes: 15 additions & 2 deletions synapseclient/models/folder.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
from dataclasses import dataclass, field
from datetime import date, datetime
from typing import Dict, List, Union
from typing import Optional, TYPE_CHECKING
from opentelemetry import trace, context
Expand All @@ -8,7 +9,7 @@

from synapseclient import Synapse
from synapseclient.entity import Folder as Synapse_Folder
from synapseclient.models import File, Annotations, AnnotationsValue
from synapseclient.models import File, Annotations

if TYPE_CHECKING:
from synapseclient.models import Project
Expand Down Expand Up @@ -57,7 +58,19 @@ class Folder:
folders: Optional[List["Folder"]] = field(default_factory=list)
"""Folders that exist within this folder."""

annotations: Optional[Dict[str, AnnotationsValue]] = None
annotations: Optional[
Dict[
str,
Union[
List[str],
List[bool],
List[float],
List[int],
List[date],
List[datetime],
],
]
] = None
"""Additional metadata associated with the folder. The key is the name of your
desired annotations. The value is an object containing a list of values
(use empty list to represent no values for key) and the value type associated with
Expand Down
19 changes: 16 additions & 3 deletions synapseclient/models/project.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
from dataclasses import dataclass, field
from typing import List, Dict
from datetime import date, datetime
from typing import List, Dict, Union

# import uuid

Expand All @@ -9,7 +10,7 @@

from typing import Optional

from synapseclient.models import Folder, File, Annotations, AnnotationsValue
from synapseclient.models import Folder, File, Annotations
from synapseclient import Synapse


Expand Down Expand Up @@ -54,7 +55,19 @@ class Project:
folders: Optional[List["Folder"]] = field(default_factory=list)
"""Any folders that are at the root directory of the project."""

annotations: Optional[Dict[str, AnnotationsValue]] = None
annotations: Optional[
Dict[
str,
Union[
List[str],
List[bool],
List[float],
List[int],
List[date],
List[datetime],
],
]
] = None
"""Additional metadata associated with the project. The key is the name of your
desired annotations. The value is an object containing a list of values
(use empty list to represent no values for key) and the value type associated with
Expand Down
22 changes: 20 additions & 2 deletions synapseclient/models/table.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
from dataclasses import dataclass
from datetime import date, datetime
from enum import Enum
import os
from typing import Any, Dict, List, Optional, Union
Expand All @@ -14,7 +15,7 @@
TableQueryResult as Synaspe_TableQueryResult,
delete_rows,
)
from synapseclient.models import Annotations, AnnotationsValue
from synapseclient.models import Annotations
from opentelemetry import trace, context


Expand Down Expand Up @@ -385,7 +386,19 @@ class Table:
should be enabled. Note that enabling full text search might slow down the
indexing of the table or view."""

annotations: Optional[Dict[str, AnnotationsValue]] = None
annotations: Optional[
Dict[
str,
Union[
List[str],
List[bool],
List[float],
List[int],
List[date],
List[datetime],
],
]
] = None
"""Additional metadata associated with the table. The key is the name of your
desired annotations. The value is an object containing a list of values
(use empty list to represent no values for key) and the value type associated with
Expand Down Expand Up @@ -474,6 +487,11 @@ async def store_schema(self, synapse_client: Optional[Synapse] = None) -> "Table
with tracer.start_as_current_span(f"Table_Schema_Store: {self.name}"):
tasks = []
if self.columns:
# TODO: When a table is retrieved via `.get()` we create Column objects, but
# TODO: we only have the ID attribute. This causes this if check to evaluate
# TODO: to True even though we aren't actually modifying the column.
# TODO: Perhaps we should have a `has_changed` boolean on all dataclasses
# TODO: that we can check to see if we need to store the data.
tasks.extend(
column.store(synapse_client=synapse_client)
for column in self.columns
Expand Down
38 changes: 21 additions & 17 deletions test_scripts/oop_poc_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
"""
import asyncio
import os

from synapseclient.models import (
File,
Folder,
AnnotationsValueType,
AnnotationsValue,
)
from datetime import date, datetime, timedelta, timezone
import synapseclient

from opentelemetry import trace
Expand Down Expand Up @@ -50,21 +50,19 @@ def create_random_file(
async def store_file():
# Creating annotations for my file ==================================================
annotations_for_my_file = {
"my_key_string": AnnotationsValue(
type=AnnotationsValueType.STRING, value=["b", "a", "c"]
),
"my_key_bool": AnnotationsValue(
type=AnnotationsValueType.BOOLEAN, value=[False, False, False]
),
"my_key_double": AnnotationsValue(
type=AnnotationsValueType.DOUBLE, value=[1.2, 3.4, 5.6]
),
"my_key_long": AnnotationsValue(
type=AnnotationsValueType.LONG, value=[1, 2, 3]
),
"my_key_timestamp": AnnotationsValue(
type=AnnotationsValueType.TIMESTAMP_MS, value=[1701362964066, 1577862000000]
),
"my_single_key_string": "a",
"my_key_string": ["b", "a", "c"],
"my_key_bool": [False, False, False],
"my_key_double": [1.2, 3.4, 5.6],
"my_key_long": [1, 2, 3],
"my_key_date": [date.today(), date.today() - timedelta(days=1)],
"my_key_datetime": [
datetime.today(),
datetime.today() - timedelta(days=1),
datetime.now(tz=timezone(timedelta(hours=-5))),
datetime(2023, 12, 7, 13, 0, 0, tzinfo=timezone(timedelta(hours=0))),
datetime(2023, 12, 7, 13, 0, 0, tzinfo=timezone(timedelta(hours=-7))),
],
}

name_of_file = "my_file_with_random_data.txt"
Expand All @@ -84,6 +82,12 @@ async def store_file():

print(file)

# Updating and storing an annotation =================================================
file_copy = await File(id=file.id).get()
file_copy.annotations["my_key_string"] = ["new", "values", "here"]
stored_file = await file_copy.store()
print(stored_file)

# Downloading a file =================================================================
downloaded_file_copy = await File(id=file.id).get(
download_location=os.path.expanduser("~/temp/myNewFolder")
Expand Down
Loading

0 comments on commit d370b46

Please sign in to comment.