Skip to content

Commit

Permalink
Fix for cache item names matching
Browse files Browse the repository at this point in the history
  • Loading branch information
BryanFauble committed Oct 30, 2023
1 parent 415fcb2 commit 69e6600
Show file tree
Hide file tree
Showing 2 changed files with 114 additions and 20 deletions.
22 changes: 20 additions & 2 deletions synapseclient/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,7 +1035,11 @@ def _getWithEntityBundle(self, entityBundle, entity=None, **kwargs):
if downloadFile:
if file_handle:
self._download_file_entity(
downloadLocation, entity, ifcollision, submission
downloadLocation,
entity,
ifcollision,
submission,
file_handle.get("contentMd5", None),
)
else: # no filehandle means that we do not have DOWNLOAD permission
warning_message = (
Expand All @@ -1062,7 +1066,12 @@ def _ensure_download_location_is_directory(self, downloadLocation):
return download_dir

def _download_file_entity(
self, downloadLocation: str, entity: Entity, ifcollision: str, submission: str
self,
downloadLocation: str,
entity: Entity,
ifcollision: str,
submission: str,
expected_md5: typing.Union[str, None],
):
# set the initial local state
entity.path = None
Expand All @@ -1074,6 +1083,15 @@ def _download_file_entity(
# downloaded the file
cached_file_path = self.cache.get(entity.dataFileHandleId, downloadLocation)

# This is to handle for cases where the names of the files in the cache and the
# requested file name match - But there is a difference in content.
if (
expected_md5 is not None
and cached_file_path is not None
and utils.md5_for_file(cached_file_path).hexdigest() != expected_md5
):
cached_file_path = None

# location in .synapseCache where the file would be corresponding to its FileHandleId
synapseCache_location = self.cache.get_cache_dir(entity.dataFileHandleId)

Expand Down
112 changes: 94 additions & 18 deletions tests/integration/synapseclient/integration_test_Entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,24 @@
import pytest
from unittest.mock import patch

from pytest_mock import MockerFixture


from synapseclient.core.upload.upload_functions import create_external_file_handle
from synapseclient import Activity, DockerRepository, File, Folder, Link, Project
from synapseclient import (
Activity,
DockerRepository,
File,
Folder,
Link,
Project,
Synapse,
)
from synapseclient.core.exceptions import SynapseError, SynapseHTTPError
import synapseclient.core.utils as utils


def test_Entity(syn, project, schedule_for_cleanup):
def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup):
# Update the project
project_name = str(uuid.uuid4())
project = Project(name=project_name)
Expand Down Expand Up @@ -152,7 +163,7 @@ def test_Entity(syn, project, schedule_for_cleanup):
assert os.path.basename(a_file_cached.path) == os.path.basename(a_file.path)


def test_special_characters(syn, project, schedule_for_cleanup):
def test_special_characters(syn: Synapse, project: Project):
folder = syn.store(
Folder(
"Special Characters Here",
Expand All @@ -174,7 +185,7 @@ def test_special_characters(syn, project, schedule_for_cleanup):
assert folder.russian_annotation[0] == "Обезьяна прикладом"


def test_get_local_file(syn, project, schedule_for_cleanup):
def test_get_local_file(syn: Synapse, project: Project, schedule_for_cleanup):
new_path = utils.make_bogus_data_file()
schedule_for_cleanup(new_path)
folder = Folder(
Expand Down Expand Up @@ -204,7 +215,7 @@ def test_get_local_file(syn, project, schedule_for_cleanup):
pytest.raises(SynapseError, syn.get, new_path, limitSearch="syn1")


def test_store_with_flags(syn, project, schedule_for_cleanup):
def test_store_with_flags(syn: Synapse, project: Project, schedule_for_cleanup):
# -- CreateOrUpdate flag for Projects --
# If we store a project with the same name, it should become an update
projUpdate = Project(project.name)
Expand Down Expand Up @@ -287,11 +298,14 @@ def test_store_with_flags(syn, project, schedule_for_cleanup):
assert ephemeralBogus.shoe_size == [11.5]


def test_get_with_downloadLocation_and_ifcollision(syn, project, schedule_for_cleanup):
def test_get_with_downloadLocation_and_ifcollision(
syn: Synapse, project: Project, schedule_for_cleanup
):
# Store a File and delete it locally
filepath = utils.make_bogus_binary_file()
bogus = File(filepath, name="Bogus Test File", parent=project)
bogus = syn.store(bogus)
schedule_for_cleanup(bogus)
os.remove(filepath)

# Compare stuff to this one
Expand Down Expand Up @@ -339,7 +353,65 @@ def test_get_with_downloadLocation_and_ifcollision(syn, project, schedule_for_cl
os.remove(renamedBogus.path)


def test_store_activity(syn, project, schedule_for_cleanup):
def test_get_with_cache_hit_and_miss_with_ifcollision(
syn: Synapse, project: Project, schedule_for_cleanup, mocker: MockerFixture
):
download_file_function = mocker.spy(obj=syn, name="_downloadFileHandle")
# GIVEN a File that is stored in Synapse - and removed from the local machine
filepath = utils.make_bogus_binary_file()
original_file_md5 = utils.md5_for_file(filepath).hexdigest()
bogus_file = File(
filepath, name="a_name_that_will_show_up_in_cache", parent=project
)
bogus_file = syn.store(bogus_file)
schedule_for_cleanup(bogus_file)
os.remove(filepath)

# WHEN I get the File from synapse
unmodified_file_from_server = syn.get(
bogus_file,
downloadLocation=os.path.dirname(filepath),
ifcollision="overwrite.local",
)

# THEN I expect the file to have been downloaded and match the original files MD5
assert download_file_function.call_count == 1
assert unmodified_file_from_server._file_handle["contentMd5"] == original_file_md5
assert utils.md5_for_file(filepath).hexdigest() == original_file_md5

# AND I expect the file to be in the cache and match the original files MD5
copy_of_file_from_server = syn.get(
bogus_file,
downloadLocation=os.path.dirname(filepath),
ifcollision="overwrite.local",
)
assert copy_of_file_from_server.id == unmodified_file_from_server.id
assert filecmp.cmp(copy_of_file_from_server.path, unmodified_file_from_server.path)
assert download_file_function.call_count == 1
assert copy_of_file_from_server._file_handle["contentMd5"] == original_file_md5
assert utils.md5_for_file(filepath).hexdigest() == original_file_md5

# GIVEN another bogus file with the same name and non matching MD5 data
new_bogus_file = utils.make_bogus_binary_file()
os.rename(new_bogus_file, filepath)
assert utils.md5_for_file(filepath).hexdigest() != original_file_md5

# WHEN I get the File from synapse
copy_of_file_from_server = syn.get(
bogus_file,
downloadLocation=os.path.dirname(filepath),
ifcollision="overwrite.local",
)

# THEN I expect the file to have been downloaded and overwrite the file on my machine with the same name
assert copy_of_file_from_server._file_handle["contentMd5"] == original_file_md5
assert utils.md5_for_file(filepath).hexdigest() == original_file_md5
assert download_file_function.call_count == 2

os.remove(filepath)


def test_store_activity(syn: Synapse, project: Project, schedule_for_cleanup):
# Create a File and an Activity
path = utils.make_bogus_binary_file()
schedule_for_cleanup(path)
Expand Down Expand Up @@ -388,7 +460,7 @@ def test_store_activity(syn, project, schedule_for_cleanup):
assert honking["id"] == honking2["id"]


def test_store_isRestricted_flag(syn, project, schedule_for_cleanup):
def test_store_isRestricted_flag(syn: Synapse, project: Project, schedule_for_cleanup):
# Store a file with access requirements
path = utils.make_bogus_binary_file()
schedule_for_cleanup(path)
Expand All @@ -402,7 +474,7 @@ def test_store_isRestricted_flag(syn, project, schedule_for_cleanup):
assert intercepted.called


def test_ExternalFileHandle(syn, project, schedule_for_cleanup):
def test_ExternalFileHandle(syn: Synapse, project: Project):
# Tests shouldn't have external dependencies, but this is a pretty picture of Singapore
singapore_url = "http://upload.wikimedia.org/wikipedia/commons/thumb/3/3e/1_singapore_city_skyline_dusk_panorama_2011.jpg/1280px-1_singapore_city_skyline_dusk_panorama_2011.jpg" # noqa
singapore = File(singapore_url, parent=project, synapseStore=False)
Expand Down Expand Up @@ -432,7 +504,7 @@ def test_ExternalFileHandle(syn, project, schedule_for_cleanup):
assert s2.externalURL == singapore_2_url


def test_synapseStore_flag(syn, project, schedule_for_cleanup):
def test_synapseStore_flag(syn: Synapse, project: Project, schedule_for_cleanup):
# Store a path to a local file
path = utils.make_bogus_data_file()
schedule_for_cleanup(path)
Expand Down Expand Up @@ -474,7 +546,7 @@ def test_synapseStore_flag(syn, project, schedule_for_cleanup):
assert not bogus.synapseStore


def test_create_or_update_project(syn, project, schedule_for_cleanup):
def test_create_or_update_project(syn: Synapse, project: Project, schedule_for_cleanup):
name = str(uuid.uuid4())

project = Project(name, a=1, b=2)
Expand All @@ -498,7 +570,7 @@ def test_create_or_update_project(syn, project, schedule_for_cleanup):
pytest.raises(Exception, syn.store, project, createOrUpdate=False)


def test_download_file_false(syn, project, schedule_for_cleanup):
def test_download_file_false(syn: Synapse, project: Project, schedule_for_cleanup):
RENAME_SUFFIX = "blah"

# Upload a file
Expand All @@ -523,7 +595,7 @@ def test_download_file_false(syn, project, schedule_for_cleanup):
assert reupload.name == file.name


def test_download_file_URL_false(syn, project, schedule_for_cleanup):
def test_download_file_URL_false(syn: Synapse, project: Project):
# Upload an external file handle
fileThatExists = "http://dev-versions.synapse.sagebase.org/synapsePythonClient"
reupload = File(fileThatExists, synapseStore=False, parent=project)
Expand Down Expand Up @@ -554,7 +626,9 @@ def test_download_file_URL_false(syn, project, schedule_for_cleanup):


# SYNPY-366
def test_download_local_file_URL_path(syn, project, schedule_for_cleanup):
def test_download_local_file_URL_path(
syn: Synapse, project: Project, schedule_for_cleanup
):
path = utils.make_bogus_data_file()
schedule_for_cleanup(path)

Expand All @@ -566,7 +640,9 @@ def test_download_local_file_URL_path(syn, project, schedule_for_cleanup):


# SYNPY-424
def test_store_file_handle_update_metadata(syn, project, schedule_for_cleanup):
def test_store_file_handle_update_metadata(
syn: Synapse, project: Project, schedule_for_cleanup
):
original_file_path = utils.make_bogus_data_file()
schedule_for_cleanup(original_file_path)

Expand All @@ -593,7 +669,7 @@ def test_store_file_handle_update_metadata(syn, project, schedule_for_cleanup):
assert [os.path.basename(replacement_file_path)] == new_entity.files


def test_store_DockerRepository(syn, project, schedule_for_cleanup):
def test_store_DockerRepository(syn: Synapse, project: Project):
repo_name = "some/repository/path"
docker_repo = syn.store(DockerRepository(repo_name, parent=project))
assert isinstance(docker_repo, DockerRepository)
Expand All @@ -602,7 +678,7 @@ def test_store_DockerRepository(syn, project, schedule_for_cleanup):


def test_store__changing_externalURL_by_changing_path(
syn, project, schedule_for_cleanup
syn: Synapse, project: Project, schedule_for_cleanup
):
url = "https://www.synapse.org/Portal/clear.cache.gif"
ext = syn.store(File(url, name="test", parent=project, synapseStore=False))
Expand All @@ -628,7 +704,7 @@ def test_store__changing_externalURL_by_changing_path(


def test_store__changing_from_Synapse_to_externalURL_by_changing_path(
syn, project, schedule_for_cleanup
syn: Synapse, project: Project, schedule_for_cleanup
):
# create a temp file
temp_path = utils.make_bogus_data_file()
Expand Down

0 comments on commit 69e6600

Please sign in to comment.