From 69e6600c3d065b3a96bfffe6b357e7b1b7d73b5e Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 30 Oct 2023 11:56:44 -0700 Subject: [PATCH] Fix for cache item names matching --- synapseclient/client.py | 22 +++- .../synapseclient/integration_test_Entity.py | 112 +++++++++++++++--- 2 files changed, 114 insertions(+), 20 deletions(-) diff --git a/synapseclient/client.py b/synapseclient/client.py index 69ccaafe1..a7a57a7cd 100644 --- a/synapseclient/client.py +++ b/synapseclient/client.py @@ -1035,7 +1035,11 @@ def _getWithEntityBundle(self, entityBundle, entity=None, **kwargs): if downloadFile: if file_handle: self._download_file_entity( - downloadLocation, entity, ifcollision, submission + downloadLocation, + entity, + ifcollision, + submission, + file_handle.get("contentMd5", None), ) else: # no filehandle means that we do not have DOWNLOAD permission warning_message = ( @@ -1062,7 +1066,12 @@ def _ensure_download_location_is_directory(self, downloadLocation): return download_dir def _download_file_entity( - self, downloadLocation: str, entity: Entity, ifcollision: str, submission: str + self, + downloadLocation: str, + entity: Entity, + ifcollision: str, + submission: str, + expected_md5: typing.Union[str, None], ): # set the initial local state entity.path = None @@ -1074,6 +1083,15 @@ def _download_file_entity( # downloaded the file cached_file_path = self.cache.get(entity.dataFileHandleId, downloadLocation) + # This is to handle for cases where the names of the files in the cache and the + # requested file name match - But there is a difference in content. + if ( + expected_md5 is not None + and cached_file_path is not None + and utils.md5_for_file(cached_file_path).hexdigest() != expected_md5 + ): + cached_file_path = None + # location in .synapseCache where the file would be corresponding to its FileHandleId synapseCache_location = self.cache.get_cache_dir(entity.dataFileHandleId) diff --git a/tests/integration/synapseclient/integration_test_Entity.py b/tests/integration/synapseclient/integration_test_Entity.py index 261a99616..1367f9f6e 100644 --- a/tests/integration/synapseclient/integration_test_Entity.py +++ b/tests/integration/synapseclient/integration_test_Entity.py @@ -7,13 +7,24 @@ import pytest from unittest.mock import patch +from pytest_mock import MockerFixture + + from synapseclient.core.upload.upload_functions import create_external_file_handle -from synapseclient import Activity, DockerRepository, File, Folder, Link, Project +from synapseclient import ( + Activity, + DockerRepository, + File, + Folder, + Link, + Project, + Synapse, +) from synapseclient.core.exceptions import SynapseError, SynapseHTTPError import synapseclient.core.utils as utils -def test_Entity(syn, project, schedule_for_cleanup): +def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup): # Update the project project_name = str(uuid.uuid4()) project = Project(name=project_name) @@ -152,7 +163,7 @@ def test_Entity(syn, project, schedule_for_cleanup): assert os.path.basename(a_file_cached.path) == os.path.basename(a_file.path) -def test_special_characters(syn, project, schedule_for_cleanup): +def test_special_characters(syn: Synapse, project: Project): folder = syn.store( Folder( "Special Characters Here", @@ -174,7 +185,7 @@ def test_special_characters(syn, project, schedule_for_cleanup): assert folder.russian_annotation[0] == "Обезьяна прикладом" -def test_get_local_file(syn, project, schedule_for_cleanup): +def test_get_local_file(syn: Synapse, project: Project, schedule_for_cleanup): new_path = utils.make_bogus_data_file() schedule_for_cleanup(new_path) folder = Folder( @@ -204,7 +215,7 @@ def test_get_local_file(syn, project, schedule_for_cleanup): pytest.raises(SynapseError, syn.get, new_path, limitSearch="syn1") -def test_store_with_flags(syn, project, schedule_for_cleanup): +def test_store_with_flags(syn: Synapse, project: Project, schedule_for_cleanup): # -- CreateOrUpdate flag for Projects -- # If we store a project with the same name, it should become an update projUpdate = Project(project.name) @@ -287,11 +298,14 @@ def test_store_with_flags(syn, project, schedule_for_cleanup): assert ephemeralBogus.shoe_size == [11.5] -def test_get_with_downloadLocation_and_ifcollision(syn, project, schedule_for_cleanup): +def test_get_with_downloadLocation_and_ifcollision( + syn: Synapse, project: Project, schedule_for_cleanup +): # Store a File and delete it locally filepath = utils.make_bogus_binary_file() bogus = File(filepath, name="Bogus Test File", parent=project) bogus = syn.store(bogus) + schedule_for_cleanup(bogus) os.remove(filepath) # Compare stuff to this one @@ -339,7 +353,65 @@ def test_get_with_downloadLocation_and_ifcollision(syn, project, schedule_for_cl os.remove(renamedBogus.path) -def test_store_activity(syn, project, schedule_for_cleanup): +def test_get_with_cache_hit_and_miss_with_ifcollision( + syn: Synapse, project: Project, schedule_for_cleanup, mocker: MockerFixture +): + download_file_function = mocker.spy(obj=syn, name="_downloadFileHandle") + # GIVEN a File that is stored in Synapse - and removed from the local machine + filepath = utils.make_bogus_binary_file() + original_file_md5 = utils.md5_for_file(filepath).hexdigest() + bogus_file = File( + filepath, name="a_name_that_will_show_up_in_cache", parent=project + ) + bogus_file = syn.store(bogus_file) + schedule_for_cleanup(bogus_file) + os.remove(filepath) + + # WHEN I get the File from synapse + unmodified_file_from_server = syn.get( + bogus_file, + downloadLocation=os.path.dirname(filepath), + ifcollision="overwrite.local", + ) + + # THEN I expect the file to have been downloaded and match the original files MD5 + assert download_file_function.call_count == 1 + assert unmodified_file_from_server._file_handle["contentMd5"] == original_file_md5 + assert utils.md5_for_file(filepath).hexdigest() == original_file_md5 + + # AND I expect the file to be in the cache and match the original files MD5 + copy_of_file_from_server = syn.get( + bogus_file, + downloadLocation=os.path.dirname(filepath), + ifcollision="overwrite.local", + ) + assert copy_of_file_from_server.id == unmodified_file_from_server.id + assert filecmp.cmp(copy_of_file_from_server.path, unmodified_file_from_server.path) + assert download_file_function.call_count == 1 + assert copy_of_file_from_server._file_handle["contentMd5"] == original_file_md5 + assert utils.md5_for_file(filepath).hexdigest() == original_file_md5 + + # GIVEN another bogus file with the same name and non matching MD5 data + new_bogus_file = utils.make_bogus_binary_file() + os.rename(new_bogus_file, filepath) + assert utils.md5_for_file(filepath).hexdigest() != original_file_md5 + + # WHEN I get the File from synapse + copy_of_file_from_server = syn.get( + bogus_file, + downloadLocation=os.path.dirname(filepath), + ifcollision="overwrite.local", + ) + + # THEN I expect the file to have been downloaded and overwrite the file on my machine with the same name + assert copy_of_file_from_server._file_handle["contentMd5"] == original_file_md5 + assert utils.md5_for_file(filepath).hexdigest() == original_file_md5 + assert download_file_function.call_count == 2 + + os.remove(filepath) + + +def test_store_activity(syn: Synapse, project: Project, schedule_for_cleanup): # Create a File and an Activity path = utils.make_bogus_binary_file() schedule_for_cleanup(path) @@ -388,7 +460,7 @@ def test_store_activity(syn, project, schedule_for_cleanup): assert honking["id"] == honking2["id"] -def test_store_isRestricted_flag(syn, project, schedule_for_cleanup): +def test_store_isRestricted_flag(syn: Synapse, project: Project, schedule_for_cleanup): # Store a file with access requirements path = utils.make_bogus_binary_file() schedule_for_cleanup(path) @@ -402,7 +474,7 @@ def test_store_isRestricted_flag(syn, project, schedule_for_cleanup): assert intercepted.called -def test_ExternalFileHandle(syn, project, schedule_for_cleanup): +def test_ExternalFileHandle(syn: Synapse, project: Project): # Tests shouldn't have external dependencies, but this is a pretty picture of Singapore singapore_url = "http://upload.wikimedia.org/wikipedia/commons/thumb/3/3e/1_singapore_city_skyline_dusk_panorama_2011.jpg/1280px-1_singapore_city_skyline_dusk_panorama_2011.jpg" # noqa singapore = File(singapore_url, parent=project, synapseStore=False) @@ -432,7 +504,7 @@ def test_ExternalFileHandle(syn, project, schedule_for_cleanup): assert s2.externalURL == singapore_2_url -def test_synapseStore_flag(syn, project, schedule_for_cleanup): +def test_synapseStore_flag(syn: Synapse, project: Project, schedule_for_cleanup): # Store a path to a local file path = utils.make_bogus_data_file() schedule_for_cleanup(path) @@ -474,7 +546,7 @@ def test_synapseStore_flag(syn, project, schedule_for_cleanup): assert not bogus.synapseStore -def test_create_or_update_project(syn, project, schedule_for_cleanup): +def test_create_or_update_project(syn: Synapse, project: Project, schedule_for_cleanup): name = str(uuid.uuid4()) project = Project(name, a=1, b=2) @@ -498,7 +570,7 @@ def test_create_or_update_project(syn, project, schedule_for_cleanup): pytest.raises(Exception, syn.store, project, createOrUpdate=False) -def test_download_file_false(syn, project, schedule_for_cleanup): +def test_download_file_false(syn: Synapse, project: Project, schedule_for_cleanup): RENAME_SUFFIX = "blah" # Upload a file @@ -523,7 +595,7 @@ def test_download_file_false(syn, project, schedule_for_cleanup): assert reupload.name == file.name -def test_download_file_URL_false(syn, project, schedule_for_cleanup): +def test_download_file_URL_false(syn: Synapse, project: Project): # Upload an external file handle fileThatExists = "http://dev-versions.synapse.sagebase.org/synapsePythonClient" reupload = File(fileThatExists, synapseStore=False, parent=project) @@ -554,7 +626,9 @@ def test_download_file_URL_false(syn, project, schedule_for_cleanup): # SYNPY-366 -def test_download_local_file_URL_path(syn, project, schedule_for_cleanup): +def test_download_local_file_URL_path( + syn: Synapse, project: Project, schedule_for_cleanup +): path = utils.make_bogus_data_file() schedule_for_cleanup(path) @@ -566,7 +640,9 @@ def test_download_local_file_URL_path(syn, project, schedule_for_cleanup): # SYNPY-424 -def test_store_file_handle_update_metadata(syn, project, schedule_for_cleanup): +def test_store_file_handle_update_metadata( + syn: Synapse, project: Project, schedule_for_cleanup +): original_file_path = utils.make_bogus_data_file() schedule_for_cleanup(original_file_path) @@ -593,7 +669,7 @@ def test_store_file_handle_update_metadata(syn, project, schedule_for_cleanup): assert [os.path.basename(replacement_file_path)] == new_entity.files -def test_store_DockerRepository(syn, project, schedule_for_cleanup): +def test_store_DockerRepository(syn: Synapse, project: Project): repo_name = "some/repository/path" docker_repo = syn.store(DockerRepository(repo_name, parent=project)) assert isinstance(docker_repo, DockerRepository) @@ -602,7 +678,7 @@ def test_store_DockerRepository(syn, project, schedule_for_cleanup): def test_store__changing_externalURL_by_changing_path( - syn, project, schedule_for_cleanup + syn: Synapse, project: Project, schedule_for_cleanup ): url = "https://www.synapse.org/Portal/clear.cache.gif" ext = syn.store(File(url, name="test", parent=project, synapseStore=False)) @@ -628,7 +704,7 @@ def test_store__changing_externalURL_by_changing_path( def test_store__changing_from_Synapse_to_externalURL_by_changing_path( - syn, project, schedule_for_cleanup + syn: Synapse, project: Project, schedule_for_cleanup ): # create a temp file temp_path = utils.make_bogus_data_file()