Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYNPY-1316] Fix for cache item names matching #998

Merged
merged 2 commits into from
Oct 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 20 additions & 2 deletions synapseclient/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -1035,7 +1035,11 @@ def _getWithEntityBundle(self, entityBundle, entity=None, **kwargs):
if downloadFile:
if file_handle:
self._download_file_entity(
downloadLocation, entity, ifcollision, submission
downloadLocation,
entity,
ifcollision,
submission,
file_handle.get("contentMd5", None),
)
else: # no filehandle means that we do not have DOWNLOAD permission
warning_message = (
Expand All @@ -1062,7 +1066,12 @@ def _ensure_download_location_is_directory(self, downloadLocation):
return download_dir

def _download_file_entity(
self, downloadLocation: str, entity: Entity, ifcollision: str, submission: str
self,
downloadLocation: str,
entity: Entity,
ifcollision: str,
submission: str,
expected_md5: typing.Union[str, None],
):
# set the initial local state
entity.path = None
Expand All @@ -1074,6 +1083,15 @@ def _download_file_entity(
# downloaded the file
cached_file_path = self.cache.get(entity.dataFileHandleId, downloadLocation)

# This is to handle for cases where the names of the files in the cache and the
# requested file name match - But there is a difference in content.
if (
expected_md5 is not None
and cached_file_path is not None
and utils.md5_for_file(cached_file_path).hexdigest() != expected_md5
):
cached_file_path = None

# location in .synapseCache where the file would be corresponding to its FileHandleId
synapseCache_location = self.cache.get_cache_dir(entity.dataFileHandleId)

Expand Down
112 changes: 94 additions & 18 deletions tests/integration/synapseclient/integration_test_Entity.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,24 @@
import pytest
from unittest.mock import patch

from pytest_mock import MockerFixture


from synapseclient.core.upload.upload_functions import create_external_file_handle
from synapseclient import Activity, DockerRepository, File, Folder, Link, Project
from synapseclient import (
Activity,
DockerRepository,
File,
Folder,
Link,
Project,
Synapse,
)
from synapseclient.core.exceptions import SynapseError, SynapseHTTPError
import synapseclient.core.utils as utils


def test_Entity(syn, project, schedule_for_cleanup):
def test_Entity(syn: Synapse, project: Project, schedule_for_cleanup):
# Update the project
project_name = str(uuid.uuid4())
project = Project(name=project_name)
Expand Down Expand Up @@ -152,7 +163,7 @@ def test_Entity(syn, project, schedule_for_cleanup):
assert os.path.basename(a_file_cached.path) == os.path.basename(a_file.path)


def test_special_characters(syn, project, schedule_for_cleanup):
def test_special_characters(syn: Synapse, project: Project):
folder = syn.store(
Folder(
"Special Characters Here",
Expand All @@ -174,7 +185,7 @@ def test_special_characters(syn, project, schedule_for_cleanup):
assert folder.russian_annotation[0] == "Обезьяна прикладом"


def test_get_local_file(syn, project, schedule_for_cleanup):
def test_get_local_file(syn: Synapse, project: Project, schedule_for_cleanup):
new_path = utils.make_bogus_data_file()
schedule_for_cleanup(new_path)
folder = Folder(
Expand Down Expand Up @@ -204,7 +215,7 @@ def test_get_local_file(syn, project, schedule_for_cleanup):
pytest.raises(SynapseError, syn.get, new_path, limitSearch="syn1")


def test_store_with_flags(syn, project, schedule_for_cleanup):
def test_store_with_flags(syn: Synapse, project: Project, schedule_for_cleanup):
# -- CreateOrUpdate flag for Projects --
# If we store a project with the same name, it should become an update
projUpdate = Project(project.name)
Expand Down Expand Up @@ -287,11 +298,14 @@ def test_store_with_flags(syn, project, schedule_for_cleanup):
assert ephemeralBogus.shoe_size == [11.5]


def test_get_with_downloadLocation_and_ifcollision(syn, project, schedule_for_cleanup):
def test_get_with_downloadLocation_and_ifcollision(
syn: Synapse, project: Project, schedule_for_cleanup
):
# Store a File and delete it locally
filepath = utils.make_bogus_binary_file()
bogus = File(filepath, name="Bogus Test File", parent=project)
bogus = syn.store(bogus)
schedule_for_cleanup(bogus)
os.remove(filepath)

# Compare stuff to this one
Expand Down Expand Up @@ -339,7 +353,65 @@ def test_get_with_downloadLocation_and_ifcollision(syn, project, schedule_for_cl
os.remove(renamedBogus.path)


def test_store_activity(syn, project, schedule_for_cleanup):
def test_get_with_cache_hit_and_miss_with_ifcollision(
syn: Synapse, project: Project, schedule_for_cleanup, mocker: MockerFixture
):
download_file_function = mocker.spy(obj=syn, name="_downloadFileHandle")
# GIVEN a File that is stored in Synapse - and removed from the local machine
filepath = utils.make_bogus_binary_file()
original_file_md5 = utils.md5_for_file(filepath).hexdigest()
bogus_file = File(
filepath, name="a_name_that_will_show_up_in_cache", parent=project
)
bogus_file = syn.store(bogus_file)
schedule_for_cleanup(bogus_file)
os.remove(filepath)

# WHEN I get the File from synapse
unmodified_file_from_server = syn.get(
bogus_file,
downloadLocation=os.path.dirname(filepath),
ifcollision="overwrite.local",
)

# THEN I expect the file to have been downloaded and match the original files MD5
assert download_file_function.call_count == 1
assert unmodified_file_from_server._file_handle["contentMd5"] == original_file_md5
assert utils.md5_for_file(filepath).hexdigest() == original_file_md5

# AND I expect the file to be in the cache and match the original files MD5
copy_of_file_from_server = syn.get(
bogus_file,
downloadLocation=os.path.dirname(filepath),
ifcollision="overwrite.local",
)
assert copy_of_file_from_server.id == unmodified_file_from_server.id
assert filecmp.cmp(copy_of_file_from_server.path, unmodified_file_from_server.path)
assert download_file_function.call_count == 1
assert copy_of_file_from_server._file_handle["contentMd5"] == original_file_md5
assert utils.md5_for_file(filepath).hexdigest() == original_file_md5

# GIVEN another bogus file with the same name and non matching MD5 data
new_bogus_file = utils.make_bogus_binary_file()
os.rename(new_bogus_file, filepath)
assert utils.md5_for_file(filepath).hexdigest() != original_file_md5

# WHEN I get the File from synapse
copy_of_file_from_server = syn.get(
bogus_file,
downloadLocation=os.path.dirname(filepath),
ifcollision="overwrite.local",
)

# THEN I expect the file to have been downloaded and overwrite the file on my machine with the same name
assert copy_of_file_from_server._file_handle["contentMd5"] == original_file_md5
assert utils.md5_for_file(filepath).hexdigest() == original_file_md5
assert download_file_function.call_count == 2

os.remove(filepath)


def test_store_activity(syn: Synapse, project: Project, schedule_for_cleanup):
# Create a File and an Activity
path = utils.make_bogus_binary_file()
schedule_for_cleanup(path)
Expand Down Expand Up @@ -388,7 +460,7 @@ def test_store_activity(syn, project, schedule_for_cleanup):
assert honking["id"] == honking2["id"]


def test_store_isRestricted_flag(syn, project, schedule_for_cleanup):
def test_store_isRestricted_flag(syn: Synapse, project: Project, schedule_for_cleanup):
# Store a file with access requirements
path = utils.make_bogus_binary_file()
schedule_for_cleanup(path)
Expand All @@ -402,7 +474,7 @@ def test_store_isRestricted_flag(syn, project, schedule_for_cleanup):
assert intercepted.called


def test_ExternalFileHandle(syn, project, schedule_for_cleanup):
def test_ExternalFileHandle(syn: Synapse, project: Project):
# Tests shouldn't have external dependencies, but this is a pretty picture of Singapore
singapore_url = "http://upload.wikimedia.org/wikipedia/commons/thumb/3/3e/1_singapore_city_skyline_dusk_panorama_2011.jpg/1280px-1_singapore_city_skyline_dusk_panorama_2011.jpg" # noqa
singapore = File(singapore_url, parent=project, synapseStore=False)
Expand Down Expand Up @@ -432,7 +504,7 @@ def test_ExternalFileHandle(syn, project, schedule_for_cleanup):
assert s2.externalURL == singapore_2_url


def test_synapseStore_flag(syn, project, schedule_for_cleanup):
def test_synapseStore_flag(syn: Synapse, project: Project, schedule_for_cleanup):
# Store a path to a local file
path = utils.make_bogus_data_file()
schedule_for_cleanup(path)
Expand Down Expand Up @@ -474,7 +546,7 @@ def test_synapseStore_flag(syn, project, schedule_for_cleanup):
assert not bogus.synapseStore


def test_create_or_update_project(syn, project, schedule_for_cleanup):
def test_create_or_update_project(syn: Synapse, project: Project, schedule_for_cleanup):
name = str(uuid.uuid4())

project = Project(name, a=1, b=2)
Expand All @@ -498,7 +570,7 @@ def test_create_or_update_project(syn, project, schedule_for_cleanup):
pytest.raises(Exception, syn.store, project, createOrUpdate=False)


def test_download_file_false(syn, project, schedule_for_cleanup):
def test_download_file_false(syn: Synapse, project: Project, schedule_for_cleanup):
RENAME_SUFFIX = "blah"

# Upload a file
Expand All @@ -523,7 +595,7 @@ def test_download_file_false(syn, project, schedule_for_cleanup):
assert reupload.name == file.name


def test_download_file_URL_false(syn, project, schedule_for_cleanup):
def test_download_file_URL_false(syn: Synapse, project: Project):
# Upload an external file handle
fileThatExists = "http://dev-versions.synapse.sagebase.org/synapsePythonClient"
reupload = File(fileThatExists, synapseStore=False, parent=project)
Expand Down Expand Up @@ -554,7 +626,9 @@ def test_download_file_URL_false(syn, project, schedule_for_cleanup):


# SYNPY-366
def test_download_local_file_URL_path(syn, project, schedule_for_cleanup):
def test_download_local_file_URL_path(
syn: Synapse, project: Project, schedule_for_cleanup
):
path = utils.make_bogus_data_file()
schedule_for_cleanup(path)

Expand All @@ -566,7 +640,9 @@ def test_download_local_file_URL_path(syn, project, schedule_for_cleanup):


# SYNPY-424
def test_store_file_handle_update_metadata(syn, project, schedule_for_cleanup):
def test_store_file_handle_update_metadata(
syn: Synapse, project: Project, schedule_for_cleanup
):
original_file_path = utils.make_bogus_data_file()
schedule_for_cleanup(original_file_path)

Expand All @@ -593,7 +669,7 @@ def test_store_file_handle_update_metadata(syn, project, schedule_for_cleanup):
assert [os.path.basename(replacement_file_path)] == new_entity.files


def test_store_DockerRepository(syn, project, schedule_for_cleanup):
def test_store_DockerRepository(syn: Synapse, project: Project):
repo_name = "some/repository/path"
docker_repo = syn.store(DockerRepository(repo_name, parent=project))
assert isinstance(docker_repo, DockerRepository)
Expand All @@ -602,7 +678,7 @@ def test_store_DockerRepository(syn, project, schedule_for_cleanup):


def test_store__changing_externalURL_by_changing_path(
syn, project, schedule_for_cleanup
syn: Synapse, project: Project, schedule_for_cleanup
):
url = "https://www.synapse.org/Portal/clear.cache.gif"
ext = syn.store(File(url, name="test", parent=project, synapseStore=False))
Expand All @@ -628,7 +704,7 @@ def test_store__changing_externalURL_by_changing_path(


def test_store__changing_from_Synapse_to_externalURL_by_changing_path(
syn, project, schedule_for_cleanup
syn: Synapse, project: Project, schedule_for_cleanup
):
# create a temp file
temp_path = utils.make_bogus_data_file()
Expand Down
19 changes: 10 additions & 9 deletions tests/unit/synapseclient/core/unit_test_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def mock_generate_headers(self, headers=None):
return {}


def test_mock_download(syn):
def test_mock_download(syn: Synapse):
temp_dir = tempfile.gettempdir()

fileHandleId = "42"
Expand Down Expand Up @@ -388,7 +388,7 @@ def test_mock_download(syn):

class Test__downloadFileHandle(unittest.TestCase):
@pytest.fixture(autouse=True, scope="function")
def init_syn(self, syn):
def init_syn(self, syn: Synapse):
self.syn = syn

def tearDown(self) -> None:
Expand Down Expand Up @@ -506,7 +506,7 @@ def test_multithread_false__S3_fileHandle(self):

class Test_download_from_url_multi_threaded:
@pytest.fixture(autouse=True, scope="function")
def init_syn(self, syn):
def init_syn(self, syn: Synapse):
self.syn = syn

def test_md5_mismatch(self):
Expand Down Expand Up @@ -564,7 +564,7 @@ def test_md5_match(self):
)


def test_download_end_early_retry(syn):
def test_download_end_early_retry(syn: Synapse):
"""
-------Test to ensure download retry even if connection ends early--------
"""
Expand Down Expand Up @@ -645,7 +645,7 @@ def test_download_end_early_retry(syn):
mocked_move.assert_called_once_with(temp_destination, destination)


def test_download_md5_mismatch__not_local_file(syn):
def test_download_md5_mismatch__not_local_file(syn: Synapse):
"""
--------Test to ensure file gets removed on md5 mismatch--------
"""
Expand Down Expand Up @@ -712,7 +712,7 @@ def test_download_md5_mismatch__not_local_file(syn):
mocked_remove.assert_called_once_with(destination)


def test_download_md5_mismatch_local_file(syn):
def test_download_md5_mismatch_local_file(syn: Synapse):
"""
--------Test to ensure file gets removed on md5 mismatch--------
"""
Expand Down Expand Up @@ -743,7 +743,7 @@ def test_download_md5_mismatch_local_file(syn):
assert not mocked_remove.called


def test_download_file_entity__correct_local_state(syn):
def test_download_file_entity__correct_local_state(syn: Synapse):
mock_cache_path = utils.normalize_path("/i/will/show/you/the/path/yi.txt")
file_entity = File(parentId="syn123")
file_entity.dataFileHandleId = 123
Expand All @@ -753,14 +753,15 @@ def test_download_file_entity__correct_local_state(syn):
entity=file_entity,
ifcollision="overwrite.local",
submission=None,
expected_md5=None,
)
assert mock_cache_path == utils.normalize_path(file_entity.path)
assert os.path.dirname(mock_cache_path) == file_entity.cacheDir
assert 1 == len(file_entity.files)
assert os.path.basename(mock_cache_path) == file_entity.files[0]


def test_getFileHandleDownload__error_UNAUTHORIZED(syn):
def test_getFileHandleDownload__error_UNAUTHORIZED(syn: Synapse):
ret_val = {
"requestedFiles": [
{
Expand All @@ -772,7 +773,7 @@ def test_getFileHandleDownload__error_UNAUTHORIZED(syn):
pytest.raises(SynapseError, syn._getFileHandleDownload, "123", "syn456")


def test_getFileHandleDownload__error_NOT_FOUND(syn):
def test_getFileHandleDownload__error_NOT_FOUND(syn: Synapse):
ret_val = {
"requestedFiles": [
{
Expand Down