From 55e1df3c8509d2916076bd51b0debb8f872e7a7d Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 27 Nov 2023 09:52:27 -0700 Subject: [PATCH 1/3] Adding note about unique .db file --- docs/articles/data_storage.rst | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/docs/articles/data_storage.rst b/docs/articles/data_storage.rst index 81709b7fb..177249ea7 100644 --- a/docs/articles/data_storage.rst +++ b/docs/articles/data_storage.rst @@ -138,6 +138,10 @@ Once the storage location is known, the first step to migrate the project or fol of its contents using the `index_files_for_migration `__ function, e.g. +When specifying the `.db` file for the migratable indexes you need to specify a `.db` file that does not already exist +for another synapse project or folder on disk. It is the best practice to specify a unique name for the file by including +the synapse id in the name of the file, or other unique identifier. + .. code-block:: import synapseutils @@ -149,7 +153,7 @@ of its contents using the # nothing needs to exist at this path, but it must be a valid path on a volume with sufficient # disk space to store a meta data listing of all the contents in the indexed entity. # a rough rule of thumb is 100kB per 1000 entities indexed. - db_path = '/tmp/foo/bar.db' + db_path = '/tmp/foo/syn123_bar.db' result = synapseutils.index_files_for_migration( syn, @@ -207,7 +211,7 @@ Putting all the migration pieces together # # nothing needs to exist at this path, but it must be a valid path on a volume with sufficient # # disk space to store a meta data listing of all the contents in the indexed entity. # # a rough rule of thumb is 100kB per 1000 entities indexed. 
- db_path = os.path.expanduser("~/synapseMigration/my.db") + db_path = os.path.expanduser(f"~/synapseMigration/{my_synapse_project_or_folder_to_migrate}_my.db") syn = synapseclient.Synapse() @@ -253,7 +257,7 @@ Putting all the migration pieces together The result of running this should look like .. code-block:: - Indexing: syn123 for migration to storage_id: 11111 at: /home/user/synapseMigration/my.db + Indexing: syn123 for migration to storage_id: 11111 at: /home/user/synapseMigration/syn123_my.db Indexing result: {'INDEXED': 100, 'MIGRATED': 0, 'ALREADY_MIGRATED': 0, 'ERRORED': 0} Migrating files... Migration result: {'INDEXED': 0, 'MIGRATED': 100, 'ALREADY_MIGRATED': 0, 'ERRORED': 0} From 56d66b870e64dbb3a5948592fe964fc5843fdff5 Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 27 Nov 2023 10:04:17 -0700 Subject: [PATCH 2/3] Update script to include sending an email on migration finish --- docs/articles/data_storage.rst | 125 +++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 54 deletions(-) diff --git a/docs/articles/data_storage.rst b/docs/articles/data_storage.rst index 177249ea7..14fb6bf28 100644 --- a/docs/articles/data_storage.rst +++ b/docs/articles/data_storage.rst @@ -198,61 +198,78 @@ Putting all the migration pieces together ----------------------------------------- .. code-block:: - import os - import synapseutils - import synapseclient - - my_synapse_project_or_folder_to_migrate = "syn123" - - external_bucket_name = "my-external-synapse-bucket" - external_bucket_base_key = "path/within/bucket/" - - # # a path on disk where this utility can create a sqlite database to store its index. - # # nothing needs to exist at this path, but it must be a valid path on a volume with sufficient - # # disk space to store a meta data listing of all the contents in the indexed entity. - # # a rough rule of thumb is 100kB per 1000 entities indexed. 
- db_path = os.path.expanduser(f"~/synapseMigration/{my_synapse_project_or_folder_to_migrate}_my.db") - - syn = synapseclient.Synapse() - - # # Log-in with ~.synapseConfig `authToken` - syn.login() - - # The project or folder I want to migrate everything to this S3 storage location - project_or_folder = syn.get(my_synapse_project_or_folder_to_migrate) - - project_or_folder, storage_location, project_setting = syn.create_s3_storage_location( - # Despite the KW argument name, this can be a project or folder - folder=project_or_folder, - bucket_name=external_bucket_name, - base_key=external_bucket_base_key, - ) - - # The id of the destination storage location being migrated to - storage_location_id = storage_location["storageLocationId"] - print( - f"Indexing: {project_or_folder.id} for migration to storage_id: {storage_location_id} at: {db_path}" - ) - - result = synapseutils.index_files_for_migration( - syn, - project_or_folder.id, - storage_location_id, - db_path, - file_version_strategy="all", - ) - - print(f"Indexing result: {result.get_counts_by_status()}") - - print("Migrating files...") - - result = synapseutils.migrate_indexed_files( - syn, - db_path, - force=True, - ) + import os + import synapseutils + import synapseclient + + my_synapse_project_or_folder_to_migrate = "syn123" + + external_bucket_name = "my-external-synapse-bucket" + external_bucket_base_key = "path/within/bucket/" + + my_user_id = "1234" + + # # a path on disk where this utility can create a sqlite database to store its index. + # # nothing needs to exist at this path, but it must be a valid path on a volume with sufficient + # # disk space to store a meta data listing of all the contents in the indexed entity. + # # a rough rule of thumb is 100kB per 1000 entities indexed. 
+ db_path = os.path.expanduser( + f"~/synapseMigration/{my_synapse_project_or_folder_to_migrate}_my.db" + ) + + syn = synapseclient.Synapse() + + # # Log-in with ~.synapseConfig `authToken` + syn.login() + + # The project or folder I want to migrate everything to this S3 storage location + project_or_folder = syn.get(my_synapse_project_or_folder_to_migrate) + + project_or_folder, storage_location, project_setting = syn.create_s3_storage_location( + # Despite the KW argument name, this can be a project or folder + folder=project_or_folder, + bucket_name=external_bucket_name, + base_key=external_bucket_base_key, + ) + + # The id of the destination storage location being migrated to + storage_location_id = storage_location["storageLocationId"] + print( + f"Indexing: {project_or_folder.id} for migration to storage_id: {storage_location_id} at: {db_path}" + ) + + try: + result = synapseutils.index_files_for_migration( + syn, + project_or_folder.id, + storage_location_id, + db_path, + file_version_strategy="all", + ) + + print(f"Indexing result: {result.get_counts_by_status()}") + + print("Migrating files...") + + result = synapseutils.migrate_indexed_files( + syn, + db_path, + force=True, + ) + + print(f"Migration result: {result.get_counts_by_status()}") + syn.sendMessage( + userIds=[my_user_id], + messageSubject=f"Migration success for {project_or_folder.id}", + messageBody=f"Migration result: {result.get_counts_by_status()}", + ) + except Exception as e: + syn.sendMessage( + userIds=[my_user_id], + messageSubject=f"Migration failed for {project_or_folder.id}", + messageBody=f"Migration failed with error: {e}", + ) - print(f"Migration result: {result.get_counts_by_status()}") The result of running this should look like .. 
code-block:: From af1f51661fd79b6b69c28556c4e61ae9a9dd57aa Mon Sep 17 00:00:00 2001 From: BryanFauble <17128019+BryanFauble@users.noreply.github.com> Date: Mon, 27 Nov 2023 10:29:02 -0700 Subject: [PATCH 3/3] Run pre-commit --- docs/articles/data_storage.rst | 144 ++++++++++++++++----------------- 1 file changed, 72 insertions(+), 72 deletions(-) diff --git a/docs/articles/data_storage.rst b/docs/articles/data_storage.rst index 14fb6bf28..ff7a2cace 100644 --- a/docs/articles/data_storage.rst +++ b/docs/articles/data_storage.rst @@ -138,7 +138,7 @@ Once the storage location is known, the first step to migrate the project or fol of its contents using the `index_files_for_migration `__ function, e.g. -When specifying the `.db` file for the migratable indexes you need to specify a `.db` file that does not already exist +When specifying the `.db` file for the migratable indexes you need to specify a `.db` file that does not already exist for another synapse project or folder on disk. It is the best practice to specify a unique name for the file by including the synapse id in the name of the file, or other unique identifier. @@ -198,77 +198,77 @@ Putting all the migration pieces together ----------------------------------------- .. code-block:: - import os - import synapseutils - import synapseclient - - my_synapse_project_or_folder_to_migrate = "syn123" - - external_bucket_name = "my-external-synapse-bucket" - external_bucket_base_key = "path/within/bucket/" - - my_user_id = "1234" - - # # a path on disk where this utility can create a sqlite database to store its index. - # # nothing needs to exist at this path, but it must be a valid path on a volume with sufficient - # # disk space to store a meta data listing of all the contents in the indexed entity. - # # a rough rule of thumb is 100kB per 1000 entities indexed. 
- db_path = os.path.expanduser( - f"~/synapseMigration/{my_synapse_project_or_folder_to_migrate}_my.db" - ) - - syn = synapseclient.Synapse() - - # # Log-in with ~.synapseConfig `authToken` - syn.login() - - # The project or folder I want to migrate everything to this S3 storage location - project_or_folder = syn.get(my_synapse_project_or_folder_to_migrate) - - project_or_folder, storage_location, project_setting = syn.create_s3_storage_location( - # Despite the KW argument name, this can be a project or folder - folder=project_or_folder, - bucket_name=external_bucket_name, - base_key=external_bucket_base_key, - ) - - # The id of the destination storage location being migrated to - storage_location_id = storage_location["storageLocationId"] - print( - f"Indexing: {project_or_folder.id} for migration to storage_id: {storage_location_id} at: {db_path}" - ) - - try: - result = synapseutils.index_files_for_migration( - syn, - project_or_folder.id, - storage_location_id, - db_path, - file_version_strategy="all", - ) - - print(f"Indexing result: {result.get_counts_by_status()}") - - print("Migrating files...") - - result = synapseutils.migrate_indexed_files( - syn, - db_path, - force=True, - ) - - print(f"Migration result: {result.get_counts_by_status()}") - syn.sendMessage( - userIds=[my_user_id], - messageSubject=f"Migration success for {project_or_folder.id}", - messageBody=f"Migration result: {result.get_counts_by_status()}", - ) - except Exception as e: - syn.sendMessage( - userIds=[my_user_id], - messageSubject=f"Migration failed for {project_or_folder.id}", - messageBody=f"Migration failed with error: {e}", - ) + import os + import synapseutils + import synapseclient + + my_synapse_project_or_folder_to_migrate = "syn123" + + external_bucket_name = "my-external-synapse-bucket" + external_bucket_base_key = "path/within/bucket/" + + my_user_id = "1234" + + # # a path on disk where this utility can create a sqlite database to store its index. 
+ # # nothing needs to exist at this path, but it must be a valid path on a volume with sufficient + # # disk space to store a metadata listing of all the contents in the indexed entity. + # # a rough rule of thumb is 100kB per 1000 entities indexed. + db_path = os.path.expanduser( + f"~/synapseMigration/{my_synapse_project_or_folder_to_migrate}_my.db" + ) + + syn = synapseclient.Synapse() + + # # Log in with the `authToken` from ~/.synapseConfig + syn.login() + + # The project or folder whose contents I want to migrate to this S3 storage location + project_or_folder = syn.get(my_synapse_project_or_folder_to_migrate) + + project_or_folder, storage_location, project_setting = syn.create_s3_storage_location( + # Despite the KW argument name, this can be a project or folder + folder=project_or_folder, + bucket_name=external_bucket_name, + base_key=external_bucket_base_key, + ) + + # The id of the destination storage location being migrated to + storage_location_id = storage_location["storageLocationId"] + print( + f"Indexing: {project_or_folder.id} for migration to storage_id: {storage_location_id} at: {db_path}" + ) + + try: + result = synapseutils.index_files_for_migration( + syn, + project_or_folder.id, + storage_location_id, + db_path, + file_version_strategy="all", + ) + + print(f"Indexing result: {result.get_counts_by_status()}") + + print("Migrating files...") + + result = synapseutils.migrate_indexed_files( + syn, + db_path, + force=True, + ) + + print(f"Migration result: {result.get_counts_by_status()}") + syn.sendMessage( + userIds=[my_user_id], + messageSubject=f"Migration success for {project_or_folder.id}", + messageBody=f"Migration result: {result.get_counts_by_status()}", + ) + except Exception as e: + syn.sendMessage( + userIds=[my_user_id], + messageSubject=f"Migration failed for {project_or_folder.id}", + messageBody=f"Migration failed with error: {e}", + ) The result of running this should look like