From 350a5de4ea257275b6b1f3275c9779da7f958ca9 Mon Sep 17 00:00:00 2001 From: Hallie Swan <26949006+hallieswan@users.noreply.github.com> Date: Wed, 29 Jan 2025 10:04:18 -0800 Subject: [PATCH 1/6] AG-1640: create dataversion collection --- README.md | 14 +++++++------- data-manifest.json | 6 +++--- import-data.sh | 7 ++++--- 3 files changed, 14 insertions(+), 13 deletions(-) diff --git a/README.md b/README.md index 0821942..f468879 100644 --- a/README.md +++ b/README.md @@ -14,17 +14,17 @@ self-service update. # Workflow To deploy an updated data version to the Agora development database -1. Increment `data-version` in `data-manifest.json` on the `develop` branch. +1. Increment `data_version` in `data-manifest.json` on the `develop` branch. 2. Commit the change 3. The Github action CI system automatically updates the dev DB To deploy an updated data version to the Agora staging database: -1. Merge the data-version update from the dev branch to the staging branch. +1. Merge the data_version update from the dev branch to the staging branch. 2. The Github action CI system automatically updates the dev DB To deploy an updated data version to the Agora production database: -1. Merge the data-version update from the staging branch to the production branch. +1. Merge the data_version update from the staging branch to the production branch. 2. The Github action CI system automatically updates the dev DB @@ -55,12 +55,12 @@ Context specific secrets for each environment that corresponds to a git branch ( ## Self hosted runners -[agora2-infra] repository deploys a bastian host in AWS for each environment which have access to +[agora-infra-v3] repository deploys a bastian host in AWS for each environment which have access to the databases. We manually configure a [Github self-hosted runner](https://docs.github.com/en/actions/hosting-your-own-runners) for each bastian host, a label is applied to each runner to match the corresponding git branch name (develop/staging/prod). Each runner corresponds to an environment which corresponds to a git branch. The update is executed from these runners. When a push happens on a branch (i.e. develop), the update -is executed on the `agora-bastian-develop` runner which in turn updates the development database. +is executed on the `agora-bastion-develop` runner which in turn updates the development database. ![alt text][self_hosted_runners] @@ -68,7 +68,7 @@ is executed on the `agora-bastian-develop` runner which in turn updates the deve ### Setup self hosted runners -Github self hosted runners are deployed with a [Sceptre template config file])(https://github.com/Sage-Bionetworks/agora2-infra/blob/main/config/agoradev/develop/agora-bastian.yaml). +Github self hosted runners are deployed with [Cloudformation](https://github.com/Sage-Bionetworks-IT/agora-infra-v3/blob/dev/src/bastion_stack.py). Self Hosted Runner setup: * Deploy the template to the Agora AWS account. @@ -121,5 +121,5 @@ Enter name of work folder: [press Enter for _work] [db_update]: agora-db-update.drawio.png "update diagram" [github_secrets]: github_secrets.png "github secrets screen" [self_hosted_runners]: self-hosted-runners.png "self hosted runners" -[agora2-infra]: https://github.com/Sage-Bionetworks/agora2-infra "agora2-infra repository" +[agora-infra-v3]: https://github.com/Sage-Bionetworks-IT/agora-infra-v3 "agora-infra-v3 repository" [Github self-hosted runners]: https://docs.github.com/en/actions/hosting-your-own-runners/managing-self-hosted-runners/about-self-hosted-runners#about-self-hosted-runners diff --git a/data-manifest.json b/data-manifest.json index 312e106..eda92a9 100644 --- a/data-manifest.json +++ b/data-manifest.json @@ -1,5 +1,5 @@ { - "data-version": "71", - "data-manifest-id": "syn13363290", - "team-images-id": "syn12861877" + "data_version": "71", + "data_manifest_id": "syn13363290", + "team_images_id": "syn12861877" } diff --git a/import-data.sh b/import-data.sh index 7a88cba..efd2a26 100755 --- a/import-data.sh +++ b/import-data.sh @@ -19,9 +19,9 @@ TEAM_IMAGES_DIR=$DATA_DIR/team_images mkdir -p $TEAM_IMAGES_DIR # Version key/value should be on his own line -DATA_VERSION=$(cat $WORKING_DIR/data-manifest.json | grep data-version | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') -DATA_MANIFEST_ID=$(cat $WORKING_DIR/data-manifest.json | grep data-manifest-id | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') -TEAM_IMAGES_ID=$(cat $WORKING_DIR/data-manifest.json | grep team-images-id | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') +DATA_VERSION=$(cat $WORKING_DIR/data-manifest.json | grep data_version | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') +DATA_MANIFEST_ID=$(cat $WORKING_DIR/data-manifest.json | grep data_manifest_id | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') +TEAM_IMAGES_ID=$(cat $WORKING_DIR/data-manifest.json | grep team_images_id | head -1 | awk -F: '{ print $2 }' | sed 's/[",]//g' | tr -d '[[:space:]]') echo "$BRANCH branch, DATA_VERSION = $DATA_VERSION, manifest id = $DATA_MANIFEST_ID" # Download the manifest file from synapse @@ -63,6 +63,7 @@ mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabas mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection proteomicssrm --jsonArray --drop --file $DATA_DIR/proteomics_srm.json mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection genesbiodomains --jsonArray --drop --file $DATA_DIR/genes_biodomains.json mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection biodomaininfo --jsonArray --drop --file $DATA_DIR/biodomain_info.json +mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection dataversion --jsonArray --drop --file $WORKING_DIR/data-manifest.json mongosh --host $DB_HOST -u $DB_USER -p $DB_PASS --authenticationDatabase admin $WORKING_DIR/create-indexes.js From 6fdfece3eaaf1d5a35b9e574768b228b1509309b Mon Sep 17 00:00:00 2001 From: Hallie Swan <26949006+hallieswan@users.noreply.github.com> Date: Fri, 31 Jan 2025 09:04:02 -0800 Subject: [PATCH 2/6] AG-1640: generate dataversion if file does not exist in data release --- import-data.sh | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/import-data.sh b/import-data.sh index efd2a26..3cc2a81 100755 --- a/import-data.sh +++ b/import-data.sh @@ -44,6 +44,12 @@ ls -al $WORKING_DIR ls -al $DATA_DIR ls -al $TEAM_IMAGES_DIR +# Check if dataversion exists +DATAVERSION_PATH="${DATA_DIR}/dataversion.json" +if [ ! -f "${DATAVERSION_PATH}" ]; then + DATAVERSION_PATH="${WORKING_DIR}/data-manifest.json" +fi + # Import synapse data to database # Not using --mode upsert for now because we don't have unique indexes properly set for the collections @@ -63,7 +69,7 @@ mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabas mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection proteomicssrm --jsonArray --drop --file $DATA_DIR/proteomics_srm.json mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection genesbiodomains --jsonArray --drop --file $DATA_DIR/genes_biodomains.json mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection biodomaininfo --jsonArray --drop --file $DATA_DIR/biodomain_info.json -mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection dataversion --jsonArray --drop --file $WORKING_DIR/data-manifest.json +mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection dataversion --jsonArray --drop --file $DATAVERSION_PATH mongosh --host $DB_HOST -u $DB_USER -p $DB_PASS --authenticationDatabase admin $WORKING_DIR/create-indexes.js From 27547a5693d070b40ae87af333138f1209d691ca Mon Sep 17 00:00:00 2001 From: Hallie Swan <26949006+hallieswan@users.noreply.github.com> Date: Fri, 31 Jan 2025 09:07:50 -0800 Subject: [PATCH 3/6] AG-1640: update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f468879..b479897 100644 --- a/README.md +++ b/README.md @@ -60,7 +60,7 @@ the databases. We manually configure a [Github self-hosted runner](https://docs for each bastian host, a label is applied to each runner to match the corresponding git branch name (develop/staging/prod). Each runner corresponds to an environment which corresponds to a git branch. The update is executed from these runners. When a push happens on a branch (i.e. develop), the update -is executed on the `agora-bastion-develop` runner which in turn updates the development database. +is executed on the self-hosted runner with the `develop` label, which in turn updates the development database. ![alt text][self_hosted_runners] From ead7cef9fe4dd0868f98936ade677f7a57a87eae Mon Sep 17 00:00:00 2001 From: Hallie Swan <26949006+hallieswan@users.noreply.github.com> Date: Fri, 31 Jan 2025 09:14:43 -0800 Subject: [PATCH 4/6] AG-1640: fix precommit --- import-data.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/import-data.sh b/import-data.sh index 3cc2a81..4bc2eb1 100755 --- a/import-data.sh +++ b/import-data.sh @@ -48,7 +48,7 @@ ls -al $TEAM_IMAGES_DIR DATAVERSION_PATH="${DATA_DIR}/dataversion.json" if [ ! -f "${DATAVERSION_PATH}" ]; then DATAVERSION_PATH="${WORKING_DIR}/data-manifest.json" -fi +fi # Import synapse data to database # Not using --mode upsert for now because we don't have unique indexes properly set for the collections From 7982324e6b8e84338e67f3a4929ee15871decb89 Mon Sep 17 00:00:00 2001 From: Hallie Swan <26949006+hallieswan@users.noreply.github.com> Date: Fri, 31 Jan 2025 09:55:02 -0800 Subject: [PATCH 5/6] AG-1640: improvement from review --- import-data.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/import-data.sh b/import-data.sh index 4bc2eb1..63d1b07 100755 --- a/import-data.sh +++ b/import-data.sh @@ -69,6 +69,8 @@ mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabas mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection proteomicssrm --jsonArray --drop --file $DATA_DIR/proteomics_srm.json mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection genesbiodomains --jsonArray --drop --file $DATA_DIR/genes_biodomains.json mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection biodomaininfo --jsonArray --drop --file $DATA_DIR/biodomain_info.json + +echo "Importing dataversion from ${DATAVERSION_PATH}" mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection dataversion --jsonArray --drop --file $DATAVERSION_PATH mongosh --host $DB_HOST -u $DB_USER -p $DB_PASS --authenticationDatabase admin $WORKING_DIR/create-indexes.js From 5b7a7c4829b2d310c4b2af4771c2305f0b987347 Mon Sep 17 00:00:00 2001 From: Hallie Swan <26949006+hallieswan@users.noreply.github.com> Date: Fri, 31 Jan 2025 13:58:11 -0800 Subject: [PATCH 6/6] AG-1640: handle different data format for manifest vs ADT file --- import-data.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/import-data.sh b/import-data.sh index 63d1b07..c650137 100755 --- a/import-data.sh +++ b/import-data.sh @@ -44,10 +44,12 @@ ls -al $WORKING_DIR ls -al $DATA_DIR ls -al $TEAM_IMAGES_DIR -# Check if dataversion exists +# Check if dataversion exists and handle different data format DATAVERSION_PATH="${DATA_DIR}/dataversion.json" +DATAVERSION_FLAG="--jsonArray" if [ ! -f "${DATAVERSION_PATH}" ]; then DATAVERSION_PATH="${WORKING_DIR}/data-manifest.json" + DATAVERSION_FLAG="" fi # Import synapse data to database @@ -71,7 +73,7 @@ mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabas mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection biodomaininfo --jsonArray --drop --file $DATA_DIR/biodomain_info.json echo "Importing dataversion from ${DATAVERSION_PATH}" -mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection dataversion --jsonArray --drop --file $DATAVERSION_PATH +mongoimport -h $DB_HOST -d agora -u $DB_USER -p $DB_PASS --authenticationDatabase admin --collection dataversion $DATAVERSION_FLAG --drop --file $DATAVERSION_PATH mongosh --host $DB_HOST -u $DB_USER -p $DB_PASS --authenticationDatabase admin $WORKING_DIR/create-indexes.js