From 194503281aeeab84f3f585138ce04ee49d001d68 Mon Sep 17 00:00:00 2001
From: Hallie Swan <26949006+hallieswan@users.noreply.github.com>
Date: Fri, 14 Jun 2024 14:59:33 -0700
Subject: [PATCH] AG-1498: support use of containerized data image in local
 development [skip ci]

---
 .env.example              |  5 +++++
 .gitignore                |  4 ++++
 README.md                 | 42 +++++++++++++++++++++++++++++++++------
 docker/README.md          |  4 ++--
 docker/docker-compose.yml |  1 +
 package.json              |  8 +++++++-
 6 files changed, 55 insertions(+), 9 deletions(-)
 create mode 100644 .env.example

diff --git a/.env.example b/.env.example
new file mode 100644
index 00000000..6f395adf
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,5 @@
+DB_USER="root"
+DB_PASS="${DB_PASS}"
+DB_PORT="27015"
+DB_NAME="agora"
+DATA_IMAGE_PATH="ghcr.io/sage-bionetworks/agora-data:${DATA_IMAGE_TAG}"
diff --git a/.gitignore b/.gitignore
index dc4ac05b..38f0ccaf 100644
--- a/.gitignore
+++ b/.gitignore
@@ -48,3 +48,7 @@ testem.log
 # System files
 .DS_Store
 Thumbs.db
+
+# Environment file
+.env
+
diff --git a/README.md b/README.md
index e867757c..1ff11469 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,11 @@ cd Agora
 npm install
 ```
 
-### 2 - Create database
+The next sections focus on setting up a mongo database loaded with Agora's data. There are two options: 
+ - Use a local mongo database that you manually create and populate with data (steps 2-5) 
+ - Use a containerized mongo database that is pre-populated with data (step 6)
+
+### 2 - Create local database
 
 You will need to create a MongoDB database and name it `agora`. 
 
@@ -53,7 +57,7 @@ npm run mongo:start
 npm run mongo:start:windows
 ```
 
-### 3 - Populate database
+### 3 - Populate local database
 
 Agora's data is stored in json files in the [Agora Synapse project](https://www.synapse.org/#!Synapse:syn11850457/files/), in the following subfolders:
 * [Agora Live Data](https://www.synapse.org/#!Synapse:syn12177492) - This folder contains all production data releases, as well as data releases that were never released to production
@@ -148,14 +152,14 @@ npm run mongo:create:indexes
 
 You'll need `Linux` to run the previous scripts. If you need to do this in `Windows`, you can get any `Linux` distribution at the `Windows Store` (e.g. `Ubuntu`).
 
-### 4 - Build
+### 4 - Build using local database
 
 ```bash
 # Build the server and app
 npm run dev
 ```
 
-### 5 - Start
+### 5 - Start using local database
 
 ```bash
 # Start the server and app
@@ -164,6 +168,32 @@ npm run start
 
 Go to [http://localhost:8080](http://localhost:8080)
 
+### 6 - Use containerized database
+
+1. Install Docker, if necessary.
+2. Update `data-file` and `data-version` in `package.json` to reflect the desired data release version, if necessary.
+3. Create an environment file: `npm run create-env`. 
+4. Start the containerized database: `npm run docker:db:start`. The necessary images will be pulled from GHCR. If you would like to use a different image, update `DATA_IMAGE_PATH` in `.env`. If the desired image does not exist, see the steps below to create it.
+5. Run the server and app against the containerized database: `npm run docker:dev`.
+6. Stop the containerized database: `npm run docker:db:stop`.
+
+#### Creating an image for a new data release
+
+A "data release" is defined in `package.json` by the `data-file` and `data-version` values. Images pre-loaded with data from the data release are created when the `e2e.yml` GitHub Action workflow runs and are pushed to the GitHub Container Registry (GHCR) package for the namespace in which the workflow runs: the `sage-bionetworks` organization namespace when it runs in the base repo, or the user's namespace (e.g. `hallieswan`) when it runs in a forked repo.
+
+The `sage-bionetworks` package will contain images for data releases that have been specified in `package.json` on `develop` or `main`. The user's package will contain images for data releases that have been specified in `package.json` in branches pushed to their fork. 
+
+If a developer needs to create an image for a data release that does not yet exist in the `sage-bionetworks` package, they should follow these steps:
+
+1. Create a new branch.
+2. Update the `package.json` to reflect the appropriate `data-file` and `data-version` values.
+3. If necessary, update `./scripts/collections.csv` to specify new collections and `./scripts/mongo-create-Indexes.js` to specify new indexes.
+4. Commit the changes.
+5. Push the changes to your remote fork to trigger a run of the `e2e.yml` workflow. 
+6. The new image will be available in your user namespaced GHCR package, e.g. `https://github.com/hallieswan/Agora/pkgs/container/agora-data`.
+7. Update your local `.env` file so `DATA_IMAGE_PATH` points to the newly created image, e.g. `ghcr.io/hallieswan/agora-data:syn13363290.68`.
+8. Start the containerized database: `npm run docker:db:start`.
+
 # Development
 
 ```bash
@@ -183,7 +213,7 @@ npm run test
 npm run test:watch
 
 # Run end-to-end tests (requires build)
-npm run test:e2e
+npm run e2e
 ```
 
 # Deployment
@@ -206,7 +236,7 @@ npm run test
 npm run build
 
 # Run end-to-end tests
-npm run test:e2e
+npm run e2e
 
 # Go to localhost:8080 and verify the app is running without errors
 npm run start
diff --git a/docker/README.md b/docker/README.md
index b832dd85..2ef4cf2b 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -2,8 +2,8 @@
 
 Files in this directory:
 
-- `Dockerfile`: creates the `agora/data` docker image, which contains data for a particular Agora data release (manifest synId + version) and team images and will seed a mongodb at start up using `import-data.sh`
-- `docker-compose.yml`: spins up `mongo` and `agora/data` docker containers
+- `Dockerfile`: creates the `agora-data` docker image, which contains data for a particular Agora data release (manifest synId + version) and team images and will seed a mongodb at start up using `import-data.sh`
+- `docker-compose.yml`: spins up `mongo` and `agora-data` docker containers
 
 ## Workflow Setup
 
diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml
index a1c1afc2..0a7aae29 100644
--- a/docker/docker-compose.yml
+++ b/docker/docker-compose.yml
@@ -11,6 +11,7 @@ services:
       - MONGO_INITDB_DATABASE=${DB_NAME}
   mongo-seed:
     image: ${DATA_IMAGE_PATH}
+    platform: linux/amd64
     depends_on: 
       - mongodb
     environment:
diff --git a/package.json b/package.json
index a92ebd8d..bee07c33 100644
--- a/package.json
+++ b/package.json
@@ -40,7 +40,13 @@
     "data:local": "npm run clean:data && sh ./scripts/get-data-local.sh",
     "e2e": "playwright test --trace on",
     "e2e:ui": "playwright test --ui",
-    "e2e:update": "npm install -D @playwright/test@latest; npx playwright install --with-deps; npx playwright --version"
+    "e2e:update": "npm install -D @playwright/test@latest; npx playwright install --with-deps; npx playwright --version",
+    "create-env": "DB_PASS=$(openssl rand -base64 12) DATA_IMAGE_TAG=$(npm run docker:data-image-tag -s) envsubst < .env.example > .env",
+    "docker:data-image-tag": "jq -r '.\"data-file\" + \".\" + .\"data-version\"' package.json",
+    "docker:db:seed-complete": "echo 'Waiting for docker db to finish loading data (~30s)....'; DATA_CONTAINER=$(docker compose --env-file .env -f ./docker/docker-compose.yml ps -a --format '{{.Name}}' mongo-seed); docker wait ${DATA_CONTAINER}",
+    "docker:db:start": "docker compose --env-file .env -f ./docker/docker-compose.yml up -d && npm run docker:db:seed-complete",
+    "docker:db:stop": "docker compose --env-file .env -f ./docker/docker-compose.yml down",
+    "docker:dev": "set -a; . ./.env; set +a; MONGODB_HOST=\"localhost\" MONGODB_PORT=\"${DB_PORT}\" APP_ENV=\"e2e\" npm run dev"
   },
   "pre-commit": [
     "test"