From 194503281aeeab84f3f585138ce04ee49d001d68 Mon Sep 17 00:00:00 2001 From: Hallie Swan <26949006+hallieswan@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:59:33 -0700 Subject: [PATCH] AG-1498: support use of containerized data image in local development [skip ci] --- .env.example | 5 +++++ .gitignore | 4 ++++ README.md | 42 +++++++++++++++++++++++++++++++++------ docker/README.md | 4 ++-- docker/docker-compose.yml | 1 + package.json | 8 +++++++- 6 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 .env.example diff --git a/.env.example b/.env.example new file mode 100644 index 00000000..6f395adf --- /dev/null +++ b/.env.example @@ -0,0 +1,5 @@ +DB_USER="root" +DB_PASS="${DB_PASS}" +DB_PORT="27015" +DB_NAME="agora" +DATA_IMAGE_PATH="ghcr.io/sage-bionetworks/agora-data:${DATA_IMAGE_TAG}" diff --git a/.gitignore b/.gitignore index dc4ac05b..38f0ccaf 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,7 @@ testem.log # System files .DS_Store Thumbs.db + +# Environment file +.env + diff --git a/README.md b/README.md index e867757c..1ff11469 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,11 @@ cd Agora npm install ``` -### 2 - Create database +The next sections focus on setting up a mongo database loaded with Agora's data. There are two options: + - Use a local mongo database that you manually create and populate with data (steps 2-5) + - Use a containerized mongo database that is pre-populated with data (step 6) + +### 2 - Create local database You will need to create a MongoDB database and name it `agora`. 
@@ -53,7 +57,7 @@ npm run mongo:start npm run mongo:start:windows ``` -### 3 - Populate database +### 3 - Populate local database Agora's data is stored in json files in the [Agora Synapse project](https://www.synapse.org/#!Synapse:syn11850457/files/), in the following subfolders: * [Agora Live Data](https://www.synapse.org/#!Synapse:syn12177492) - This folder contains all production data releases, as well as data releases that were never released to production @@ -148,14 +152,14 @@ npm run mongo:create:indexes You'll need `Linux` to run the previous scripts. If you need to do this in `Windows`, you can get any `Linux` distribution at the `Windows Store` (e.g. `Ubuntu`). -### 4 - Build +### 4 - Build using local database ```bash # Build the server and app npm run dev ``` -### 5 - Start +### 5 - Start using local database ```bash # Start the server and app @@ -164,6 +168,32 @@ npm run start Go to [http://localhost:8080](http://localhost:8080) +### 6 - Use containerized database + +1. Install Docker, if necessary. +2. Update `data-file` and `data-version` in `package.json` to reflect the desired data release version, if necessary. +3. Create an environment file: `npm run create-env`. +4. Start the containerized database: `npm run docker:db:start`. The necessary images will be pulled from GHCR. If you would like to use a different image, update DATA_IMAGE_PATH in .env. If the desired image does not exist, see steps below to create the desired image. +5. Run the server and app against the containerized database: `npm run docker:dev`. +6. Stop the containerized database: `npm run docker:db:stop`. + +#### Creating an image for a new data release + +A "data release" is defined in the `package.json` by the `data-file` and `data-version` values. 
Images pre-loaded with data from the data release are created when the `e2e.yml` GitHub Actions workflow runs and are pushed to the GitHub Container Registry (GHCR) package for that namespace -- the `sage-bionetworks` organization namespace when the workflow runs in the base repo, or the user's namespace (e.g. `hallieswan`) when it runs in a forked repo. + +The `sage-bionetworks` package will contain images for data releases that have been specified in `package.json` on `develop` or `main`. The user's package will contain images for data releases that have been specified in `package.json` in branches pushed to their fork. + +If a dev needs to create an image for a data release that does not yet exist in the `sage-bionetworks` package, they should follow these steps: + +1. Create a new branch. +2. Update the `package.json` to reflect the appropriate `data-file` and `data-version` values. +3. If necessary, update `./scripts/collections.csv` to specify new collections and `./scripts/mongo-create-Indexes.js` to specify new indexes. +4. Commit the changes. +5. Push the changes to your remote fork to trigger a run of the `e2e.yml` workflow. +6. The new image will be available in your user-namespaced GHCR package, e.g. `https://github.com/hallieswan/Agora/pkgs/container/agora-data`. +7. Update your local `.env` file so `DATA_IMAGE_PATH` points to the newly created image, e.g. `ghcr.io/hallieswan/agora-data:syn13363290.68`. +8. Start the containerized database: `npm run docker:db:start`. 
+ # Development ```bash @@ -183,7 +213,7 @@ npm run test npm run test:watch # Run end-to-end tests (requires build) -npm run test:e2e +npm run e2e ``` # Deployment @@ -206,7 +236,7 @@ npm run test npm run build # Run end-to-end tests -npm run test:e2e +npm run e2e # Go to localhost:8080 and verify the app is running without errors npm run start diff --git a/docker/README.md b/docker/README.md index b832dd85..2ef4cf2b 100644 --- a/docker/README.md +++ b/docker/README.md @@ -2,8 +2,8 @@ Files in this directory: -- `Dockerfile`: creates the `agora/data` docker image, which contains data for a particular Agora data release (manifest synId + version) and team images and will seed a mongodb at start up using `import-data.sh` -- `docker-compose.yml`: spins up `mongo` and `agora/data` docker containers +- `Dockerfile`: creates the `agora-data` docker image, which contains data for a particular Agora data release (manifest synId + version) and team images and will seed a mongodb at start up using `import-data.sh` +- `docker-compose.yml`: spins up `mongo` and `agora-data` docker containers ## Workflow Setup diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml index a1c1afc2..0a7aae29 100644 --- a/docker/docker-compose.yml +++ b/docker/docker-compose.yml @@ -11,6 +11,7 @@ services: - MONGO_INITDB_DATABASE=${DB_NAME} mongo-seed: image: ${DATA_IMAGE_PATH} + platform: linux/amd64 depends_on: - mongodb environment: diff --git a/package.json b/package.json index a92ebd8d..bee07c33 100644 --- a/package.json +++ b/package.json @@ -40,7 +40,13 @@ "data:local": "npm run clean:data && sh ./scripts/get-data-local.sh", "e2e": "playwright test --trace on", "e2e:ui": "playwright test --ui", - "e2e:update": "npm install -D @playwright/test@latest; npx playwright install --with-deps; npx playwright --version" + "e2e:update": "npm install -D @playwright/test@latest; npx playwright install --with-deps; npx playwright --version", + "create-env": "DB_PASS=$(openssl rand -base64 12) 
DATA_IMAGE_TAG=$(npm run docker:data-image-tag -s) envsubst < .env.example > .env", + "docker:data-image-tag": "jq -r '.\"data-file\" + \".\" + .\"data-version\"' package.json", + "docker:db:seed-complete": "echo 'Waiting for docker db to finish loading data (~30s)....'; DATA_CONTAINER=$(docker compose --env-file .env -f ./docker/docker-compose.yml ps -a --format '{{.Name}}' mongo-seed); docker wait ${DATA_CONTAINER}", + "docker:db:start": "docker compose --env-file .env -f ./docker/docker-compose.yml up -d && npm run docker:db:seed-complete", + "docker:db:stop": "docker compose --env-file .env -f ./docker/docker-compose.yml down", + "docker:dev": "set -a; . ./.env; set +a; MONGODB_HOST=\"localhost\" MONGODB_PORT=\"${DB_PORT}\" APP_ENV=\"e2e\" npm run dev" }, "pre-commit": [ "test"