Skip to content

Commit

Permalink
train disease - cell, gene, process v2
Browse files Browse the repository at this point in the history
  • Loading branch information
bill-baumgartner committed Aug 30, 2024
1 parent 251bb4a commit a689496
Show file tree
Hide file tree
Showing 12 changed files with 88,712 additions and 13,688 deletions.
274 changes: 137 additions & 137 deletions .github/workflows/bert-models-cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -549,159 +549,159 @@ jobs:
run: |
docker push "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION"
# # # # # # ----------------------------------------------------------------------------------- #
# # # # # # ----------------------------------------------------------------------------------- #
# # # # # # ----------------------------------------------------------------------------------- #
# # # # # # TRAIN BL_DISEASE_TO_PHENOTYPE MODEL #
# # # # # # ----------------------------------------------------------------------------------- #
# # # # # # ----------------------------------------------------------------------------------- #
# # # # # # ----------------------------------------------------------------------------------- #
# # # # # # # ----------------------------------------------------------------------------------- #
# # # # # # # ----------------------------------------------------------------------------------- #
# # # # # # # ----------------------------------------------------------------------------------- #
# # # # # # # TRAIN BL_DISEASE_TO_PHENOTYPE MODEL #
# # # # # # # ----------------------------------------------------------------------------------- #
# # # # # # # ----------------------------------------------------------------------------------- #
# # # # # # # ----------------------------------------------------------------------------------- #

build_push_train-bl_disease_to_phenotype-train:
name: "build/push/train bl_disease_to_phenotype model"
# needs: "build_push-bert-base"
runs-on: ubuntu-latest
env:
TASK_NAME: bl_disease_to_phenotype
DOCKERFILE: train.Dockerfile
ENTRYPOINT_FILE: scripts/train.entrypoint.sh
BASE_DOCKERFILE: base.Dockerfile
DATA_FILE: data/bl_disease_to_phenotype/data.tsv
MODEL_VERSION_KEY: BL_DISEASE_TO_PHENOTYPE
AI_PLATFORM_JOB_NAME: "bl_disease_to_phenotype_train_${{ github.run_number }}"
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- id: 'auth'
uses: 'google-github-actions/auth@v2'
with:
project_id: ${{ secrets.GCE_PROJECT }}
service_account: ${{ secrets.SERVICE_ACCOUNT }}
credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@v2'
with:
version: '>= 363.0.0'
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.7
# build_push_train-bl_disease_to_phenotype-train:
# name: "build/push/train bl_disease_to_phenotype model"
# # needs: "build_push-bert-base"
# runs-on: ubuntu-latest
# env:
# TASK_NAME: bl_disease_to_phenotype
# DOCKERFILE: train.Dockerfile
# ENTRYPOINT_FILE: scripts/train.entrypoint.sh
# BASE_DOCKERFILE: base.Dockerfile
# DATA_FILE: data/bl_disease_to_phenotype/data.tsv
# MODEL_VERSION_KEY: BL_DISEASE_TO_PHENOTYPE
# AI_PLATFORM_JOB_NAME: "bl_disease_to_phenotype_train_${{ github.run_number }}"
# steps:
# - uses: actions/checkout@v4
# with:
# fetch-depth: 0
# - id: 'auth'
# uses: 'google-github-actions/auth@v2'
# with:
# project_id: ${{ secrets.GCE_PROJECT }}
# service_account: ${{ secrets.SERVICE_ACCOUNT }}
# credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
# - name: 'Set up Cloud SDK'
# uses: 'google-github-actions/setup-gcloud@v2'
# with:
# version: '>= 363.0.0'
# - name: Setup Python
# uses: actions/setup-python@v2
# with:
# python-version: 3.7

- name: Configure Docker Authentication
run: |
gcloud --quiet auth configure-docker
# - name: Configure Docker Authentication
# run: |
# gcloud --quiet auth configure-docker

- name: Set MODEL_VERSION env
run: |
echo "MODEL_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV
# - name: Set MODEL_VERSION env
# run: |
# echo "MODEL_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV

- name: Set BASE_VERSION env
run: |
echo "BASE_VERSION=$(grep 'BASE' MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV
# - name: Set BASE_VERSION env
# run: |
# echo "BASE_VERSION=$(grep 'BASE' MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV

- name: Set NUM_EPOCHS env
run: |
echo "NUM_EPOCHS=5.0" >> $GITHUB_ENV
# - name: Set NUM_EPOCHS env
# run: |
# echo "NUM_EPOCHS=5.0" >> $GITHUB_ENV

- name: Build Docker Image
run: |
docker build --build-arg "PROJECT_ID=$PROJECT_ID" \
--build-arg "TASK_NAME=$TASK_NAME" \
--build-arg "BASE_VERSION=$BASE_VERSION" \
--build-arg "TUNED_MODEL_VERSION=$MODEL_VERSION" \
--build-arg "EPOCHS=$NUM_EPOCHS" \
--tag "gcr.io/$PROJECT_ID/$TASK_NAME-train:$MODEL_VERSION" \
-f ${{ env.DOCKERFILE }} .
# - name: Build Docker Image
# run: |
# docker build --build-arg "PROJECT_ID=$PROJECT_ID" \
# --build-arg "TASK_NAME=$TASK_NAME" \
# --build-arg "BASE_VERSION=$BASE_VERSION" \
# --build-arg "TUNED_MODEL_VERSION=$MODEL_VERSION" \
# --build-arg "EPOCHS=$NUM_EPOCHS" \
# --tag "gcr.io/$PROJECT_ID/$TASK_NAME-train:$MODEL_VERSION" \
# -f ${{ env.DOCKERFILE }} .

- name: Publish Docker Image to Google Container Registry
run: |
docker push "gcr.io/$PROJECT_ID/$TASK_NAME-train:$MODEL_VERSION"
# - name: Publish Docker Image to Google Container Registry
# run: |
# docker push "gcr.io/$PROJECT_ID/$TASK_NAME-train:$MODEL_VERSION"

- name: Submit a training job to AI Platform to train and cache the model
run: |
gcloud ai-platform jobs submit training ${{ env.AI_PLATFORM_JOB_NAME }} \
--scale-tier=CUSTOM \
--master-machine-type=n1-standard-8 \
--master-accelerator=count=1,type=nvidia-tesla-v100 \
--region "$GCE_REGION" \
--master-image-uri "gcr.io/$PROJECT_ID/$TASK_NAME-train:$MODEL_VERSION" \
-- \
NO_ARG \
"gs://$MODEL_STORAGE_BUCKET"
# - name: Submit a training job to AI Platform to train and cache the model
# run: |
# gcloud ai-platform jobs submit training ${{ env.AI_PLATFORM_JOB_NAME }} \
# --scale-tier=CUSTOM \
# --master-machine-type=n1-standard-8 \
# --master-accelerator=count=1,type=nvidia-tesla-v100 \
# --region "$GCE_REGION" \
# --master-image-uri "gcr.io/$PROJECT_ID/$TASK_NAME-train:$MODEL_VERSION" \
# -- \
# NO_ARG \
# "gs://$MODEL_STORAGE_BUCKET"

- name: Monitor the training job
run: |
sh -c ".github/workflows/monitor-ai-platform-job.sh ${{ env.AI_PLATFORM_JOB_NAME }}"
timeout-minutes: 500
# - name: Monitor the training job
# run: |
# sh -c ".github/workflows/monitor-ai-platform-job.sh ${{ env.AI_PLATFORM_JOB_NAME }}"
# timeout-minutes: 500

# # -------------------------------------------------------------------------------------- #
# # BUILD/PUSH BL_DISEASE_TO_PHENOTYPE CLASSIFICATION CONTAINER IMAGE #
# # -------------------------------------------------------------------------------------- #
# # # -------------------------------------------------------------------------------------- #
# # # BUILD/PUSH BL_DISEASE_TO_PHENOTYPE CLASSIFICATION CONTAINER IMAGE #
# # # -------------------------------------------------------------------------------------- #

build_push-bl_disease_to_phenotype-predict:
name: "build/push bl_disease_to_phenotype predict container"
needs: "build_push_train-bl_disease_to_phenotype-train"
runs-on: ubuntu-latest
env:
TASK_NAME: bl_disease_to_phenotype
DOCKERFILE: predict.Dockerfile
TRAIN_DOCKERFILE: train.Dockerfile
TRAIN_ENTRYPOINT_FILE: scripts/train.entrypoint.sh
ENTRYPOINT_FILE: scripts/predict.entrypoint.sh
BASE_DOCKERFILE: base.Dockerfile
MODEL_VERSION_KEY: BL_DISEASE_TO_PHENOTYPE
# Don't forget to change labels
CLASSIFICATION_LABELS: "has_phenotype other"
DATA_FILE: data/bl_disease_to_phenotype/data.tsv
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- id: 'auth'
uses: 'google-github-actions/auth@v2'
with:
project_id: ${{ secrets.GCE_PROJECT }}
service_account: ${{ secrets.SERVICE_ACCOUNT }}
credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@v2'
with:
version: '>= 363.0.0'
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Configure Docker Authentication
run: |
gcloud --quiet auth configure-docker
# build_push-bl_disease_to_phenotype-predict:
# name: "build/push bl_disease_to_phenotype predict container"
# needs: "build_push_train-bl_disease_to_phenotype-train"
# runs-on: ubuntu-latest
# env:
# TASK_NAME: bl_disease_to_phenotype
# DOCKERFILE: predict.Dockerfile
# TRAIN_DOCKERFILE: train.Dockerfile
# TRAIN_ENTRYPOINT_FILE: scripts/train.entrypoint.sh
# ENTRYPOINT_FILE: scripts/predict.entrypoint.sh
# BASE_DOCKERFILE: base.Dockerfile
# MODEL_VERSION_KEY: BL_DISEASE_TO_PHENOTYPE
# # Don't forget to change labels
# CLASSIFICATION_LABELS: "has_phenotype other"
# DATA_FILE: data/bl_disease_to_phenotype/data.tsv
# steps:
# - uses: actions/checkout@v4
# with:
# fetch-depth: 0
# - id: 'auth'
# uses: 'google-github-actions/auth@v2'
# with:
# project_id: ${{ secrets.GCE_PROJECT }}
# service_account: ${{ secrets.SERVICE_ACCOUNT }}
# credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
# - name: 'Set up Cloud SDK'
# uses: 'google-github-actions/setup-gcloud@v2'
# with:
# version: '>= 363.0.0'
# - name: Setup Python
# uses: actions/setup-python@v2
# with:
# python-version: 3.7
# - name: Configure Docker Authentication
# run: |
# gcloud --quiet auth configure-docker

- name: Set MODEL_VERSION env
run: |
echo "MODEL_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV
# - name: Set MODEL_VERSION env
# run: |
# echo "MODEL_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV

- name: Set IMAGE_VERSION env
run: |
echo "IMAGE_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} IMAGE_VERSIONS_PREDICT | cut -f 2 -d '=')" >> $GITHUB_ENV
# - name: Set IMAGE_VERSION env
# run: |
# echo "IMAGE_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} IMAGE_VERSIONS_PREDICT | cut -f 2 -d '=')" >> $GITHUB_ENV

- name: Set BASE_VERSION env
run: |
echo "BASE_VERSION=$(grep 'BASE' MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV
# - name: Set BASE_VERSION env
# run: |
# echo "BASE_VERSION=$(grep 'BASE' MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV

- name: Build Docker Image
run: |
docker build --build-arg "PROJECT_ID=$PROJECT_ID" \
--build-arg "TASK_NAME=$TASK_NAME" \
--build-arg "BASE_VERSION=$BASE_VERSION" \
--build-arg "TUNED_MODEL_VERSION=$MODEL_VERSION" \
--build-arg "MODEL_STORAGE_BUCKET=$MODEL_STORAGE_BUCKET" \
--build-arg "CLASSIFICATION_LABELS=${{ env.CLASSIFICATION_LABELS }}" \
--tag "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION" \
-f ${{ env.DOCKERFILE }} .
# - name: Build Docker Image
# run: |
# docker build --build-arg "PROJECT_ID=$PROJECT_ID" \
# --build-arg "TASK_NAME=$TASK_NAME" \
# --build-arg "BASE_VERSION=$BASE_VERSION" \
# --build-arg "TUNED_MODEL_VERSION=$MODEL_VERSION" \
# --build-arg "MODEL_STORAGE_BUCKET=$MODEL_STORAGE_BUCKET" \
# --build-arg "CLASSIFICATION_LABELS=${{ env.CLASSIFICATION_LABELS }}" \
# --tag "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION" \
# -f ${{ env.DOCKERFILE }} .

- name: Publish Docker Image to Google Container Registry
run: |
docker push "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION"
# - name: Publish Docker Image to Google Container Registry
# run: |
# docker push "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION"

# # # # # # ----------------------------------------------------------------------------------- #
# # # # # # ----------------------------------------------------------------------------------- #
Expand Down
6 changes: 3 additions & 3 deletions IMAGE_VERSIONS_PREDICT
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ BL_PROCESS_TO_ANATOMY=0.1
BL_PROCESS_TO_COMPONENT_CELL=0.1
BL_PROCESS_TO_CELL=0.1
BL_DISEASE_TO_ANATOMY=0.3
BL_DISEASE_TO_CELL=0.3
BL_DISEASE_TO_GENE=0.2
BL_DISEASE_TO_CELL=0.4
BL_DISEASE_TO_GENE=0.3
BL_DISEASE_TO_PHENOTYPE=0.2
BL_DISEASE_TO_PROCESS=0.4
BL_DISEASE_TO_PROCESS=0.5
6 changes: 3 additions & 3 deletions MODEL_VERSIONS
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ BL_PROCESS_TO_ANATOMY=0.1
BL_PROCESS_TO_COMPONENT_CELL=0.1
BL_PROCESS_TO_CELL=0.1
BL_DISEASE_TO_ANATOMY=0.3
BL_DISEASE_TO_CELL=0.3
BL_DISEASE_TO_GENE=0.2
BL_DISEASE_TO_CELL=0.4
BL_DISEASE_TO_GENE=0.3
BL_DISEASE_TO_PHENOTYPE=0.2
BL_DISEASE_TO_PROCESS=0.4
BL_DISEASE_TO_PROCESS=0.5
3 changes: 3 additions & 0 deletions data/bl_disease_to_cell/CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
v0.4
- trained on real sentences judged by GPT (disease_cell.bert.v2.random.tsv)

v0.3
- trained on synthetic sentences created by GPT (disease-has_basis_in-cell.synthetic.bert.v1.random.tsv)
- predicate is now has_basis_in
Expand Down
Loading

0 comments on commit a689496

Please sign in to comment.