Skip to content

Commit

Permalink
build base 0.6 - epochs as a parameter
Browse files Browse the repository at this point in the history
made a parameter for num training epochs
  • Loading branch information
bill-baumgartner committed Apr 28, 2024
1 parent 230e02a commit 4b3a1ef
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 109 deletions.
211 changes: 108 additions & 103 deletions .github/workflows/bert-models-cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,41 +50,41 @@ jobs:
# Note: when trying to run build_push-bert-base with other jobs below, an error is reported on line 90.
# However, when run on its own it works fine.

# build_push-bert-base:
# name: "build/push bluebert-base container image"
# runs-on: ubuntu-latest
# env:
# IMAGE_NAME: bluebert-base
# DOCKERFILE: base.Dockerfile
# permissions:
# contents: 'read'
# id-token: 'write'
# steps:
# - uses: actions/checkout@v4
# with:
# fetch-depth: 0
# - id: 'auth'
# uses: 'google-github-actions/auth@v2'
# with:
# project_id: ${{ secrets.GCE_PROJECT }}
# service_account: ${{ secrets.SERVICE_ACCOUNT }}
# credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
# - name: 'Set up Cloud SDK'
# uses: 'google-github-actions/setup-gcloud@v2'
# with:
# version: '>= 363.0.0'
# - name: Setup Python
# uses: actions/setup-python@v2
# with:
# python-version: 3.7
# - name: Configure Docker Authentication
# run: gcloud --quiet auth configure-docker
# - name: Set MODEL_VERSION env
# run: echo "MODEL_VERSION=$(grep 'BASE' MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV
# - name: Build Docker Image
# run: docker build --tag "gcr.io/$PROJECT_ID/$IMAGE_NAME:$MODEL_VERSION" -f ${{ env.DOCKERFILE }} .
# - name: Publish Docker Image to Google Container Registry
# run: docker push "gcr.io/$PROJECT_ID/$IMAGE_NAME:$MODEL_VERSION"
# Builds the bluebert-base container image from base.Dockerfile and pushes it to
# Google Container Registry, tagged with the BASE version read from MODEL_VERSIONS.
# NOTE(review): PROJECT_ID is not defined in this job; presumably it comes from a
# workflow-level env block -- confirm.
build_push-bert-base:
  name: "build/push bluebert-base container image"
  runs-on: ubuntu-latest
  env:
    IMAGE_NAME: bluebert-base
    DOCKERFILE: base.Dockerfile
  permissions:
    contents: 'read'
    id-token: 'write'
  steps:
    # Full history checkout (fetch-depth: 0 disables the default shallow clone).
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
    # Authenticate to Google Cloud with the service-account key held in secrets.
    - id: 'auth'
      uses: 'google-github-actions/auth@v2'
      with:
        project_id: ${{ secrets.GCE_PROJECT }}
        service_account: ${{ secrets.SERVICE_ACCOUNT }}
        credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
    - name: 'Set up Cloud SDK'
      uses: 'google-github-actions/setup-gcloud@v2'
      with:
        version: '>= 363.0.0'
    - name: Setup Python
      uses: actions/setup-python@v2
      with:
        # Quoted so the version is always read as a string, never as a YAML float.
        python-version: '3.7'
    - name: Configure Docker Authentication
      run: |
        gcloud --quiet auth configure-docker
    - name: Set MODEL_VERSION env
      # Anchor on '^BASE=' so only the BASE entry matches; a bare 'BASE' substring
      # match would also pick up any other key containing those letters and write
      # a multi-line value into $GITHUB_ENV.
      run: |
        echo "MODEL_VERSION=$(grep '^BASE=' MODEL_VERSIONS | cut -f 2 -d '=')" >> "$GITHUB_ENV"
    - name: Build Docker Image
      # DOCKERFILE is read from the job env as a quoted shell variable instead of
      # being spliced into the command text by expression expansion.
      run: |
        docker build --tag "gcr.io/$PROJECT_ID/$IMAGE_NAME:$MODEL_VERSION" -f "$DOCKERFILE" .
    - name: Publish Docker Image to Google Container Registry
      run: |
        docker push "gcr.io/$PROJECT_ID/$IMAGE_NAME:$MODEL_VERSION"

# # ----------------------------------------------------------------------------------- #
# # ----------------------------------------------------------------------------------- #
Expand Down Expand Up @@ -335,13 +335,13 @@ jobs:
# docker push "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$MODEL_VERSION"


# # # # ----------------------------------------------------------------------------------- #
# # # # ----------------------------------------------------------------------------------- #
# # # # ----------------------------------------------------------------------------------- #
# # # # TRAIN BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE MODEL #
# # # # ----------------------------------------------------------------------------------- #
# # # # ----------------------------------------------------------------------------------- #
# # # # ----------------------------------------------------------------------------------- #
# # # # # ----------------------------------------------------------------------------------- #
# # # # # ----------------------------------------------------------------------------------- #
# # # # # ----------------------------------------------------------------------------------- #
# # # # # TRAIN BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE MODEL #
# # # # # ----------------------------------------------------------------------------------- #
# # # # # ----------------------------------------------------------------------------------- #
# # # # # ----------------------------------------------------------------------------------- #

# build_push_train-bl_chemical_to_disease_or_phenotypic_feature-train:
# name: "build/push/train bl_chemical_to_disease_or_phenotypic_feature model"
Expand Down Expand Up @@ -386,12 +386,17 @@ jobs:
# run: |
# echo "BASE_VERSION=$(grep 'BASE' MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV

# - name: Set NUM_EPOCHS env
# run: |
# echo "NUM_EPOCHS=5.0" >> $GITHUB_ENV

# - name: Build Docker Image
# run: |
# docker build --build-arg "PROJECT_ID=$PROJECT_ID" \
# --build-arg "TASK_NAME=$TASK_NAME" \
# --build-arg "BASE_VERSION=$BASE_VERSION" \
# --build-arg "TUNED_MODEL_VERSION=$MODEL_VERSION" \
# --build-arg "EPOCHS=$NUM_EPOCHS" \
# --tag "gcr.io/$PROJECT_ID/$TASK_NAME-train:$MODEL_VERSION" \
# -f ${{ env.DOCKERFILE }} .

Expand All @@ -413,73 +418,73 @@ jobs:
# sh -c ".github/workflows/monitor-ai-platform-job.sh ${{ env.AI_PLATFORM_JOB_NAME }}"
# timeout-minutes: 500

# # -------------------------------------------------------------------------------------- #
# # BUILD/PUSH BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE CLASSIFICATION CONTAINER IMAGE #
# # -------------------------------------------------------------------------------------- #
# # # -------------------------------------------------------------------------------------- #
# # # BUILD/PUSH BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE CLASSIFICATION CONTAINER IMAGE #
# # # -------------------------------------------------------------------------------------- #

build_push-bl_chemical_to_disease_or_phenotypic_feature-predict:
name: "build/push bl_chemical_to_disease_or_phenotypic_feature predict container"
# needs: "build_push_train-bl_chemical_to_disease_or_phenotypic_feature-train"
runs-on: ubuntu-latest
env:
TASK_NAME: bl_chemical_to_disease_or_phenotypic_feature
DOCKERFILE: predict.Dockerfile
TRAIN_DOCKERFILE: train.Dockerfile
TRAIN_ENTRYPOINT_FILE: scripts/train.entrypoint.sh
ENTRYPOINT_FILE: scripts/predict.entrypoint.sh
BASE_DOCKERFILE: base.Dockerfile
MODEL_VERSION_KEY: BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE
#CLASSIFICATION_LABELS: "treats causes_or_contributes_to false"
CLASSIFICATION_LABELS: "associated_with_resistance_to associated_with_sensitivity_to decreases_risk_for exacerbates exposure_is_origin_of has_excessive_amount_in has_no_effect_on increases_risk_for is_biomarker_for is_contraindicated_for is_deficient_in prevents results_in_side_effect treats used_in_diagnosis_of other"
DATA_FILE: data/bl_chemical_to_disease_or_phenotypic_feature/data.tsv
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0
- id: 'auth'
uses: 'google-github-actions/auth@v2'
with:
project_id: ${{ secrets.GCE_PROJECT }}
service_account: ${{ secrets.SERVICE_ACCOUNT }}
credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
- name: 'Set up Cloud SDK'
uses: 'google-github-actions/setup-gcloud@v2'
with:
version: '>= 363.0.0'
- name: Setup Python
uses: actions/setup-python@v2
with:
python-version: 3.7
- name: Configure Docker Authentication
run: |
gcloud --quiet auth configure-docker
# build_push-bl_chemical_to_disease_or_phenotypic_feature-predict:
# name: "build/push bl_chemical_to_disease_or_phenotypic_feature predict container"
# needs: "build_push_train-bl_chemical_to_disease_or_phenotypic_feature-train"
# runs-on: ubuntu-latest
# env:
# TASK_NAME: bl_chemical_to_disease_or_phenotypic_feature
# DOCKERFILE: predict.Dockerfile
# TRAIN_DOCKERFILE: train.Dockerfile
# TRAIN_ENTRYPOINT_FILE: scripts/train.entrypoint.sh
# ENTRYPOINT_FILE: scripts/predict.entrypoint.sh
# BASE_DOCKERFILE: base.Dockerfile
# MODEL_VERSION_KEY: BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE
# #CLASSIFICATION_LABELS: "treats causes_or_contributes_to false"
# CLASSIFICATION_LABELS: "associated_with_resistance_to associated_with_sensitivity_to decreases_risk_for exacerbates exposure_is_origin_of has_excessive_amount_in has_no_effect_on increases_risk_for is_biomarker_for is_contraindicated_for is_deficient_in prevents results_in_side_effect treats used_in_diagnosis_of other"
# DATA_FILE: data/bl_chemical_to_disease_or_phenotypic_feature/data.tsv
# steps:
# - uses: actions/checkout@v4
# with:
# fetch-depth: 0
# - id: 'auth'
# uses: 'google-github-actions/auth@v2'
# with:
# project_id: ${{ secrets.GCE_PROJECT }}
# service_account: ${{ secrets.SERVICE_ACCOUNT }}
# credentials_json: ${{ secrets.SERVICE_ACCOUNT_KEY }}
# - name: 'Set up Cloud SDK'
# uses: 'google-github-actions/setup-gcloud@v2'
# with:
# version: '>= 363.0.0'
# - name: Setup Python
# uses: actions/setup-python@v2
# with:
# python-version: 3.7
# - name: Configure Docker Authentication
# run: |
# gcloud --quiet auth configure-docker

- name: Set MODEL_VERSION env
run: |
echo "MODEL_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV
# - name: Set MODEL_VERSION env
# run: |
# echo "MODEL_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV

- name: Set IMAGE_VERSION env
run: |
echo "IMAGE_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} IMAGE_VERSIONS_PREDICT | cut -f 2 -d '=')" >> $GITHUB_ENV
# - name: Set IMAGE_VERSION env
# run: |
# echo "IMAGE_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} IMAGE_VERSIONS_PREDICT | cut -f 2 -d '=')" >> $GITHUB_ENV

- name: Set BASE_VERSION env
run: |
echo "BASE_VERSION=$(grep 'BASE' MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV
# - name: Set BASE_VERSION env
# run: |
# echo "BASE_VERSION=$(grep 'BASE' MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV

- name: Build Docker Image
run: |
docker build --build-arg "PROJECT_ID=$PROJECT_ID" \
--build-arg "TASK_NAME=$TASK_NAME" \
--build-arg "BASE_VERSION=$BASE_VERSION" \
--build-arg "TUNED_MODEL_VERSION=$MODEL_VERSION" \
--build-arg "MODEL_STORAGE_BUCKET=$MODEL_STORAGE_BUCKET" \
--build-arg "CLASSIFICATION_LABELS=${{ env.CLASSIFICATION_LABELS }}" \
--tag "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION" \
-f ${{ env.DOCKERFILE }} .
# - name: Build Docker Image
# run: |
# docker build --build-arg "PROJECT_ID=$PROJECT_ID" \
# --build-arg "TASK_NAME=$TASK_NAME" \
# --build-arg "BASE_VERSION=$BASE_VERSION" \
# --build-arg "TUNED_MODEL_VERSION=$MODEL_VERSION" \
# --build-arg "MODEL_STORAGE_BUCKET=$MODEL_STORAGE_BUCKET" \
# --build-arg "CLASSIFICATION_LABELS=${{ env.CLASSIFICATION_LABELS }}" \
# --tag "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION" \
# -f ${{ env.DOCKERFILE }} .

- name: Publish Docker Image to Google Container Registry
run: |
docker push "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION"
# - name: Publish Docker Image to Google Container Registry
# run: |
# docker push "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION"



Expand Down
2 changes: 1 addition & 1 deletion IMAGE_VERSIONS_PREDICT
Original file line number Diff line number Diff line change
@@ -1 +1 @@
BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE=0.9
BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE=0.10
4 changes: 2 additions & 2 deletions MODEL_VERSIONS
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
BASE=0.5.2
BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE=0.9
BASE=0.6
BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE=0.10
BL_CHEMICAL_TO_GENE=0.1
BL_DISEASE_TO_PHENOTYPIC_FEATURE=0.1
BL_GENE_REGULATORY_RELATIONSHIP=0.1
Expand Down
7 changes: 5 additions & 2 deletions scripts/train.entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@

TASK_NAME=$1
TUNED_MODEL_VERSION=$2
MODEL_STORAGE_BUCKET=$3
EPOCHS=$3
MODEL_STORAGE_BUCKET=$4


echo "~~~~~~~~TASK_NAME: ${TASK_NAME}"
echo "~~~~~~~~TUNED_MODEL_VERSION: ${TUNED_MODEL_VERSION}"
echo "~~~~~~~~EPOCHS: ${EPOCHS}"
echo "~~~~~~~~MODEL_STORAGE_BUCKET: ${MODEL_STORAGE_BUCKET}"

if [ -z "${MODEL_STORAGE_BUCKET}" ]; then
Expand All @@ -24,7 +27,7 @@ python bluebert/run_bluebert.py \
--vocab_file=$BlueBERT_DIR/vocab.txt \
--bert_config_file=$BlueBERT_DIR/bert_config.json \
--init_checkpoint=$BlueBERT_DIR/bert_model.ckpt \
--num_train_epochs=10.0 \
--num_train_epochs=$EPOCHS \
--data_dir=$DATASET_DIR \
--output_dir=$OUTPUT_DIR \
--do_lower_case=true
Expand Down
6 changes: 5 additions & 1 deletion train.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ ARG TUNED_MODEL_VERSION=latest
# and also must align with the data directory structure, e.g. bl_chemical_to_gene
ARG TASK_NAME=latest

# Number of training epochs; passed to the entrypoint script, which uses it as the
# value of --num_train_epochs. The default is 10.0 (the value that was previously
# hard-coded in scripts/train.entrypoint.sh) so that a build without
# --build-arg EPOCHS still produces a numeric value.
ARG EPOCHS=10.0

# Download the base BlueBERT model
WORKDIR /home/dev/models/baseline
RUN wget https://ftp.ncbi.nlm.nih.gov/pub/lu/Suppl/NCBI-BERT/NCBI_BERT_pubmed_uncased_L-12_H-768_A-12.zip && \
Expand Down Expand Up @@ -62,8 +65,9 @@ RUN sed -i '1s/^/id docid arg1 arg2 label\n/' test.blue.gs && \

ENV TUNED_MODEL_VERSION_ENV=$TUNED_MODEL_VERSION
ENV TASK_NAME_ENV=$TASK_NAME
ENV EPOCHS=$EPOCHS

ENTRYPOINT /home/dev/entrypoint.sh "$TASK_NAME_ENV" "$TUNED_MODEL_VERSION_ENV" "$@"
ENTRYPOINT /home/dev/entrypoint.sh "$TASK_NAME_ENV" "$TUNED_MODEL_VERSION_ENV" "$EPOCHS" "$@"

# To build:
# docker build --build-arg "PROJECT_ID=[PROJECT_ID]" \
Expand Down

0 comments on commit 4b3a1ef

Please sign in to comment.