Skip to content

Commit

Permalink
add sentence_version to output path
Browse files Browse the repository at this point in the history
add the sentence version to the output path where classified sentences are stored

make the image version distinct from the bert model version
  • Loading branch information
bill-baumgartner committed Dec 12, 2023
1 parent 7b334ea commit 7fcc599
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 66 deletions.
72 changes: 7 additions & 65 deletions .github/workflows/bert-models-cicd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -492,102 +492,44 @@ jobs:
- uses: actions/checkout@v2
with:
fetch-depth: 0
# - name: Get changed files
# id: changed-files
# uses: tj-actions/changed-files@v7

# - name: List all modified files
# run: |
# for file in "${{ steps.changed-files.outputs.all_modified_files }}"; do
# echo "$file was modified"
# done

# - name: Run step when a file changes
# if: |
# contains(steps.changed-files.outputs.all_modified_files, env.DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_ENTRYPOINT_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.BASE_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.DATA_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.ENTRYPOINT_FILE)
# run: |
# echo "A modified file has triggered a rebuild of the ${{ env.TASK_NAME }} predict container."


- name: Set up Cloud SDK
# if: |
# contains(steps.changed-files.outputs.all_modified_files, env.DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_ENTRYPOINT_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.BASE_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.DATA_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.ENTRYPOINT_FILE)
uses: google-github-actions/setup-gcloud@main
with:
project_id: ${{ secrets.GCE_PROJECT }}
service_account_key: ${{ secrets.GCE_SA_KEY }}
version: '290.0.1'

- name: Configure Docker Authentication
# if: |
# contains(steps.changed-files.outputs.all_modified_files, env.DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_ENTRYPOINT_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.BASE_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.DATA_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.ENTRYPOINT_FILE)
run: |
gcloud --quiet auth configure-docker
- name: Set MODEL_VERSION env
# if: |
# contains(steps.changed-files.outputs.all_modified_files, env.DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_ENTRYPOINT_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.BASE_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.DATA_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.ENTRYPOINT_FILE)
run: |
echo "MODEL_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV
- name: Set IMAGE_VERSION env
run: |
echo "IMAGE_VERSION=$(grep ${{ env.MODEL_VERSION_KEY }} IMAGE_VERSIONS_PREDICT | cut -f 2 -d '=')" >> $GITHUB_ENV
- name: Set BASE_VERSION env
# if: |
# contains(steps.changed-files.outputs.all_modified_files, env.DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_ENTRYPOINT_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.BASE_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.DATA_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.ENTRYPOINT_FILE)
run: |
echo "BASE_VERSION=$(grep 'BASE' MODEL_VERSIONS | cut -f 2 -d '=')" >> $GITHUB_ENV
- name: Build Docker Image
# if: |
# contains(steps.changed-files.outputs.all_modified_files, env.DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_ENTRYPOINT_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.BASE_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.DATA_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.ENTRYPOINT_FILE)
run: |
docker build --build-arg "PROJECT_ID=$PROJECT_ID" \
--build-arg "TASK_NAME=$TASK_NAME" \
--build-arg "BASE_VERSION=$BASE_VERSION" \
--build-arg "TUNED_MODEL_VERSION=$MODEL_VERSION" \
--build-arg "MODEL_STORAGE_BUCKET=$MODEL_STORAGE_BUCKET" \
--build-arg "CLASSIFICATION_LABELS=${{ env.CLASSIFICATION_LABELS }}" \
--tag "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$MODEL_VERSION" \
--tag "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION" \
-f ${{ env.DOCKERFILE }} .
- name: Publish Docker Image to Google Container Registry
# if: |
# contains(steps.changed-files.outputs.all_modified_files, env.DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.TRAIN_ENTRYPOINT_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.BASE_DOCKERFILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.DATA_FILE) ||
# contains(steps.changed-files.outputs.all_modified_files, env.ENTRYPOINT_FILE)
run: |
docker push "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$MODEL_VERSION"
docker push "gcr.io/$PROJECT_ID/$TASK_NAME-predict:$IMAGE_VERSION"
# # ----------------------------------------------------------------------------------- #
# # ----------------------------------------------------------------------------------- #
Expand Down
1 change: 1 addition & 0 deletions IMAGE_VERSIONS_PREDICT
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
BL_CHEMICAL_TO_DISEASE_OR_PHENOTYPIC_FEATURE=0.4.1
9 changes: 8 additions & 1 deletion scripts/predict.entrypoint.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,13 @@ COLLECTION=$5
# the gcp bucket where the classified (labeled) sentences will be stored
OUTPUT_BUCKET=$6

# the version of the sentences being processed - this will be part of the
# output file path and name. By specifying both the model version and the
# sentence version in the output file path, this allows us to process
# different sentence versions with the same model, or vice versa, without
# overwriting output.
SENTENCE_VERSION=$7

# download the sentence files to process
# cat the sentence files into a single file called test.tsv in the $DATASET_DIR
mkdir /home/dev/sentences
Expand Down Expand Up @@ -63,6 +70,6 @@ paste $DATASET_DIR/test.tsv test_results.tsv | gzip > classified_sentences.tsv.g
[ $? -eq 0 ] || exit 1

# export the bert output file
gsutil cp classified_sentences.tsv.gz "${OUTPUT_BUCKET}/output/classified_sentences/${TASK_NAME}/${TUNED_MODEL_VERSION}/${TASK_NAME}.${TUNED_MODEL_VERSION}.${COLLECTION}.classified_sentences.tsv.gz"
gsutil cp classified_sentences.tsv.gz "${OUTPUT_BUCKET}/output/classified_sentences/sent_${SENTENCE_VERSION}/${TASK_NAME}/model_${TUNED_MODEL_VERSION}/${TASK_NAME}.${SENTENCE_VERSION}_${TUNED_MODEL_VERSION}.${COLLECTION}.classified_sentences.tsv.gz"
[ $? -eq 0 ] || exit 1
popd

0 comments on commit 7fcc599

Please sign in to comment.