Sentiment: Add working test pipeline

- Creates a SageMaker pipeline with a processing step only
- Builds and pushes a Docker image to ECR, used by the pipeline's processing step

nico-corthorn committed Nov 30, 2024
1 parent c6d6392 commit 889e6c8

Showing 13 changed files with 591 additions and 6 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
@@ -43,10 +43,10 @@ jobs:
        env:
          AWS_SAM_STACK_NAME: ${{ secrets.AWS_SAM_STACK_NAME }}
        run: |
-          make sambuild
+          make build
      - name: SAM deploy
        if: success()
        env:
          AWS_SAM_STACK_NAME: ${{ secrets.AWS_SAM_STACK_NAME }}
        run: |
-          make samdeploy
+          make deploy
3 changes: 3 additions & 0 deletions .gitignore
@@ -147,3 +147,6 @@ db/viewer.ipynb

# WRDS solutions
esgtools/wrds
+
+# Package duplicates
+esgtools/sentiment/inference_pipeline/esgtools
13 changes: 10 additions & 3 deletions Makefile
@@ -19,7 +19,7 @@ test:

pre_pr: format lint test

-sambuild:
+build:
	@echo "Cleaning previous build..."
	rm -rf .aws-sam/build

@@ -41,11 +41,11 @@ sambuild:
	@echo "Size of entire .aws-sam directory:"
	du -sh .aws-sam

-samdeploy-local:
+deploy-local:
	@echo "Deploying from local samconfig file..."
	sam deploy --config-file samconfig.toml

-samdeploy:
+deploy:
	@echo "Deploying..."
	sam deploy \
		--stack-name sam-app \
@@ -56,3 +56,10 @@ samdeploy:
		--no-confirm-changeset \
		--no-fail-on-empty-changeset \
		--disable-rollback false
+
+push-sentiment-container:
+	chmod +x ./esgtools/sentiment/inference_pipeline/build_and_push.sh
+	./esgtools/sentiment/inference_pipeline/build_and_push.sh
+
+deploy-sentiment-pipeline:
+	python ./esgtools/sentiment/inference_pipeline/deploy_pipeline.py
5 changes: 4 additions & 1 deletion dev_requirements.txt
@@ -8,4 +8,7 @@ pytest==7.4.3
pytest-cov==4.1.0

# Deployment
-aws-sam-cli==1.127.0
+aws-sam-cli==1.127.0
+
+# SageMaker
+sagemaker==2.197.1
44 changes: 44 additions & 0 deletions esgtools/sentiment/inference_pipeline/Dockerfile
@@ -0,0 +1,44 @@
FROM --platform=linux/amd64 nvidia/cuda:11.8.0-base-ubuntu22.04

# Set working directory
WORKDIR /opt/ml/code

# Install Python and basic dependencies
RUN apt-get update && apt-get install -y \
    python3-pip \
    python3-dev \
    && rm -rf /var/lib/apt/lists/*

# Create necessary directories
RUN mkdir -p /opt/ml/processing/input/code && \
    mkdir -p /opt/ml/processing/output && \
    chmod -R 777 /opt/ml/processing

# Copy files individually to maintain structure
COPY esgtools ./esgtools
COPY setup.py .
COPY lambda_requirements.txt .
COPY preprocessing.py .
COPY requirements.txt .

# Install Python packages from requirements.txt
RUN pip3 install -r requirements.txt

# Install the package
RUN pip3 install -e .

# Make both python3 and python available
RUN ln -sf /usr/bin/python3 /usr/bin/python

# Make everything accessible
RUN chmod -R 755 /opt/ml/code

# Optional: Print directory contents for debugging
RUN echo "Contents of /opt/ml/code:" && \
    ls -la /opt/ml/code && \
    echo "Python path:" && \
    python3 -c "import sys; print('\n'.join(sys.path))"

ENV PYTHONPATH=/opt/ml/code

CMD ["python3", "/opt/ml/code/preprocessing.py"]
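
The image's CMD runs /opt/ml/code/preprocessing.py, which build_and_push.sh copies into the build context but which is not shown in this excerpt. For orientation only, a minimal sketch of a processing entrypoint that could run in this container, assuming the standard SageMaker processing input/output paths; everything in it is an assumption, not the committed script:

# Hypothetical sketch -- not the preprocessing.py committed here.
# Assumes the standard SageMaker processing container layout.
import json
from pathlib import Path

INPUT_DIR = Path("/opt/ml/processing/input")
OUTPUT_DIR = Path("/opt/ml/processing/output")


def main():
    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

    # Placeholder work: list the input files the job received so a pipeline
    # run can be verified end to end before real inference logic is added.
    inputs = [str(p) for p in INPUT_DIR.rglob("*") if p.is_file()]
    (OUTPUT_DIR / "manifest.json").write_text(json.dumps({"inputs": inputs}, indent=2))
    print(f"Wrote manifest with {len(inputs)} input files to {OUTPUT_DIR}")


if __name__ == "__main__":
    main()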
50 changes: 50 additions & 0 deletions esgtools/sentiment/inference_pipeline/build_and_push.sh
@@ -0,0 +1,50 @@
#!/bin/bash
set -e

# Get AWS account ID and region using AWS CLI (uses existing credentials)
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
AWS_REGION=$(aws configure get region)

# Repository name
REPOSITORY_NAME=sentiment-inference

# Set up directory paths
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../../" && pwd )"
PIPELINE_DIR="${REPO_DIR}/esgtools/sentiment/inference_pipeline"

# Create ECR repository if it doesn't exist
aws ecr describe-repositories --repository-names ${REPOSITORY_NAME} || \
    aws ecr create-repository --repository-name ${REPOSITORY_NAME}

# Login to ECR
aws ecr get-login-password | docker login --username AWS --password-stdin "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"

# Create a temporary build directory
BUILD_DIR=$(mktemp -d)
echo "Created temporary build directory: ${BUILD_DIR}"

# Copy required files to build directory
echo "Copying files to build directory..."
cp -r "${REPO_DIR}/esgtools" "${BUILD_DIR}/esgtools"
cp "${REPO_DIR}/setup.py" "${BUILD_DIR}/setup.py"
cp "${REPO_DIR}/lambda_requirements.txt" "${BUILD_DIR}/lambda_requirements.txt"
cp "${PIPELINE_DIR}/Dockerfile" "${BUILD_DIR}/Dockerfile"
cp "${PIPELINE_DIR}/requirements.txt" "${BUILD_DIR}/requirements.txt"
cp "${PIPELINE_DIR}/preprocessing.py" "${BUILD_DIR}/preprocessing.py"

# Debug: List contents of build directory
echo "Contents of build directory:"
ls -la "${BUILD_DIR}"

# Build and tag the docker image
echo "Building docker container..."
docker build --platform linux/amd64 -t ${REPOSITORY_NAME} "${BUILD_DIR}"
docker tag "${REPOSITORY_NAME}:latest" "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${REPOSITORY_NAME}:latest"

# Push the image
echo "Pushing docker container to ECR..."
docker push "${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${REPOSITORY_NAME}:latest"

# Clean up
echo "Cleaning up temporary files..."
rm -rf "${BUILD_DIR}"
23 changes: 23 additions & 0 deletions esgtools/sentiment/inference_pipeline/deploy_pipeline.py
@@ -0,0 +1,23 @@
import boto3
import sagemaker
from sagemaker_pipeline import create_pipeline


def deploy():
    """Deploy the SageMaker pipeline."""
    session = sagemaker.Session()

    # Get role ARN using boto3 (uses existing credentials)
    iam = boto3.client("iam")
    role_arn = iam.get_role(RoleName="SageMaker-DataScientist")["Role"]["Arn"]

    # Create and start the pipeline
    pipeline = create_pipeline(role_arn=role_arn)
    pipeline.upsert(role_arn=role_arn)
    execution = pipeline.start()

    print(f"Pipeline started. Execution ID: {execution.arn}")


if __name__ == "__main__":
    deploy()
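
deploy_pipeline.py imports create_pipeline from sagemaker_pipeline, one of the 13 changed files that is not shown in this excerpt. A minimal sketch of the shape such a factory could take, assuming the sentiment-inference ECR repository pushed by build_and_push.sh; the pipeline name, step name, and instance settings below are assumptions, not the committed code:

# Hypothetical sketch of sagemaker_pipeline.create_pipeline -- not the committed module.
import boto3
from sagemaker.processing import Processor
from sagemaker.workflow.pipeline import Pipeline
from sagemaker.workflow.steps import ProcessingStep


def create_pipeline(role_arn, image_uri=None, pipeline_name="sentiment-inference-pipeline"):
    """Build a pipeline whose single step runs the sentiment-inference container."""
    if image_uri is None:
        # Reassemble the ECR URI that build_and_push.sh pushes (repository name assumed).
        account = boto3.client("sts").get_caller_identity()["Account"]
        region = boto3.Session().region_name
        image_uri = f"{account}.dkr.ecr.{region}.amazonaws.com/sentiment-inference:latest"

    processor = Processor(
        role=role_arn,
        image_uri=image_uri,
        instance_count=1,
        instance_type="ml.m5.xlarge",  # assumed instance type
    )

    # The container's CMD does the work, so no inputs/outputs are wired up yet.
    step = ProcessingStep(name="SentimentPreprocessing", processor=processor)

    return Pipeline(name=pipeline_name, steps=[step])

With something of this shape in place, pipeline.upsert(...) and pipeline.start() in deploy() above register and launch the pipeline.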