-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Sentiment: Add working test pipeline
- Creates a SageMaker pipeline with a processing step only - Builds and pushes a Docker image to ECR used by the processing step
- Loading branch information
1 parent
c6d6392
commit 889e6c8
Showing
13 changed files
with
591 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
# Image for the sentiment pipeline's processing step.
# Runs preprocessing.py from /opt/ml/code on a CUDA 11.8 / Ubuntu 22.04 base.
FROM --platform=linux/amd64 nvidia/cuda:11.8.0-base-ubuntu22.04

# Set working directory (created automatically if missing)
WORKDIR /opt/ml/code

# Install Python and basic dependencies.
# DEBIAN_FRONTEND is set inline so it never leaks into the runtime environment.
# NOTE(review): --no-install-recommends is intentionally not used; the
# recommended packages may provide the compiler toolchain needed when a
# requirement has to be built from source - confirm before slimming.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Create the processing input/output directories.
# NOTE(review): world-writable (777) is kept from the original; presumably so
# the job can write regardless of the runtime user - tighten if it always
# runs as root.
RUN mkdir -p \
        /opt/ml/processing/input/code \
        /opt/ml/processing/output \
    && chmod -R 777 /opt/ml/processing

# Copy the dependency manifests first so the (slow) dependency layer below is
# only rebuilt when a requirements file changes, not on every source edit.
COPY requirements.txt lambda_requirements.txt ./

# Install Python packages from requirements.txt.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3 install --no-cache-dir -r requirements.txt

# Copy the application sources (changes most often, so it comes last)
COPY setup.py preprocessing.py ./
COPY esgtools ./esgtools

# Install the package in editable mode from /opt/ml/code
RUN pip3 install --no-cache-dir -e .

# Make both python3 and python available
RUN ln -sf /usr/bin/python3 /usr/bin/python

# Make everything accessible
RUN chmod -R 755 /opt/ml/code

# Optional: Print directory contents for debugging
RUN echo "Contents of /opt/ml/code:" && \
    ls -la /opt/ml/code && \
    echo "Python path:" && \
    python3 -c "import sys; print('\n'.join(sys.path))"

ENV PYTHONPATH=/opt/ml/code

# Exec form so the Python process receives signals directly
CMD ["python3", "/opt/ml/code/preprocessing.py"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#!/bin/bash
# Build the sentiment inference Docker image and push it to ECR.
#
# Steps: resolve the AWS account and region, ensure the ECR repository
# exists, log Docker in to the registry, stage the build context in a
# temporary directory, then build, tag and push the image as :latest.
#
# -e: stop on the first failing command
# -u: treat unset variables as errors
# -o pipefail: a failure on the left side of a pipe (e.g. the ECR password
#              lookup) fails the whole pipeline instead of being masked
set -euo pipefail

# Disable the AWS CLI v2 client-side pager so commands never wait for a
# keypress when the script is run from a terminal.
export AWS_PAGER=""

# Get AWS account ID and region using AWS CLI (uses existing credentials).
# Region environment variables take precedence over the configured region so
# that the registry URL and the ECR API calls below always agree.
AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
AWS_REGION="${AWS_REGION:-${AWS_DEFAULT_REGION:-$(aws configure get region)}}"
if [[ -z "${AWS_REGION}" ]]; then
    echo "Could not determine the AWS region; set AWS_REGION or configure a default region." >&2
    exit 1
fi

# Repository name and registry host
REPOSITORY_NAME=sentiment-inference
ECR_REGISTRY="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com"
IMAGE_URI="${ECR_REGISTRY}/${REPOSITORY_NAME}:latest"

# Set up directory paths (repository root is three levels above this script)
REPO_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )/../../../" && pwd )"
PIPELINE_DIR="${REPO_DIR}/esgtools/sentiment/inference_pipeline"

# Create ECR repository if it doesn't exist
aws ecr describe-repositories --region "${AWS_REGION}" --repository-names "${REPOSITORY_NAME}" || \
    aws ecr create-repository --region "${AWS_REGION}" --repository-name "${REPOSITORY_NAME}"

# Login to ECR
aws ecr get-login-password --region "${AWS_REGION}" | \
    docker login --username AWS --password-stdin "${ECR_REGISTRY}"

# Create a temporary build directory and make sure it is removed on every
# exit path, including failures part-way through the build or push.
BUILD_DIR=$(mktemp -d)
cleanup() {
    echo "Cleaning up temporary files..."
    rm -rf "${BUILD_DIR}"
}
trap cleanup EXIT
echo "Created temporary build directory: ${BUILD_DIR}"

# Copy required files to build directory
echo "Copying files to build directory..."
cp -r "${REPO_DIR}/esgtools" "${BUILD_DIR}/esgtools"
cp "${REPO_DIR}/setup.py" "${BUILD_DIR}/setup.py"
cp "${REPO_DIR}/lambda_requirements.txt" "${BUILD_DIR}/lambda_requirements.txt"
cp "${PIPELINE_DIR}/Dockerfile" "${BUILD_DIR}/Dockerfile"
cp "${PIPELINE_DIR}/requirements.txt" "${BUILD_DIR}/requirements.txt"
cp "${PIPELINE_DIR}/preprocessing.py" "${BUILD_DIR}/preprocessing.py"

# Debug: List contents of build directory
echo "Contents of build directory:"
ls -la "${BUILD_DIR}"

# Build and tag the docker image
echo "Building docker container..."
docker build --platform linux/amd64 -t "${REPOSITORY_NAME}" "${BUILD_DIR}"
docker tag "${REPOSITORY_NAME}:latest" "${IMAGE_URI}"

# Push the image
echo "Pushing docker container to ECR..."
docker push "${IMAGE_URI}"

# The temporary build directory is removed by the EXIT trap above.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
import boto3 | ||
import sagemaker | ||
from sagemaker_pipeline import create_pipeline | ||
|
||
|
||
def deploy(role_name="SageMaker-DataScientist"):
    """Deploy the SageMaker pipeline and start one execution.

    Looks up the ARN of the IAM role ``role_name``, builds the pipeline with
    ``create_pipeline``, creates or updates it via ``upsert`` and then starts
    it, printing the ARN of the started execution.

    Args:
        role_name: Name of the IAM role used both to build and to upsert the
            pipeline. Defaults to the role name that was previously
            hard-coded, so existing ``deploy()`` calls behave as before.
    """
    # Get role ARN using boto3 (uses existing credentials)
    iam = boto3.client("iam")
    role_arn = iam.get_role(RoleName=role_name)["Role"]["Arn"]

    # Create and start the pipeline
    pipeline = create_pipeline(role_arn=role_arn)
    pipeline.upsert(role_arn=role_arn)
    execution = pipeline.start()

    print(f"Pipeline started. Execution ID: {execution.arn}")


if __name__ == "__main__":
    deploy()
Oops, something went wrong.