Skip to content

Commit

Permalink
Restructure: Move sentiment to another package and rename
Browse files Browse the repository at this point in the history
- Deleted the sentiment SageMaker pipeline, since it was moved to another repo: tba-investments-sentiment
- Renamed this repo to tba-investments-etl and the package to tba_invest_etl
  • Loading branch information
nico-corthorn committed Jan 8, 2025
1 parent ac227d5 commit 152f491
Show file tree
Hide file tree
Showing 66 changed files with 112 additions and 1,516 deletions.
11 changes: 9 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,15 +38,22 @@ jobs:
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
aws-region: us-east-2
- name: Configure AWS CodeArtifact
run: |
aws codeartifact login --tool twine --domain your-domain --domain-owner $(aws sts get-caller-identity --query 'Account' --output text) --repository your-repository
- name: Build package
run: make build-package
- name: Publish package
run: make publish-package
- name: SAM build
if: success()
env:
AWS_SAM_STACK_NAME: ${{ secrets.AWS_SAM_STACK_NAME }}
run: |
make build
make build-sam
- name: SAM deploy
if: success()
env:
AWS_SAM_STACK_NAME: ${{ secrets.AWS_SAM_STACK_NAME }}
run: |
make deploy
make deploy-sam
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ db/data/
db/viewer.ipynb

# WRDS solutions
esgtools/wrds
tba_invest_etl/wrds

# Package duplicates
esgtools/sentiment/inference_pipeline/esgtools
tba_invest_etl/sentiment/inference_pipeline/tba_invest_etl
2 changes: 1 addition & 1 deletion .samignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
**/.pytest_cache
**/.ipynb_checkpoints
tests/
esgtools/db/notebooks
tba_invest_etl/db/notebooks
2 changes: 1 addition & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
recursive-include esgtools *
recursive-include tba_invest_etl *
recursive-exclude * __pycache__
recursive-exclude * *.py[co]
57 changes: 21 additions & 36 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
AWS_REGION := $(shell aws configure get region)
SAGEMAKER_BUCKET := $(shell aws sts get-caller-identity --query 'Account' --output text | xargs -I {} echo "sagemaker-$(AWS_REGION)-{}")
AWS_ACCOUNT_ID := $(shell aws sts get-caller-identity --query 'Account' --output text)

install:
python -m pip install --upgrade pip &&\
pip install -r lambda_requirements.txt &&\
pip install -r dev_requirements.txt &&\
pip install -r tests/test_requirements.txt &&\
pip install -r esgtools/sentiment/inference_pipeline/requirements.txt &&\
pip install -e . --use-pep517

update-conda-env:
Expand All @@ -18,20 +18,33 @@ update-conda-env:
make install

format:
black esgtools tests
isort esgtools tests
black tba_invest_etl tests
isort tba_invest_etl tests

lint:
pylint esgtools tests --rcfile=.pylintrc
black --check --diff esgtools tests
isort --check-only esgtools tests
pylint tba_invest_etl tests --rcfile=.pylintrc
black --check --diff tba_invest_etl tests
isort --check-only tba_invest_etl tests

test:
python -m pytest -v

pre_pr: format lint test

build:
build-package:
@echo "Building tba_invest_etl package..."
rm -rf dist
mkdir -p dist
python setup.py sdist
mv dist/*.tar.gz dist/tba_invest_etl.tar.gz

publish-package:
@echo "Publishing package to AWS CodeArtifact..."
python -m pip install --upgrade twine
aws codeartifact login --tool twine --domain tba-investments --domain-owner $(AWS_ACCOUNT_ID) --repository tba-investments-etl
python -m twine upload --repository codeartifact dist/*

build-sam:
@echo "Cleaning previous build..."
rm -rf .aws-sam/build

Expand All @@ -57,7 +70,7 @@ deploy-local:
@echo "Deploying from local samconfig file..."
sam deploy --config-file samconfig.toml

deploy:
deploy-sam:
@echo "Deploying..."
sam deploy \
--stack-name sam-app \
Expand All @@ -68,31 +81,3 @@ deploy:
--no-confirm-changeset \
--no-fail-on-empty-changeset \
--disable-rollback false

build-esgtools-package:
@echo "Building esgtools package..."
rm -rf dist
mkdir -p dist
python setup.py sdist
mv dist/*.tar.gz dist/esgtools.tar.gz

upload-sentiment-code: build-esgtools-package
aws s3 cp ./esgtools/sentiment/inference_pipeline/preprocessing.py s3://$(SAGEMAKER_BUCKET)/sentiment/code/
aws s3 cp ./esgtools/sentiment/inference_pipeline/inference.py s3://$(SAGEMAKER_BUCKET)/sentiment/code/
aws s3 cp ./esgtools/sentiment/inference_pipeline/serve s3://$(SAGEMAKER_BUCKET)/sentiment/code/
aws s3 cp ./esgtools/sentiment/inference_pipeline/model_config.json s3://$(SAGEMAKER_BUCKET)/sentiment/config/
aws s3 cp ./esgtools/sentiment/inference_pipeline/prompt_template.txt s3://$(SAGEMAKER_BUCKET)/sentiment/config/
aws s3 cp ./dist/esgtools.tar.gz s3://$(SAGEMAKER_BUCKET)/sentiment/packages/

free-docker-memory:
docker system prune -f
docker image prune -a
docker volume prune -f

push-sentiment-container:
chmod +x ./esgtools/sentiment/inference_pipeline/build_and_push.sh
./esgtools/sentiment/inference_pipeline/build_and_push.sh

deploy-sentiment-pipeline:
make upload-sentiment-code
python ./esgtools/sentiment/inference_pipeline/deploy_pipeline.py
2 changes: 1 addition & 1 deletion db/wrds_preprocessing.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
"metadata": {},
"outputs": [],
"source": [
"from esgtools.utils import sql_manager"
"from tba_invest_etl.utils import sql_manager"
]
},
{
Expand Down
2 changes: 1 addition & 1 deletion developer-guide.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
We use AWS to host different resources, the main ones being:
- RDS Postgres database: Hosts all investment data sources and various outputs.
- Secrets Manager: Keeps database and stock price API credentials accessible and safe.
- Lambda functions: Get assets ids, update prices, update news, etc. See /esgtools/lambdas and template.yaml.
- Lambda functions: Get assets ids, update prices, update news, etc. See /tba_invest_etl/lambdas and template.yaml.
- Step functions: To orchestrate (and parallelize) lambda function execution. See /statemachines.


Expand Down
51 changes: 0 additions & 51 deletions esgtools/sentiment/inference_pipeline/Dockerfile

This file was deleted.

50 changes: 0 additions & 50 deletions esgtools/sentiment/inference_pipeline/build_and_push.sh

This file was deleted.

22 changes: 0 additions & 22 deletions esgtools/sentiment/inference_pipeline/deploy_pipeline.py

This file was deleted.

Loading

0 comments on commit 152f491

Please sign in to comment.