setup_infra.sh
#!/bin/bash
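# Usage: ./setup_infra.sh <bucket-name>
# The first positional argument ($1) is used below as the name of the S3
# bucket to create.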
# Load environment variables from a dotenv (.env) file, if one exists
if [ -f .env ]; then
    export $(sed 's/#.*//g' .env | xargs)
fi
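# Note: the export-via-xargs pattern above assumes no value in .env contains
# spaces or shell metacharacters; quote-sensitive values would need a sturdier
# loader such as: set -a; . ./.env; set +a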
# Validate Docker is running
if ! docker info >/dev/null 2>&1; then
echo "Docker does not seem to be running, run it first and retry"
exit 1
fi
# Configure AWS credentials and region
aws configure set aws_access_key_id "$AWS_ACCESS_KEY_ID"
aws configure set aws_secret_access_key "$AWS_SECRET_ACCESS_KEY"
aws configure set default.region "$AWS_DEFAULT_REGION"
echo "Your .aws/config file looks like this:"
cat ~/.aws/config
echo "Your .aws/credentials file looks like this:"
cat ~/.aws/credentials
AWS_ID=$(aws sts get-caller-identity --query Account --output text)
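# Optional sanity check: abort early if the credentials above did not resolve
# to an account ID.
if [ -z "$AWS_ID" ]; then
    echo "Unable to retrieve AWS account ID; check your credentials"
    exit 1
fi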
# Create the S3 bucket
echo "Creating bucket $1"
aws s3api create-bucket --acl public-read-write --bucket "$1" --output text >> setup.log
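# Wait until the bucket is actually available before relying on it. Note that
# outside us-east-1, create-bucket may also need
# --create-bucket-configuration LocationConstraint="$AWS_DEFAULT_REGION".
aws s3api wait bucket-exists --bucket "$1"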
echo "Clean up stale local data"
rm -f data.zip
rm -rf data
echo "Download data"
aws s3 cp s3://start-data-engg/data.zip ./
unzip data.zip
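# Minimal guard (assumes data.zip unpacks into ./data, as the cleanup above
# implies): stop if the download or unzip failed.
if [ ! -d data ]; then
    echo "data/ not found after unzip; aborting"
    exit 1
fi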
#--------------------------------------------------------------------------
# Airflow
#--------------------------------------------------------------------------
echo "Spinning up local Airflow infrastructure"
rm -rf logs temp
mkdir logs temp
# The Airflow image is now built by docker-compose itself
# (previously: docker build ./airflow --tag "$AIRFLOW_IMAGE_NAME")
docker-compose up airflow-init
docker-compose up -d
echo "Sleeping 5 Minutes to let Airflow containers reach a healthy state"
sleep 300
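# A tighter alternative to the fixed sleep (a sketch, assuming the Airflow
# webserver is published on localhost:8080, the docker-compose default):
# until curl -sf http://localhost:8080/health >/dev/null; do sleep 10; done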
#echo "Creating an AWS EMR Cluster named "$SERVICE_NAME""
#aws emr create-default-roles >> setup.log
#aws emr create-cluster --applications Name=Hadoop Name=Spark \
#--release-label emr-6.2.0 --name $SERVICE_NAME \
#--scale-down-behavior TERMINATE_AT_TASK_COMPLETION \
#--service-role EMR_DefaultRole \
#--instance-groups
docker exec spark-master spark-submit \
--master spark://spark-master:7077 \
/usr/local/spark/app/health_violations.py \
--data_source /usr/local/spark/resources/data/Food_Establishment_Inspection_Data.csv \
--output_uri /usr/local/spark/resources/data/output.csv