Skip to content

Commit

Permalink
Merge pull request #34 from GSA/staging
Browse files Browse the repository at this point in the history
Code deploy build
  • Loading branch information
igoristic authored Dec 18, 2024
2 parents c567af5 + 0b05f3f commit 2d107a6
Show file tree
Hide file tree
Showing 79 changed files with 871 additions and 79 deletions.
Empty file modified .circleci/config.yml
100644 → 100755
Empty file.
Empty file modified .codeclimate.yml
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion .github/dependabot.yml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,4 @@ updates:
patterns:
- "freezegun"
- "pylint"
- "pytest"
- "pytest*"
Empty file modified .github/pull_request_template.md
100644 → 100755
Empty file.
Empty file modified .gitignore
100644 → 100755
Empty file.
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ repos:
- id: check-added-large-files # prevents giant files from being committed.
- id: check-case-conflict # checks for files that would conflict in case-insensitive filesystems.
- id: check-json # checks json files for parseable syntax.
- id: check-shebang-scripts-are-executable # ensures that (non-binary) files with a shebang are executable.
- id: check-merge-conflict # checks for files that contain merge conflict strings.
- id: check-symlinks # checks for symlinks which do not point to anything.
- id: check-yaml # checks yaml files for parseable syntax.
Expand Down
Empty file modified LICENSE
100644 → 100755
Empty file.
2 changes: 1 addition & 1 deletion README.md
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ Make sure to run `pip install -r requirements.txt` and `playwright install` befo
1. Navigate to the [*spiders*](search_gov_crawler/search_gov_spiders/spiders) directory
2. Enter one of the following two commands:

* This command will output the yielded URLs in the destination (relative to the [*spiders*](search_gov_crawler/search_gov_spiders/spiders) directory) and file format specified in the “FEEDS” variable of the [*settings.py*](search_gov_crawler/search_gov_spiders/settings.py) file:
* This command will output the yielded URLs in the destination (relative to the [*spiders*](search_gov_crawler/search_gov_spiders/spiders) directory) and file format specified in `search_gov_crawler/search_gov_spiders/pipelines.py`:

$ scrapy runspider <spider_file.py>

Expand Down
Empty file added __init__.py
Empty file.
25 changes: 25 additions & 0 deletions appspec.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# AWS CodeDeploy AppSpec: declares the permissions to apply to the deployed
# files and the lifecycle hook scripts to run on the instance.
version: 0.0
os: linux
# Permissions applied to "." (directories only, per the `type` list below).
# NOTE(review): mode 777 together with rwx default ACL entries for user, group
# and other leaves the tree writable by every account — confirm this is intended.
permissions:
- object: .
mode: 777
acls:
- "d:u::rwx"
- "d:g::rwx"
- "d:o::rwx"
owner: search
type:
- directory
# Lifecycle hooks. Every script runs as the `search` user; `timeout` is the
# per-script limit (seconds, per the AppSpec reference).
hooks:
# Installs system packages, Python, the virtualenv and cron entries.
AfterInstall:
- location: cicd-scripts/app_install.sh
timeout: 600
runas: search
# Launches the spider via the run_with_ui / run_without_ui helper.
ApplicationStart:
- location: cicd-scripts/app_start.sh
timeout: 300
runas: search
# Stops scrapy processes and removes the venv and cron entries.
ApplicationStop:
- location: cicd-scripts/app_stop.sh
timeout: 300
runas: search
137 changes: 137 additions & 0 deletions cicd-scripts/app_install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
#!/bin/bash

# Change into the directory that holds this script and then step up one level,
# so every relative path used below (./cicd-scripts/..., ./venv, ...) resolves
# against the parent of the script's directory instead of the caller's $PWD.
# If the first cd fails, the second is skipped and the script keeps running
# from wherever it was started.
cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && cd ../

# Load the ensure_executable helper that the functions below call with a
# script path. The helper's body is not part of this file.
# NOTE(review): presumably it marks the given script executable and runs it —
# confirm against cicd-scripts/helpers/ensure_executable.sh.
chmod +x ./cicd-scripts/helpers/ensure_executable.sh
source ./cicd-scripts/helpers/ensure_executable.sh

### VARIABLES ###
# major.minor Python version; used to build the interpreter name
# (python3.12) and the source tarball name/URL in install_python.
SPIDER_PYTHON_VERSION=3.12
# Directory this script works from; install_python cds back here when done.
_CURRENT_BUILD_DIR=${PWD}
# Virtual environment location, relative to the working directory set above.
VENV_DIR=./venv

### FUNCTIONS ###

# Stop the spider services by handing app_stop.sh to ensure_executable.
# Returns: whatever status ensure_executable returns.
stop_services() {
  local stop_script="./cicd-scripts/app_stop.sh"

  printf '%s\n' "Running app_stop.sh..."
  ensure_executable "$stop_script"
}

# Refresh the apt package index, then install the system packages this
# deployment needs (compiler toolchain, acl, and the -dev libraries listed).
# Both apt-get calls go through sudo.
# Returns: the status of the final `apt-get install`.
install_system_dependencies() {
  local -a apt_packages=(
    lzma liblzma-dev libbz2-dev python-setuptools
    acl build-essential checkinstall libreadline-dev
    libncursesw5-dev libssl-dev libsqlite3-dev tk-dev
    libgdbm-dev libc6-dev zlib1g-dev libffi-dev openssl
  )

  echo "Installing system dependencies..."
  sudo apt-get update -y
  sudo apt-get install -y "${apt_packages[@]}"
}

# Build and install CPython ${SPIDER_PYTHON_VERSION}.0 from the python.org
# source tarball.
#
# Globals:
#   SPIDER_PYTHON_VERSION (read) - major.minor version, e.g. 3.12
#   _CURRENT_BUILD_DIR    (read) - directory to cd back into on success
# Outputs: progress on stdout, failures on stderr.
# Returns: 0 when the interpreter was installed, 1 on the first failing step.
#
# Changes from the previous implementation:
#   * Only `make altinstall` is run. `make install` also installs the
#     unversioned `python3`, which can shadow the system interpreter; the
#     rest of this script only ever calls python${SPIDER_PYTHON_VERSION}.
#   * The download, unpack and install steps go through sudo, because the
#     deploy hook runs as an unprivileged user while /usr/src and the install
#     prefix are normally root-owned.
#   * Every step is checked, so the success message is only printed when the
#     build really finished.
install_python() {
  local version="${SPIDER_PYTHON_VERSION}.0"
  local src_root="/usr/src"
  local tarball="Python-${version}.tgz"
  local src_dir="${src_root}/Python-${version}"
  local url="https://www.python.org/ftp/python/${version}/${tarball}"
  local build_user

  echo "Installing Python ${SPIDER_PYTHON_VERSION}..."

  build_user=$(whoami) || return 1

  if ! sudo wget -q -O "${src_root}/${tarball}" "$url"; then
    echo "Failed to download ${url}." >&2
    return 1
  fi

  if ! sudo tar xzf "${src_root}/${tarball}" -C "$src_root"; then
    echo "Failed to extract ${src_root}/${tarball}." >&2
    return 1
  fi

  # Hand the source tree to the current user so configure/make run
  # unprivileged; only the final install step needs root.
  if ! sudo chown -R "$build_user" "$src_dir"; then
    echo "Failed to change ownership of ${src_dir}." >&2
    return 1
  fi

  # Build in a subshell so a failure part-way through cannot leave the
  # caller stranded inside the source tree.
  if ! (
    cd "$src_dir" || exit 1
    ./configure --enable-optimizations \
      && make \
      && sudo make altinstall
  ); then
    echo "Failed to build Python ${SPIDER_PYTHON_VERSION}." >&2
    return 1
  fi

  cd "$_CURRENT_BUILD_DIR" || return 1
  echo "Python ${SPIDER_PYTHON_VERSION} installed successfully."
}

# Make sure python${SPIDER_PYTHON_VERSION} can be found by `command -v`.
# When it is, print its reported version; otherwise build it through
# install_python.
check_python() {
  local interpreter="python${SPIDER_PYTHON_VERSION}"

  if command -v "$interpreter" &>/dev/null; then
    echo "Python ${SPIDER_PYTHON_VERSION} already installed: $("$interpreter" --version)"
  else
    install_python
  fi
}

# Hand the update_pythonpath.sh helper to ensure_executable.
# NOTE(review): the helper itself is not visible here; going by its name it
# adjusts PYTHONPATH — confirm.
update_pythonpath() {
  local helper="./cicd-scripts/helpers/update_pythonpath.sh"

  ensure_executable "$helper"
}

# Create the virtual environment at $VENV_DIR with
# python${SPIDER_PYTHON_VERSION}, activate it in the current shell, and
# upgrade pip inside it.
# The activation is left in effect on return; install_dependencies relies on
# it and calls `deactivate` when it is done.
setup_virtualenv() {
  local interpreter="python${SPIDER_PYTHON_VERSION}"
  local activate_script="$VENV_DIR/bin/activate"

  echo "Setting up virtual environment..."
  "$interpreter" -m venv "$VENV_DIR"

  # Sourced rather than executed: activation has to modify this shell.
  . "$activate_script"

  python -m pip install --upgrade pip
}

# Install the crawler's Python requirements and Playwright (including its
# browser dependencies) with whichever `python` is active, then deactivate
# the virtual environment.
# Expects the venv to have been activated already (see setup_virtualenv).
# Returns: the status of `deactivate`.
install_dependencies() {
  local requirements_file="./search_gov_crawler/requirements.txt"

  echo "Installing dependencies..."
  python -m pip install --upgrade -r "$requirements_file"

  echo "Installing Playwright..."
  python -m pip install --upgrade pytest-playwright playwright
  playwright install --with-deps

  deactivate
}

# Open up the working directory: mode 777 recursively, ownership to the
# current user, and a default ACL granting rwx to group `dgsearch`.
# NOTE(review): 777 makes the whole tree world-writable — confirm this is
# still required now that the default ACL is set.
# Returns: the status of the final setfacl call.
configure_permissions() {
  local current_user

  echo "Configuring file permissions..."
  chmod -R 777 .

  current_user="$(whoami)"
  chown -R "$current_user" .

  sudo setfacl -Rdm g:dgsearch:rwx .
}

# Register cicd-scripts/app_start.sh as an @reboot cron job for the current
# user, replacing any existing crontab line that mentions app_start.sh.
#
# Outputs: progress on stdout, a failure message on stderr.
# Returns: 0 when the new crontab was installed, 1 otherwise.
#
# The crontab is rewritten through a pipe, the same way the check_*.sh
# helpers do it. The previous version staged the content in ./temp_cron — a
# predictable file name inside a directory that configure_permissions has
# just made world-writable — and reported success even when the install
# failed.
manage_cron_jobs() {
  local cron_entry
  cron_entry="@reboot $(pwd)/cicd-scripts/app_start.sh"

  echo "Managing cron jobs..."

  # `crontab -l` fails when the user has no crontab yet; that simply means
  # there is nothing to carry over, so its error output is dropped.
  # -F makes grep treat the file name as a literal string, not a regex.
  if {
    crontab -l 2>/dev/null | grep -v -F 'app_start.sh'
    printf '%s\n' "$cron_entry"
  } | crontab -; then
    echo "Cron jobs updated."
  else
    echo "Failed to update cron jobs." >&2
    return 1
  fi
}

# Hand the CloudWatch and CodeDeploy check helpers to ensure_executable, in
# that order, announcing each one first.
# Returns: the status of the last ensure_executable call.
start_agents() {
  local -a agents=(
    "CloudWatch:check_cloudwatch.sh"
    "CodeDeploy:check_codedeploy.sh"
  )
  local agent

  # Each entry is "<display name>:<helper file name>".
  for agent in "${agents[@]}"; do
    echo "Starting AWS ${agent%%:*} agent..."
    ensure_executable "./cicd-scripts/helpers/${agent#*:}"
  done
}

### SCRIPT EXECUTION ###
# The steps below run strictly in this order. No exit status is checked
# between them, so a failing step does not stop the ones that follow and the
# final message is printed regardless.

# Stop anything left over from a previous deployment (runs app_stop.sh)
stop_services

# Install the apt packages listed in install_system_dependencies
install_system_dependencies

# Build python${SPIDER_PYTHON_VERSION} from source only if it is not on PATH
check_python

# Hand update_pythonpath.sh to ensure_executable
update_pythonpath

# Open up permissions on the working directory before the venv is created
configure_permissions

# Create the venv and activate it in this shell
setup_virtualenv

# Install Python requirements and Playwright into the active venv;
# this step ends by deactivating the venv again
install_dependencies

# Start AWS agents (CloudWatch, then CodeDeploy)
start_agents

# Register app_start.sh as an @reboot cron job
manage_cron_jobs

echo "App installation completed successfully."
20 changes: 20 additions & 0 deletions cicd-scripts/app_start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash

# Change into the directory that holds this script and then step up one level,
# so the ./cicd-scripts/... paths below resolve regardless of the caller's $PWD.
cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && cd ../

# Load the ensure_executable helper (defined outside this file) after making
# the helper file itself executable.
chmod +x ./cicd-scripts/helpers/ensure_executable.sh
source ./cicd-scripts/helpers/ensure_executable.sh

# TODO: Make it part of the local env variable that is set by Ansible
SPIDER_RUN_WITH_UI=false

# Start from the headless launcher and switch to the UI launcher only when
# the flag says so. The flag's value is executed as a command, so it must be
# exactly `true` or `false`.
SCRIPT="./cicd-scripts/helpers/run_without_ui.sh"
if $SPIDER_RUN_WITH_UI; then
  SCRIPT="./cicd-scripts/helpers/run_with_ui.sh"
fi

# Hand the chosen launcher to ensure_executable (per the original comment:
# check it exists, make it executable, run it).
ensure_executable "$SCRIPT"
118 changes: 118 additions & 0 deletions cicd-scripts/app_stop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!/bin/bash

# Change into the directory that holds this script and then step up one level,
# so ./venv and the ./cicd-scripts/... paths below resolve regardless of the
# caller's $PWD.
cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && cd ../

# Load the ensure_executable helper (defined outside this file) after making
# the helper file itself executable.
chmod +x ./cicd-scripts/helpers/ensure_executable.sh
source ./cicd-scripts/helpers/ensure_executable.sh

### FUNCTIONS ###

# Delete ./venv when it exists; do nothing (and print nothing) otherwise.
remove_venv() {
  [ -d ./venv ] || return 0

  echo "Removing virtual environment..."
  rm -rf ./venv/
}

# Drop pip's download cache for the current user and for root.
# Returns: the status of rm.
purge_pip_cache() {
  printf '%s\n' "Purging pip cache..."
  rm -rf \
    ~/.cache/pip \
    /root/.cache/pip
}

# Stop scrapy_scheduler.py by handing kill_scheduler.sh to ensure_executable.
# Returns: whatever status ensure_executable returns.
stop_scrapy_scheduler() {
  local kill_script="./cicd-scripts/helpers/kill_scheduler.sh"

  printf '%s\n' "Stopping scrapy_scheduler.py (if running)..."
  ensure_executable "$kill_script"
}

# Signal every process whose command line mentions scrapydweb, then every one
# that mentions scrapyd, reporting for each whether anything was matched.
stop_scrapy_tasks() {
  local service

  echo "Stopping all scrapyd and scrapydweb tasks..."

  # scrapydweb goes first: "scrapyd" is a prefix of it, and pkill -f matches
  # anywhere in the command line.
  for service in scrapydweb scrapyd; do
    if pkill -f "$service" 2>/dev/null; then
      echo "$service tasks stopped."
    else
      echo "No $service tasks running."
    fi
  done
}

# Print the `ps -ef` lines that mention scrapy (leaving out the grep itself),
# or a short notice when there are none.
display_remaining_scrapy_processes() {
  echo -e "\nRemaining scrapy processes (if any):"

  if ! ps -ef | grep scrapy | grep -v grep; then
    echo "No scrapy processes running."
  fi
}

# Send SIGKILL to every process whose command line mentions "scrapy"
# (case-insensitive), as a last resort after the polite stops above.
#
# Outputs: one status line on stdout saying whether anything was killed.
#
# Changes from the previous `ps aux | grep -ie [s]crapy | ... | xargs kill -9`:
#   * The pattern is no longer an unquoted glob that a file in the working
#     directory could expand.
#   * kill is only invoked when there is at least one PID; before, the
#     "nothing to kill" branch depended on kill failing with a usage error.
#   * This script's own PID is never signalled.
kill_remaining_scrapy_jobs() {
  local -a pids=()
  local pid

  echo "Force killing remaining scrapy background jobs..."

  while IFS= read -r pid; do
    if [[ -n "$pid" && "$pid" != "$$" ]]; then
      pids+=("$pid")
    fi
  done < <(pgrep -f -i scrapy)

  if (( ${#pids[@]} > 0 )) && kill -9 "${pids[@]}"; then
    echo "Remaining scrapy jobs killed."
  else
    echo "No remaining scrapy jobs to kill."
  fi
}

# Send SIGKILL to every process whose command line mentions "nohup".
#
# Returns: 0 when nothing matched, otherwise the status of kill.
#
# The previous `ps -ef | grep nohup | ... | xargs kill -9` ran `kill -9`
# with no PIDs whenever nothing matched, which printed a usage error and
# returned a failure status on every clean stop. This script's own PID is
# also excluded now.
remove_nohup_jobs() {
  local -a pids=()
  local pid

  echo "Removing nohup jobs (python)..."

  while IFS= read -r pid; do
    if [[ -n "$pid" && "$pid" != "$$" ]]; then
      pids+=("$pid")
    fi
  done < <(pgrep -f nohup)

  if (( ${#pids[@]} == 0 )); then
    return 0
  fi

  kill -9 "${pids[@]}"
}

# Remove every line containing the given text from the current user's crontab.
#
# Arguments: $1 - literal text to look for (matched with grep -F)
# Outputs:   progress on stdout; a crontab update failure on stderr
# Returns:   0 when the crontab was rewritten, 1 when no text was given,
#            the user could not be determined, or the update failed.
#
# Changes from the previous version: the missing-argument path now returns 1
# (a bare `return` passed on echo's 0), a failing `whoami` is no longer hidden
# behind `local VAR=$(...)`, and the "removed" message is only printed when
# the crontab was actually installed.
remove_cron_entry() {
  if [ -z "$1" ]; then
    echo "Error: No cron entry provided."
    return 1
  fi

  local cron_entry="$1"
  local cron_user
  cron_user=$(whoami) || return 1

  echo "Removing cron job entries referencing: $cron_entry"

  # A missing crontab makes `crontab -l` fail; its error is dropped and an
  # empty crontab is installed, which is the desired end state anyway.
  # `--` keeps an entry starting with '-' from being read as a grep option.
  if sudo crontab -l -u "$cron_user" 2>/dev/null \
    | grep -v -F -- "$cron_entry" \
    | sudo crontab -u "$cron_user" -; then
    echo "Cron job entries for '$cron_entry' removed."
  else
    echo "Failed to update crontab for '$cron_user'." >&2
    return 1
  fi
}

### SCRIPT EXECUTION ###
# The steps below run strictly in this order. No exit status is checked
# between them, so the final message is printed regardless of failures.

# Remove ./venv if it exists
remove_venv

# Delete pip's cache directories (current user and root)
purge_pip_cache

# Hand kill_scheduler.sh to ensure_executable
stop_scrapy_scheduler

# pkill scrapydweb, then scrapyd
stop_scrapy_tasks

# List whatever scrapy processes survived the stops above
display_remaining_scrapy_processes

# SIGKILL anything scrapy-related that is still alive
kill_remaining_scrapy_jobs

# SIGKILL processes whose command line mentions nohup
remove_nohup_jobs

# Drop the @reboot cron entries added by check_cloudwatch.sh,
# check_codedeploy.sh and app_install.sh
remove_cron_entry "check_cloudwatch.sh"
remove_cron_entry "check_codedeploy.sh"
remove_cron_entry "app_start.sh"

echo "App stop completed successfully."
32 changes: 32 additions & 0 deletions cicd-scripts/helpers/check_cloudwatch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash

# Report whether a process matching "amazon-cloudwatch-agent" is running and,
# when none is, try to start the service through sudo and report the outcome.
check_cloudwatch() {
  if pgrep -f amazon-cloudwatch-agent > /dev/null; then
    echo "AWS CloudWatch agent is running."
    return
  fi

  echo "AWS CloudWatch agent is not running. Starting it now..."
  if sudo service amazon-cloudwatch-agent start; then
    echo "AWS CloudWatch agent started successfully."
  else
    echo "Failed to start AWS CloudWatch agent."
  fi
}

# Register check_cloudwatch.sh as an @reboot cron job for the current user,
# replacing any crontab line that already mentions it.
# Paths are relative to the current directory, so this must be called from the
# directory that contains cicd-scripts/.
# Globals: CRON_ENTRY (written) - the crontab line that was installed.
setup_cron() {
  local helper_rel="cicd-scripts/helpers/check_cloudwatch.sh"

  chmod +x "./${helper_rel}"
  CRON_ENTRY="@reboot $(pwd)/${helper_rel}"

  # Existing crontab minus any previous entry for this helper, plus the new
  # entry, piped straight back into crontab.
  {
    crontab -l 2>/dev/null | grep -v -F "check_cloudwatch.sh"
    echo "$CRON_ENTRY"
  } | crontab -

  echo "Crontab entry added to ensure the script runs on reboot."
}

# Start the CloudWatch agent now if no matching process is found
check_cloudwatch

# Then (re)register this script as an @reboot cron job so the same check
# runs again after a restart
setup_cron
32 changes: 32 additions & 0 deletions cicd-scripts/helpers/check_codedeploy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash

# Report whether a process matching "codedeploy-agent" is running and, when
# none is, try to start the service through sudo and report the outcome.
# NOTE(review): pgrep -f matches against full command lines, so any process
# started from a path containing "codedeploy-agent" also counts as "running" —
# confirm this cannot hide a stopped agent.
check_codedeploy() {
  if pgrep -f codedeploy-agent > /dev/null; then
    echo "AWS CodeDeploy agent is running."
    return
  fi

  echo "AWS CodeDeploy agent is not running. Starting it now..."
  if sudo service codedeploy-agent start; then
    echo "AWS CodeDeploy agent started successfully."
  else
    echo "Failed to start AWS CodeDeploy agent."
  fi
}

# Register check_codedeploy.sh as an @reboot cron job for the current user,
# replacing any crontab line that already mentions it.
# Paths are relative to the current directory, so this must be called from the
# directory that contains cicd-scripts/.
# Globals: CRON_ENTRY (written) - the crontab line that was installed.
setup_cron() {
  local helper_rel="cicd-scripts/helpers/check_codedeploy.sh"

  chmod +x "./${helper_rel}"
  CRON_ENTRY="@reboot $(pwd)/${helper_rel}"

  # Existing crontab minus any previous entry for this helper, plus the new
  # entry, piped straight back into crontab.
  {
    crontab -l 2>/dev/null | grep -v -F "check_codedeploy.sh"
    echo "$CRON_ENTRY"
  } | crontab -

  echo "Crontab entry added to ensure the script runs on reboot."
}

# Start the CodeDeploy agent now if no matching process is found
check_codedeploy

# Then (re)register this script as an @reboot cron job so the same check
# runs again after a restart
setup_cron
Loading

0 comments on commit 2d107a6

Please sign in to comment.