
Build Artifact Stats for Nightly CI Pipeline (#1330)
Co-authored-by: Ricthofen <[email protected]>
Co-authored-by: Andy Pavlo <[email protected]>
3 people authored Nov 30, 2020
1 parent 09ff8ea commit f28dca8
Showing 34 changed files with 549 additions and 220 deletions.
1 change: 0 additions & 1 deletion .dockerignore
@@ -1,6 +1,5 @@
# Don't send any build context to Docker.
apidoc/
sample_tpl/
.gitattributes
.gitignore
Dockerfile
4 changes: 3 additions & 1 deletion Dockerfile
@@ -11,4 +11,6 @@ COPY script/installation/packages.sh install-script.sh
RUN echo y | ./install-script.sh all

COPY . /repo
WORKDIR /repo/build

WORKDIR /repo

2 changes: 1 addition & 1 deletion Jenkinsfile
@@ -178,7 +178,7 @@ pipeline {
sh 'cd build && timeout 1h ninja check-tpl'
sh script: 'cd build && timeout 20m python3 ../script/testing/junit/run_junit.py --build-type=debug --query-mode=simple', label: 'UnitTest (Simple)'
sh script: 'cd build && timeout 20m python3 ../script/testing/junit/run_junit.py --build-type=debug --query-mode=extended', label: 'UnitTest (Extended)'
sh script: 'cd build && timeout 20m python3 ../script/testing/junit/run_junit.py --build-type=debug --query-mode=extended --server-args="--pipeline_metrics_enable=True --pipeline_metrics_interval=0 --counters_enable=True --query_trace_metrics_enable=True"', label: 'UnitTest (Extended with pipeline metrics, counters, and query trace metrics)'
sh script: 'cd build && timeout 20m python3 ../script/testing/junit/run_junit.py --build-type=debug --query-mode=extended -a "pipeline_metrics_enable=True" -a "pipeline_metrics_interval=0" -a "counters_enable=True" -a "query_trace_metrics_enable=True"', label: 'UnitTest (Extended with pipeline metrics, counters, and query trace metrics)'
}
post {
always {
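The Jenkinsfile change above swaps the single `--server-args` string for repeated `-a "key=value"` flags. How `run_junit.py` consumes them is not shown in this diff; presumably something like argparse's `append` action, as in this hypothetical sketch (the flag names and handling here are assumptions, not the script's real interface):

```python
import argparse

# Hypothetical parser sketch; run_junit.py's actual argument handling may differ.
parser = argparse.ArgumentParser()
parser.add_argument("-a", action="append", default=[], dest="server_args",
                    help="Repeatable key=value argument forwarded to the DBMS")
args = parser.parse_args(["-a", "pipeline_metrics_enable=True",
                          "-a", "counters_enable=True"])

# Each "key=value" pair becomes a --key=value server flag.
server_flags = ["--{}".format(pair) for pair in args.server_args]
print(server_flags)  # ['--pipeline_metrics_enable=True', '--counters_enable=True']
```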
29 changes: 29 additions & 0 deletions Jenkinsfile-nightly
@@ -16,6 +16,35 @@ pipeline {
}

stages {
stage('Artifact Stats') {
agent {
docker {
image 'noisepage:focal'
}
}
steps {
sh 'echo $NODE_NAME'
sh script:'echo y | sudo ./script/installation/packages.sh all', label: 'Installing packages'

// The following command compiles and builds the binary without caching and times the whole operation.
// The time gets output to a file which an artifact stats collector reads, in order to report the metrics.
sh script:'''
mkdir build
cd build
/usr/bin/time -o /tmp/compiletime.txt -f %e sh -c "cmake -GNinja -DCMAKE_BUILD_TYPE=Release -DNOISEPAGE_USE_ASAN=OFF -DNOISEPAGE_USE_JEMALLOC=ON -DNOISEPAGE_BUILD_TESTS=OFF ..
ninja noisepage"''', label: 'Timed Compile & Build'

sh script: '''
cd build
python3 ../script/testing/artifact_stats/run_artifact_stats.py --publish-results=prod --publish-username=${PSS_CREATOR_USR} --publish-password=${PSS_CREATOR_PSW}
''', label: 'Artifact Stats'
}
post {
cleanup {
deleteDir()
}
}
}
stage('Performance') {
agent { label 'benchmark' }
steps {
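As the comments in the Artifact Stats stage explain, the build is wrapped in GNU `time` (the `time` package is added to `packages.sh` below), which writes the elapsed seconds (`-f %e`) to `/tmp/compiletime.txt`; the `CompileTimeCollector` added later in this commit then reads that file back. A minimal Python illustration of the same produce-and-consume pattern, with a sleep standing in for the real cmake/ninja build:

```python
import subprocess

TIME_FILE = "/tmp/compiletime.txt"

# Placeholder command standing in for the pipeline's cmake + ninja build.
# GNU time writes the elapsed wall-clock seconds (%e) to TIME_FILE via -o.
subprocess.run(
    ["/usr/bin/time", "-o", TIME_FILE, "-f", "%e", "sh", "-c", "sleep 2"],
    check=True,
)

# This is essentially what CompileTimeCollector does with the file.
with open(TIME_FILE) as time_file:
    print(f"timed command took {float(time_file.read()):.2f} seconds")
```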
8 changes: 7 additions & 1 deletion build-support/check_github_labels.py
@@ -7,17 +7,19 @@
'''
import json
import os
from os import environ, getenv
import re
import sys
import urllib.request


def get_pr_num():
match = re.match(r'.*terrier_PR-([^@/]*).*', os.getcwd())
if match and len(match.groups()) == 1:
return int(match.groups()[0])
return None

def is_correct_build_folder():
return getenv('JOB_NAME').startswith('terrier/')

def check_labels_polite(pr_num):
api_url = r'https://api.github.com/repos/cmu-db/noisepage/issues/{}/labels'
@@ -37,6 +39,10 @@ def check_labels_impolite(pr_num):


if __name__ == '__main__':
if not is_correct_build_folder():
print('This build originated from a jenkins subfolder. Skipping ci-label check')
sys.exit(0)

pr_num = get_pr_num()
# If we can't find a PR number, allow the build to go on.
if pr_num is None:
1 change: 1 addition & 0 deletions script/installation/packages.sh
@@ -55,6 +55,7 @@ LINUX_BUILD_PACKAGES=(\
"ninja-build"
"wget" \
"zlib1g-dev" \
"time" \
)
LINUX_TEST_PACKAGES=(\
"ant" \
14 changes: 9 additions & 5 deletions script/testing/README.md
@@ -1,18 +1,22 @@
# Testing Scripts

## Folder structure
- `util`(compatible with python3): all the common utilities for running all kinds of tests
- `junit`(compatible with python3): entry script to fire a junit test (and many other supporting configs)
- `micro_bench`(compatible with python3): entry script to run the microbenchmark tests
- `oltpbench`(compatible with python3): entry script to fire an oltp bench test
All tests are compatible with python3
- `util`: all the common utilities for running all kinds of tests
- `junit`: entry script to fire a junit test (and many other supporting configs)
- `micro_bench`: entry script to run the microbenchmark tests
- `oltpbench`: entry script to fire an oltp bench test
- `artifact_stats`: entry script to collect the artifact stats

## Util
`util` folder contains a list of common Python scripts
- `common`: functions that can be used in many different settings
- `constants`: all the constants used in any file under `util` or across the different tests
- `NoisePageServer`: A class that can start, stop, or restart an instance of the DBMS
- `TestServer`: the base class for running all types of tests
- `TestCase`: the base class for all types of tests
- `TestJUnit`: the test class for junit tests
- `TestOLTPBench`: the test class for oltp bench tests
- `constants`: all the constants used in any file under `util`

## OLTP Bench
`oltpbench` folder contains Python scripts for running an oltp bench test
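The `NoisePageServer` utility listed above is what the new `MemoryOnStartCollector` (later in this commit) uses to start and stop the DBMS. A rough usage sketch, assuming `script/testing` is on `sys.path` the way `run_artifact_stats.py` arranges it:

```python
import psutil

from util.db_server import NoisePageServer

# Start a release build of the DBMS, sample its memory, then shut it down.
# This mirrors collectors/memory_on_start.py from this commit.
db = NoisePageServer(build_type='release')
db.run_db()
try:
    mem = psutil.Process(db.db_process.pid).memory_info()
    print(f'rss={mem.rss} vms={mem.vms}')
finally:
    db.stop_db()
```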
49 changes: 49 additions & 0 deletions script/testing/artifact_stats/README.md
@@ -0,0 +1,49 @@
# Artifact Stats
These metrics are non-operational measurements of the DBMS.

## Current Metrics
- Compile time
- Memory usage on startup
- (coming soon) Idle CPU utilization
- (coming soon) Binary size

## How to add a metric
1) Create a file for your collector in `/script/testing/artifact_stats/collectors`
2) Create a subclass of the `BaseArtifactStatsCollector` class (see the sketch after this list).
- See `collectors/compile_time.py` for a simple example
- See `collectors/memory_on_start.py` for an example that requires running the DBMS.
- See [BaseArtifactStatsCollector](#BaseArtifactStatsCollector) for more details.
3) Import the new collector in `/script/testing/artifact_stats/collectors/__init__.py`
4) Test it out by running `/script/testing/artifact_stats/run_artifact_stats.py` and check that your metric shows up in the artifact stats.
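As a concrete illustration of steps 1 and 2, a hypothetical collector for the binary-size metric listed above as coming soon could look like the sketch below; the class and the binary path are assumptions made for this example, not part of this commit.

```python
import os

from artifact_stats.base_artifact_stats_collector import BaseArtifactStatsCollector


class BinarySizeCollector(BaseArtifactStatsCollector):
    """ Hypothetical example collector: record the size of the built binary.
    The path below is an assumption made for this sketch. """
    binary_path = 'bin/noisepage'

    def run_collector(self):
        if not os.path.exists(self.binary_path):
            raise FileNotFoundError(f'{self.binary_path} not found.')
        self.metrics['binary_size_bytes'] = os.path.getsize(self.binary_path)
        return 0
```

Per steps 3 and 4, importing it in `collectors/__init__.py` is what lets `run_artifact_stats.py` discover it through `BaseArtifactStatsCollector.__subclasses__()`.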

## Script
`/script/testing/artifact_stats/run_artifact_stats.py`
### Args
`--debug` - Run with debug logging.

`--publish-results` - The environment to publish the results to (test, staging, or prod). If omitted, the script will not publish the results.

`--publish-username` - The username used to authenticate with the performance storage service.

`--publish-password` - The password used to authenticate with the performance storage service.

## <a id="BaseArtifactStatsCollector"></a>BaseArtifactStatsCollector
### Attributes
`is_debug` - This determines whether debug output from the collector will be printed in stdout. In many cases this will be the stdout of a spawned process.

`metrics` - The metrics collected during the execution of the collector.

### Methods
`__init__(is_debug)` - Initialize the class.

> args:
>> `is_debug`: sets the `is_debug` property for the collector.

`setup()` - This runs any steps that need to execute prior to metric collection.

`run_collector()` - This performs the steps needed to capture the artifact stats for the collector. It returns an exit code for the script; return `0` if everything succeeded.

`teardown()` - This cleans up anything that was created or run during the `setup` or `run_collector` functions.

`get_metrics()` - This returns the metrics dict. It is used by the `run_artifact_stats.py` script to aggregate the metrics from each collector. In most cases this function should not be overridden.
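Taken together, the driver script (`run_artifact_stats.py`, added later in this commit) calls these methods in a fixed order. A condensed sketch of that lifecycle, with the error handling and logging omitted:

```python
def run_collector_lifecycle(collector_cls, is_debug=False):
    """ Condensed view of run_collector() in run_artifact_stats.py:
    setup -> run_collector -> teardown -> get_metrics. """
    collector = collector_cls(is_debug=is_debug)
    collector.setup()
    exit_code = collector.run_collector()
    collector.teardown()
    return exit_code, collector.get_metrics()
```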

Empty file.
35 changes: 35 additions & 0 deletions script/testing/artifact_stats/base_artifact_stats_collector.py
@@ -0,0 +1,35 @@
import os


class BaseArtifactStatsCollector(object):
""" This is the base class to use if you want to collect a new artifact
metric. You can override the setup, run_collector, and teardown methods
for your specific collector implementation.
Properties:
`is_debug` - This determines whether debug output from the collector will
be printed in stdout. In many cases this will be the stdout
of a spawned process.
`metrics` - The metrics collected during the execution of the collector.
"""

def __init__(self, is_debug=False):
self.is_debug = is_debug
self.metrics = {}

def setup(self):
""" Run any setup for the test such as compiling, starting the DB, etc. """
pass

def run_collector(self):
""" This function is where the logic for the collector belongs """
pass

def teardown(self):
""" Return all test state to the same as it was at the start of the collector """
pass

def get_metrics(self):
""" Return all the metrics that were stored for this collector. Refrain from
overriding this method. """
return self.metrics
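As the docstring notes, `is_debug` typically decides whether a spawned process's output reaches stdout. A hypothetical subclass might gate it like this (an illustrative sketch, not code from this commit):

```python
import subprocess

from artifact_stats.base_artifact_stats_collector import BaseArtifactStatsCollector


class ExampleSpawningCollector(BaseArtifactStatsCollector):
    """ Hypothetical collector that silences its child process unless is_debug is set. """

    def run_collector(self):
        out = None if self.is_debug else subprocess.DEVNULL
        subprocess.run(['echo', 'collecting...'], stdout=out, check=True)
        self.metrics['example_metric'] = 1
        return 0
```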
2 changes: 2 additions & 0 deletions script/testing/artifact_stats/collectors/__init__.py
@@ -0,0 +1,2 @@
from .compile_time import CompileTimeCollector
from .memory_on_start import MemoryOnStartCollector
15 changes: 15 additions & 0 deletions script/testing/artifact_stats/collectors/compile_time.py
@@ -0,0 +1,15 @@
import os

from artifact_stats.base_artifact_stats_collector import BaseArtifactStatsCollector


class CompileTimeCollector(BaseArtifactStatsCollector):
compile_time_file_path = '/tmp/compiletime.txt'

def run_collector(self):
""" Read the compile time written by the timed build step in Jenkinsfile-nightly """
if not os.path.exists(self.compile_time_file_path):
raise FileNotFoundError(f'{self.compile_time_file_path} not found.')
with open(self.compile_time_file_path, 'r') as compile_time_file:
self.metrics['compile_time_sec'] = float(compile_time_file.read())
return 0
27 changes: 27 additions & 0 deletions script/testing/artifact_stats/collectors/memory_on_start.py
@@ -0,0 +1,27 @@
import os
import time
import subprocess
import psutil

from artifact_stats.base_artifact_stats_collector import BaseArtifactStatsCollector
from util.db_server import NoisePageServer


class MemoryOnStartCollector(BaseArtifactStatsCollector):
def __init__(self, is_debug):
super().__init__(is_debug)

def setup(self):
super().setup()
self.db_instance = NoisePageServer(build_type='release')
self.db_instance.run_db()

def run_collector(self):
process = psutil.Process(self.db_instance.db_process.pid)
memory_data = process.memory_info()
self.metrics['rss_on_start'] = memory_data.rss
self.metrics['vms_on_start'] = memory_data.vms

def teardown(self):
super().teardown()
self.db_instance.stop_db()
123 changes: 123 additions & 0 deletions script/testing/artifact_stats/run_artifact_stats.py
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
import os
import sys
import argparse
import logging

base_path = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.insert(0, base_path)

from artifact_stats.base_artifact_stats_collector import BaseArtifactStatsCollector
from artifact_stats.collectors import *
from reporting.report_result import report_artifact_stats_result
from util.constants import PERFORMANCE_STORAGE_SERVICE_API, LOG


def collect_artifact_stats(collectors):
""" Takes an array of collector classes, executes the collectors and
combines the result.
Args:
collectors - An array of BaseArtifactStatsCollector sub classes
Returns:
exit_code - (int)The exit code of the collection task
metrics - (dict)The combined metrics from all the collectors
"""
aggregated_metrics = {}
exit_code = 0
try:
for collector in collectors:
collector_instance = collector(is_debug=args.debug)
LOG.info(f'Starting {collector_instance.__class__.__name__} collection')
try:
exit_code, results = run_collector(collector_instance)
check_for_conflicting_metric_keys(aggregated_metrics, results)
aggregated_metrics.update(results)
except Exception as err:
exit_code = 1 if exit_code == 0 else exit_code
LOG.error(err)
collector_instance.teardown()
if exit_code:
LOG.error(f'{collector_instance.__class__.__name__} failed. Stopping all artifact stats collection')
break
LOG.info(f'{collector_instance.__class__.__name__} finished successfully')
except Exception as err:
exit_code = 1 if exit_code == 0 else exit_code
LOG.error(err)

return exit_code, aggregated_metrics


def run_collector(collector_instance):
""" Execute a collector. This includes setup, metric collection, and
teardown.
Args:
collector_instance - An instance of a BaseArtifactStatsCollector subclass
Returns:
exit_code - (int)The exit code of the collection task
metrics - (dict)The metrics collected during the collection task
"""
LOG.debug('Running setup...')
collector_instance.setup()

LOG.debug('Collecting metrics...')
exit_code = collector_instance.run_collector()

LOG.debug('Running teardown...')
collector_instance.teardown()

results = collector_instance.get_metrics()
return exit_code, results


def check_for_conflicting_metric_keys(aggregated_metrics, collector_results):
""" Check to see if another collector has already added a metric with the
same key to the aggregated metrics. If there is a conflict an exception is
raised """
shared_keys = set(aggregated_metrics).intersection(collector_results)
if shared_keys:
raise KeyError(f'artifact stats collector key conflict on {shared_keys}')
return


if __name__ == "__main__":
parser = argparse.ArgumentParser()

parser.add_argument("--debug",
action="store_true",
dest="debug",
default=False,
help="Enable debug output")

parser.add_argument("--publish-results",
default="none",
type=str,
choices=PERFORMANCE_STORAGE_SERVICE_API.keys(),
help="Environment in which to store performance results")

parser.add_argument("--publish-username",
type=str,
help="Performance Storage Service Username")

parser.add_argument("--publish-password",
type=str,
help="Performance Storage Service password")

args = parser.parse_args()

if args.debug:
LOG.setLevel(logging.DEBUG)

# Get the BaseArtifactStatsCollector subclasses imported from artifact_stats.collectors.
# Effectively this adds each artifact stats collector class into an array to be instantiated later.
collectors = [obj for obj in BaseArtifactStatsCollector.__subclasses__()]
exit_code, aggregated_metrics = collect_artifact_stats(collectors)

if not exit_code:
LOG.info(f'Artifact stats: {aggregated_metrics}')

if args.publish_results != 'none':
report_artifact_stats_result(args.publish_results, aggregated_metrics,
args.publish_username, args.publish_password)

logging.shutdown()
sys.exit(exit_code)
2 changes: 1 addition & 1 deletion script/testing/check_pids.py
@@ -15,4 +15,4 @@
if check_pid_exists(pid):
print(CommandLineStr.TRUE)
else:
print(CommandLineStr.FALSE)
print(CommandLineStr.FALSE)
2 changes: 1 addition & 1 deletion script/testing/collect_mem_info.py
@@ -15,4 +15,4 @@
rss = mem_info.rss if mem_info else ""
vms = mem_info.vms if mem_info else ""
res = "{RSS},{VMS}".format(RSS=rss, VMS=vms)
print(res)
print(res)