Skip to content
This repository has been archived by the owner on Feb 20, 2023. It is now read-only.

Switch to the new version of OLTPBench. #1632

Open
wants to merge 24 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
45d90cf
Refactor support for SHOW commands in DESCRIBE.
lmwnshn Jul 26, 2021
4ce4411
Adapt CI framework to use oltpbench_tim.
lmwnshn Jul 26, 2021
2191d55
Format...
lmwnshn Jul 26, 2021
0885064
Merge branch 'fix_oltpbench' into switch_oltpbench
lmwnshn Jul 26, 2021
e06c2ff
Add format header...
lmwnshn Jul 26, 2021
408c4d6
Merge branch 'fix_oltpbench' into switch_oltpbench
lmwnshn Jul 26, 2021
bdbbf3f
Merge branch 'master' into switch_oltpbench
mbutrovich Jul 26, 2021
779c516
Merge branch 'master' into switch_oltpbench
mbutrovich Jul 27, 2021
ee64eb3
Bunch of fixes/hacks around the test framework stuff.
lmwnshn Jul 28, 2021
3509bf1
Merge remote-tracking branch 'origin/switch_oltpbench' into switch_ol…
lmwnshn Jul 28, 2021
3d206c8
More test framework hackery.
lmwnshn Jul 28, 2021
8ef0e01
Replace one stupid hack with another stupid hack.
lmwnshn Jul 28, 2021
782ef89
Print out result.
lmwnshn Jul 29, 2021
e1be29b
Version is being weird.
lmwnshn Jul 29, 2021
016eec9
Suppress Maven download progress.
lmwnshn Aug 6, 2021
e0a35d4
Change folder for Jenkins to benchbase.
lmwnshn Aug 6, 2021
03a9d77
Flip over to benchbase.
lmwnshn Aug 10, 2021
bdfd4fe
Merge branch 'master' into switch_oltpbench
lmwnshn Aug 10, 2021
9d37fec
Fix typo (oltpbench2 -> benchbase).
lmwnshn Aug 11, 2021
7f8d68c
Don't run BenchBase tests.
lmwnshn Aug 11, 2021
c48c3c2
Merge remote-tracking branch 'origin/switch_oltpbench' into switch_ol…
lmwnshn Aug 11, 2021
dac568c
Temporary test with Wan's branch of benchbase.
lmwnshn Aug 12, 2021
714253a
Revert "Temporary test with Wan's branch of benchbase."
lmwnshn Aug 12, 2021
af0592c
Merge branch 'master' into switch_oltpbench
lmwnshn Aug 16, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 11 additions & 16 deletions script/testing/oltpbench/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,15 @@
from ..util.constants import DIR_TMP

# git settings for OLTPBench.
OLTPBENCH_GIT_URL = "https://github.com/oltpbenchmark/oltpbench.git"
OLTPBENCH_GIT_LOCAL_PATH = os.path.join(DIR_TMP, "oltpbench")
OLTPBENCH_VERSION = "benchbase-2021-SNAPSHOT"
OLTPBENCH_GIT_URL = "https://github.com/cmu-db/benchbase.git"
OLTPBENCH_GIT_LOCAL_PATH = os.path.join(DIR_TMP, "benchbase")
OLTPBENCH_GIT_TARGET_PATH = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "target")
OLTPBENCH_GIT_FINAL_PATH = os.path.join(OLTPBENCH_GIT_TARGET_PATH, OLTPBENCH_VERSION)
OLTPBENCH_GIT_CLEAN_COMMAND = "rm -rf {}".format(OLTPBENCH_GIT_LOCAL_PATH)
OLTPBENCH_GIT_CLONE_COMMAND = "git clone --depth 1 {} {}".format(OLTPBENCH_GIT_URL,
OLTPBENCH_GIT_LOCAL_PATH)
OLTPBENCH_GIT_CLONE_COMMAND = "git clone --depth 1 {} {}".format(
OLTPBENCH_GIT_URL,
OLTPBENCH_GIT_LOCAL_PATH)

# OLTPBench default settings.
OLTPBENCH_DEFAULT_TIME = 30
Expand All @@ -21,7 +25,7 @@
OLTPBENCH_DEFAULT_DBTYPE = "noisepage"
OLTPBENCH_DEFAULT_DRIVER = "org.postgresql.Driver"
OLTPBENCH_DEFAULT_RATE = "unlimited"
OLTPBENCH_DEFAULT_BIN = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "oltpbenchmark")
OLTPBENCH_DEFAULT_BIN = "java -jar benchbase.jar "
OLTPBENCH_DEFAULT_DATABASE_RESTART = True
OLTPBENCH_DEFAULT_DATABASE_CREATE = True
OLTPBENCH_DEFAULT_DATABASE_LOAD = True
Expand All @@ -30,17 +34,8 @@
OLTPBENCH_DEFAULT_WAL_ENABLE = True
OLTPBENCH_DEFAULT_CONTINUE_ON_ERROR = False

OLTPBENCH_DIR_CONFIG = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "config")
OLTPBENCH_DIR_TEST_RESULT = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "results")

# ant commands for invoking OLTPBench.
OLTPBENCH_ANT_BUILD_FILE = os.path.join(OLTPBENCH_GIT_LOCAL_PATH, "build.xml")
OLTPBENCH_ANT_COMMANDS = [
"ant bootstrap -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
"ant resolve -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
"ant clean -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
"ant build -buildfile {}".format(OLTPBENCH_ANT_BUILD_FILE),
]
OLTPBENCH_DIR_CONFIG = os.path.join(OLTPBENCH_GIT_FINAL_PATH, "config", "noisepage")
OLTPBENCH_DIR_TEST_RESULT = os.path.join(OLTPBENCH_GIT_FINAL_PATH, "results")

# API endpoints for Performance Storage Service
# Each pair represents a different environment. One can choose where the benchmark testing results will be uploaded to
Expand Down
16 changes: 7 additions & 9 deletions script/testing/oltpbench/test_case_oltp.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,15 +79,15 @@ def _init_test_case(self):
self.test_output_file = os.path.join(self.test_result_dir,
"oltpbench.log")

# oltpbench historgrams results - json format
# oltpbench histograms results - json format
self.test_histograms_json_file = self.args.get("test_json_histograms")
if not self.test_histograms_json_file:
self.test_histograms_json_file = "oltp_histograms_" + self.filename_suffix + ".json"
self.test_histogram_path = os.path.join(
constants.OLTPBENCH_GIT_LOCAL_PATH, self.test_histograms_json_file)
constants.OLTPBENCH_GIT_FINAL_PATH, self.test_histograms_json_file)

# oltpbench initiate database and load data
self.oltp_flag = "--histograms --execute={EXECUTE} -s {BUCKETS}".format(
self.oltp_flag = "--execute={EXECUTE} -s {BUCKETS}".format(
EXECUTE=self.db_execute, BUCKETS=self.buckets)

# oltpbench test command
Expand All @@ -98,7 +98,7 @@ def _init_test_case(self):
XML=self.xml_config,
FLAGS=self.oltp_flag,
HISTOGRAMS=self.test_histogram_path)
self.test_command_cwd = constants.OLTPBENCH_GIT_LOCAL_PATH
self.test_command_cwd = constants.OLTPBENCH_GIT_FINAL_PATH

def run_pre_test(self):
self._config_xml_file()
Expand Down Expand Up @@ -149,9 +149,9 @@ def _get_db_url(self):
def _config_xml_file(self):
xml = ElementTree.parse(self.xml_template)
root = xml.getroot()
root.find("dbtype").text = constants.OLTPBENCH_DEFAULT_DBTYPE
root.find("type").text = constants.OLTPBENCH_DEFAULT_DBTYPE
root.find("driver").text = constants.OLTPBENCH_DEFAULT_DRIVER
root.find("DBUrl").text = self._get_db_url()
root.find("url").text = self._get_db_url()
root.find("username").text = constants.OLTPBENCH_DEFAULT_USERNAME
root.find("password").text = constants.OLTPBENCH_DEFAULT_PASSWORD
root.find("isolation").text = str(self.transaction_isolation)
Expand Down Expand Up @@ -199,9 +199,7 @@ def _validate_result(self):
with open(self.test_histogram_path) as oltp_result_file:
test_result = json.load(oltp_result_file)
unexpected_result = test_result.get("unexpected", {}).get("HISTOGRAM")
if unexpected_result and unexpected_result.keys():
if unexpected_result:
for test in unexpected_result.keys():
if unexpected_result[test] != 0:
raise RuntimeError(str(unexpected_result))
else:
raise RuntimeError(str(unexpected_result))
12 changes: 10 additions & 2 deletions script/testing/oltpbench/test_oltpbench.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
from ..util.test_server import TestServer
from . import constants

import os


class TestOLTPBench(TestServer):
"""
Expand Down Expand Up @@ -39,5 +41,11 @@ def _build_oltpbench(self):
Raises an exception if anything goes wrong.
Assumes that _download_oltpbench() has already been run.
"""
for command in constants.OLTPBENCH_ANT_COMMANDS:
expect_command(command)
old_dir = os.getcwd()
os.chdir(constants.OLTPBENCH_GIT_LOCAL_PATH)
# --no-transfer-progress: don't show download progress, too noisy
# -Dmaven.test.skip=true: we're not in the business of testing BenchBase, we just want to use it
expect_command("./mvnw package --no-transfer-progress -Dmaven.test.skip=true")
os.chdir(constants.OLTPBENCH_GIT_TARGET_PATH)
expect_command(f"tar xvzf {constants.OLTPBENCH_VERSION}.tgz")
os.chdir(old_dir)
5 changes: 0 additions & 5 deletions script/testing/reporting/constants.py
Original file line number Diff line number Diff line change
@@ -1,6 +1 @@
UNKNOWN_RESULT = 'unknown'
LATENCY_ATTRIBUTE_MAPPING = [
# key = key in publish result json, value= string to search OLTPBench results for
# TODO(WAN): this mapping could probably be a.. map? {}?
('l_25', '25'), ('l_75', '75'), ('l_90', '90'), ('l_95', '95'), ('l_99', '99'),
('avg', 'av'), ('median', 'median'), ('min', 'min'), ('max', 'max')]
17 changes: 13 additions & 4 deletions script/testing/reporting/parsers/oltpbench/res_parser.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import csv

from ...constants import LATENCY_ATTRIBUTE_MAPPING
from ...utils import get_value_by_pattern


Expand Down Expand Up @@ -29,10 +28,20 @@ def get_latency_val(row, pattern):
reader = csv.DictReader(csvfile, delimiter=',')
for row in reader:
incremental_metrics.append({
"time": float(gvbp(row, 'time', None)),
"throughput": float(gvbp(row, 'throughput', None)),
"time": float(gvbp(row, 'time(sec)', None)),
"throughput": float(gvbp(row, 'throughput(req/sec)', None)),
"latency": {key: get_latency_val(row, pat)
for key, pat in LATENCY_ATTRIBUTE_MAPPING}
for key, pat in [
('l_25', '25th_lat(ms)'),
('l_75', '75th_lat(ms)'),
('l_90', '90th_lat(ms)'),
('l_95', '95th_lat(ms)'),
('l_99', '99th_lat(ms)'),
('avg', 'avg_lat(ms)'),
('median', 'median_lat(ms)'),
('min', 'min_lat(ms)'),
('max', 'max_lat(ms)')
]}
})

return incremental_metrics
22 changes: 16 additions & 6 deletions script/testing/reporting/parsers/oltpbench/summary_parser.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import json
from time import time

from ...constants import LATENCY_ATTRIBUTE_MAPPING, UNKNOWN_RESULT
from ...constants import UNKNOWN_RESULT
from ...utils import get_value_by_pattern


Expand Down Expand Up @@ -30,27 +30,37 @@ def parse_summary_file(path):
"""
def get_latency_val(latency_dist, pattern):
value = get_value_by_pattern(latency_dist, pattern, None)
return float("{:.4}".format(value)) if value else value
return float("{:.4}".format(float(value))) if value else value

with open(path) as summary_file:
summary = json.load(summary_file)
latency_dist = summary.get('Latency Distribution', {})

metadata = {
'noisepage': {
'db_version': summary.get('DBMS Version', UNKNOWN_RESULT)
'db_version': '1.0.0'
}
}
timestamp = int(get_value_by_pattern(summary, 'timestamp', str(time())))
timestamp = int(get_value_by_pattern(summary, 'Current Timestamp (milliseconds)', str(time())))
benchmark_type = summary.get('Benchmark Type', UNKNOWN_RESULT)
parameters = {
'scale_factor': summary.get('scalefactor', '-1.0'),
'terminals': int(summary.get('terminals', -1))
}
metrics = {
'throughput': get_value_by_pattern(summary, 'throughput', '-1.0'),
'throughput': get_value_by_pattern(summary, 'Throughput (requests/second)', '-1.0'),
'latency': {key: get_latency_val(latency_dist, pattern)
for key, pattern in LATENCY_ATTRIBUTE_MAPPING}
for key, pattern in [
('l_25', '25th Percentile Latency (microseconds)'),
('l_75', '75th Percentile Latency (microseconds)'),
('l_90', '90th Percentile Latency (microseconds)'),
('l_95', '95th Percentile Latency (microseconds)'),
('l_99', '99th Percentile Latency (microseconds)'),
('avg', 'Average Latency (microseconds)'),
('median', 'Median Latency (microseconds)'),
('min', 'Minimum Latency (microseconds)'),
('max', 'Maximum Latency (microseconds)')
]}
}

return metadata, timestamp, benchmark_type, parameters, metrics
39 changes: 23 additions & 16 deletions script/testing/reporting/parsers/parse_data.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import glob
import os
import re
from decimal import Decimal
Expand Down Expand Up @@ -130,6 +131,24 @@ def parse_oltpbench_files(results_dir):
metrics : dict
The summary measurements that were gathered from the test.
"""

def hack_rename(old_glob_target, new_name):
    """
    Rename the single file matching a glob pattern to a fixed name.

    The new OLTPBench result files are renamed here to match the names that
    the older parsing code expects, which avoids a larger refactoring.

    Parameters
    ----------
    old_glob_target : str
        Glob pattern that must match exactly one existing path.
    new_name : str
        Path that the matched file is renamed to.

    Raises
    ------
    AssertionError
        If the glob pattern does not match exactly one path.
    """
    matches = glob.glob(old_glob_target)
    # Raise explicitly rather than via an `assert` statement: asserts are
    # stripped when Python runs with -O, which would turn a missing or
    # ambiguous result file into an IndexError or a silent wrong rename.
    # The exception type is kept as AssertionError for compatibility.
    if len(matches) != 1:
        raise AssertionError(
            f"Expected exactly one file matching '{old_glob_target}', "
            f"found {len(matches)}: {matches}")
    os.rename(matches[0], new_name)

hack_rename(f'{results_dir}/*.results.csv', f'{results_dir}/oltpbench.res')
hack_rename(f'{results_dir}/*.raw.csv', f'{results_dir}/oltpbench.csv')
hack_rename(f'{results_dir}/*.samples.csv', f'{results_dir}/oltpbench.samples')
hack_rename(f'{results_dir}/*.summary.json', f'{results_dir}/oltpbench.summary')
hack_rename(f'{results_dir}/*.params.json', f'{results_dir}/oltpbench.params')
hack_rename(f'{results_dir}/*.metrics.json', f'{results_dir}/oltpbench.metrics')
hack_rename(f'{results_dir}/*.config.xml', f'{results_dir}/oltpbench.expconfig')

config_parameters = parse_config_file(results_dir + '/oltpbench.expconfig')
metadata, timestamp, benchmark_type, summary_parameters, metrics = parse_summary_file(
results_dir + '/oltpbench.summary')
Expand Down Expand Up @@ -168,22 +187,10 @@ def _parse_db_metadata():

Warnings
--------
Giant hack that parses a hardcoded constant NOISEPAGE_VERSION
in src/include/common/version.h.
Giant hack that hardcodes version number.

If the hack is unsuccessful, it defaults to UNKNOWN_RESULT.
"""
regex = r"NOISEPAGE_VERSION[=\s].*(\d.\d.\d)"
curr_dir = os.path.dirname(os.path.realpath(__file__))
# TODO(WAN): Don't do this. We support SELECT VERSION(), do that instead.
version_file_relative = '../../../../src/include/common/version.h'
version_file = os.path.join(curr_dir, version_file_relative)
db_metadata = {'noisepage': {'db_version': UNKNOWN_RESULT}}
try:
with open(version_file) as f:
match = re.search(regex, f.read())
db_metadata['noisepage']['db_version'] = match.group(1)
except Exception as err:
LOG.error(err)

return db_metadata
return {'noisepage': {'db_version': '1.0.0'}}


1 change: 1 addition & 0 deletions script/testing/reporting/report_result.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,7 @@ def _send_result(env, path, username, password, result):
"""
url = f"{PERFORMANCE_STORAGE_SERVICE_API.get(env)}{path}"
LOG.debug(f"Sending results to: {url}")
LOG.info(f"Uploading result: {result}")

try:
result = requests.post(url, json=result, auth=(username, password))
Expand Down
3 changes: 3 additions & 0 deletions src/include/common/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@
namespace noisepage::common {

constexpr std::string_view NOISEPAGE_NAME = "NoisePage";
// TODO(WAN): There used to be a fragile hack in parse_data.py that would try to regex out the version number.
// Please update script/testing/reporting/parsers/parse_data.py manually if you change this version number.
// And also script/testing/reporting/parsers/oltpbench/summary_parser.py.
constexpr std::string_view NOISEPAGE_VERSION = "1.0.0";
constexpr std::string_view NOISEPAGE_VERSION_STR = "NoisePage 1.0.0";

Expand Down