diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..b1161a0
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,6 @@
+build:
+	cargo build --release --package text-generation-inference-benchmark --bin text-generation-inference-benchmark
+
+# Pass extra CLI flags through ARGS, e.g. `make run ARGS="--rate 10"`
+run: build
+	cargo run --package text-generation-inference-benchmark --bin text-generation-inference-benchmark -- $(ARGS)
\ No newline at end of file
diff --git a/README.md b/README.md
index ac97680..b857ca2 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,39 @@
 # Text Generation Inference benchmarking tool
 
-A lightweight benchmarking tool for inference servers.
+A lightweight benchmarking tool for LLM inference servers.
+It benchmarks using either a constant arrival rate or a constant virtual user count.
+
+![ui.png](assets/ui.png)
 
+## Table of contents
+
+* [Text Generation Inference benchmarking tool](#text-generation-inference-benchmarking-tool)
+  * [Table of contents](#table-of-contents)
+  * [TODO](#todo)
+  * [Get started](#get-started)
+    * [Run a benchmark](#run-a-benchmark)
+    * [Configure your benchmark](#configure-your-benchmark)
+  * [Development](#development)
+  * [Frequently Asked Questions](#frequently-asked-questions)
+
 ## TODO
 
 - [X] Customizable token count and variance
 - [ ] Check results
-- [X] Allow for multiturn prompts for prefix caching
+- [X] Allow for system prompts for prefix caching
+- [ ] Allow for multi-turn prompts
 - [ ] Push results to Optimum benchmark backend
-- [ ] Script to generate plots from results
+- [X] Script to generate plots from results
 
-## Running a benchmark
-```
+## Get started
+
+### Run a benchmark
+
+Run a benchmark using the Docker image:
+
+```shell
 # start a TGI/vLLM server somewhere, then run benchmark...
 # ... we mount results to the current directory
 $ docker run \
@@ -33,4 +52,81 @@ $ docker run \
     --decode-options "num_tokens=50,max_tokens=60,min_tokens=40,variance=10"
 ```
 
-Results will be saved in `results.json` in current directory.
\ No newline at end of file
+Results will be saved in `results.json` in the current directory.
+
+
+### Configure your benchmark
+
+#### Benchmark mode
+
+In the default mode, the tool runs a `sweep` benchmark. It first runs a throughput test to find the maximum throughput,
+then sweeps over QPS values up to that maximum.
+
+Available modes:
+- `sweep`: runs a sweep benchmark
+- `rate`: runs a benchmark at a fixed request rate
+- `throughput`: runs a benchmark at a fixed throughput (constant VUs)
+
+
+#### Dataset configuration
+
+Prompts are sampled from a Hugging Face dataset file, using a [subset of ShareGPT
+by default](https://huggingface.co/datasets/hlarcher/share_gpt_small). You can specify a different dataset file using the
+`--dataset` and `--dataset-file` options.
+
+The dataset is expected to be JSON with the following format:
+```json
+[
+  {
+    "conversations": [
+      {
+        "role": "user",
+        "content": "rewrite that entire paragraph in the same style like this one: "
+      }
+    ]
+  }
+]
+```
+
+To benchmark with prefix caching, you can use a system prompt that will be sent with each request of a conversation.
+```json
+[
+  {
+    "conversations": [
+      {
+        "role": "system",
+        "content": "You are a helpful assistant that makes jokes at each response."
+      },
+      {
+        "role": "user",
+        "content": "rewrite that entire paragraph in the same style like this one:"
+      }
+    ]
+  }
+]
+```
+
+
+#### Prompt configuration
+For consistent results you can configure the token count and its variance. The tool will then sample prompts whose
+token counts are drawn from a normal distribution with the specified variance.
+
+```shell
+--prompt-options "num_tokens=50,max_tokens=60,min_tokens=40,variance=10"
+```
+
+
+## Development
+
+You need [Rust](https://rustup.rs/) installed to build the benchmarking tool.
+```shell
+$ make build
+```
+
+
+## Frequently Asked Questions
+* **What's the difference between constant arrival rate and constant virtual user count?**
+    * **Constant virtual user count** means that the number of virtual users is fixed. Each virtual user sends a single request and waits for the server's response before sending the next one. It simulates a fixed number of users querying the server.
+    * **Constant arrival rate** means that the rate of requests is fixed and the number of virtual users is adjusted to maintain that rate. Requests hit the server independently of response times.
+
+    **Constant virtual user count** is a closed-loop model where the server's response time dictates the number of iterations. **Constant arrival rate** is an open-loop model that is more representative of real-life workloads: for example, at a constant arrival rate of 10 requests/s with an average response time of 2 s, about 20 requests are in flight at any time (Little's law), however slow the server gets.
diff --git a/optimum.py b/optimum.py
new file mode 100644
index 0000000..d468fcf
--- /dev/null
+++ b/optimum.py
@@ -0,0 +1,252 @@
+import argparse
+import hashlib
+import json
+import re
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any, Dict, Protocol, Optional
+from urllib.parse import urlparse
+
+from opensearchpy import OpenSearch
+
+PERFORMANCE_RECORD_LATENCY_MS = "latency"
+PERFORMANCE_RECORD_THROUGHPUT_SAMPLE_PER_SEC = "throughput"
+
+
+@dataclass
+class PerformanceRecord:
+    metric: str
+    kind: str
+    value: Any
+
+    when: datetime = field(default_factory=lambda: datetime.now())
+    meta: Dict[str, Any] = field(default_factory=dict)
+
+    @staticmethod
+    def latency(metric: str, value_ms: float, meta: Optional[Dict[str, Any]] = None, when: Optional[datetime] = None):
+        r"""
+        Create a PerformanceRecord tracking latency information
+        Args:
+            `metric` (`str`):
+                Metric identifier
+            `value_ms` (`float`):
+                The recorded latency, in milliseconds, for the underlying metric record
+            `meta` (`Optional[Dict[str, Any]]`, defaults to `{}`):
+                Information relative to the recorded metric to store alongside the metric readout
+            `when` (`Optional[datetime]`, defaults to `datetime.now()`):
+                Indicates when the underlying metric was recorded
+        Returns:
+            The performance record for the target metric representing latency
+        """
+        return PerformanceRecord(
+            # Fall back to the documented defaults when the optional arguments are omitted
+            metric=metric, kind=PERFORMANCE_RECORD_LATENCY_MS, value=value_ms,
+            when=when or datetime.now(), meta=meta or {}
+        )
+
+    @staticmethod
+    def throughput(metric: str, value_sample_per_sec: float, meta: Optional[Dict[str, Any]] = None,
+                   when: Optional[datetime] = None):
+        r"""
+        Create a PerformanceRecord tracking throughput information
+        Args:
+            `metric` (`str`):
+                Metric identifier
+            `value_sample_per_sec` (`float`):
+                The recorded throughput, in samples per second, for the underlying metric record
+            `meta` (`Optional[Dict[str, Any]]`, defaults to `{}`):
+                Information relative to the recorded metric to store alongside the metric readout
+            `when` (`Optional[datetime]`, defaults to `datetime.now()`):
+                Indicates when the underlying metric was recorded
+        Returns:
+            The performance record for the target metric representing throughput
+        """
+        return PerformanceRecord(
+            metric=metric,
+            kind=PERFORMANCE_RECORD_THROUGHPUT_SAMPLE_PER_SEC,
+            value=value_sample_per_sec,
+            when=when or datetime.now(),
+            meta=meta or {}
+        )
+
+    def as_document(self) -> Dict[str, Any]:
+        r"""
+        Convert the actual `PerformanceRecord` to a dictionary-based representation compatible with document storage
+        Returns:
+            Dictionary with string keys holding the information stored in this record
+        """
+        parcel = {"date": self.when.timestamp(), "metric": self.metric, "kind": self.kind, "value": self.value}
+        return parcel | self.meta
+
+
+class PerformanceTrackerStore(Protocol):
+    r"""
+    Base interface defining a performance tracker tool
+    """
+
+    @staticmethod
+    def from_uri(uri: str) -> "PerformanceTrackerStore":
+        r"""
+        Create the `PerformanceTrackerStore` from the provided URI information
+        Args:
+            `uri` (`str`):
+                URI specifying the protocol and the location where the record(s) will be stored
+        Returns:
+            Instance of a `PerformanceTrackerStore` whose configuration is inferred from the specified URI
+        """
+        pass
+
+    def push(self, collection: str, record: "PerformanceRecord"):
+        r"""
+        Attempt to append the provided record to the specified collection of the underlying tracker
+        Args:
+            `collection` (`str`):
+                Name of the bucket the specified record should be pushed to
+            `record` (`PerformanceRecord`):
+                The materialized record to push
+        """
+        pass
+
+
+class OpenSearchPerformanceTrackerStore(PerformanceTrackerStore):
+    r"""
+    Amazon Web Services (AWS) OpenSearch based PerformanceTrackerStore
+    Supported URIs are as follows:
+    - es://<username>:<password>@<host>
+    - es+aws://<access_key_id>:<secret_access_key>@<host>
+    - es+aws://<host> - will use the AWS credentials stored on the system
+    """
+
+    # Extract region and service from an AWS URL (ex: us-east-1.es.amazonaws.com)
+    AWS_URL_RE = re.compile(r"([a-z]+-[a-z]+-[0-9])\.(.*)?\.amazonaws\.com")
+
+    def __init__(self, url: str, auth):
+        uri = urlparse(url)
+        self._client = OpenSearch(
+            [{"host": uri.hostname, "port": uri.port or 443}],
+            http_auth=auth,
+            http_compress=True,
+            use_ssl=True
+        )
+
+        # Sanity check
+        self._client.info()
+
+    @staticmethod
+    def from_uri(uri: str) -> "PerformanceTrackerStore":
+        if not (_uri := urlparse(uri)).scheme.startswith("es"):
+            raise ValueError(f"Invalid URI {uri}: should start with es:// or es+aws://")
+
+        if _uri.scheme == "es+aws":
+            from boto3 import Session as AwsSession
+            from botocore.credentials import Credentials as AwsCredentials
+            from opensearchpy import Urllib3AWSV4SignerAuth
+
+            # Create the AWS session from the (eventual) credentials
+            if not _uri.username and not _uri.password:
+                session = AwsSession()
+                creds = session.get_credentials()
+            else:
+                creds = AwsCredentials(_uri.username, _uri.password)
+
+            # Parse the URL to extract region and service
+            if len(match := re.findall(OpenSearchPerformanceTrackerStore.AWS_URL_RE, _uri.netloc)) != 1:
+                raise ValueError(f"Failed to parse AWS es service URL {uri}")
+
+            region, service = match[0]
+            auth = Urllib3AWSV4SignerAuth(creds, region, service)
+        else:
+            auth = (_uri.username, _uri.password)
+
+        return OpenSearchPerformanceTrackerStore(uri, auth)
+
+    def _ensure_collection_exists(self, collection: str):
+        if not self._client.indices.exists(collection):
+            self._client.indices.create(collection)
+
+    def push(self, collection: str, record: "PerformanceRecord"):
+        self._ensure_collection_exists(collection)
+        self._client.index(collection, record.as_document())
+
+
+class AutoPerformanceTracker:
+
+    @staticmethod
+    def from_uri(uri: str) -> "PerformanceTrackerStore":
+        if uri.startswith("es://") or uri.startswith("es+aws://"):
+            return OpenSearchPerformanceTrackerStore.from_uri(uri)
+
+        raise ValueError(
+            f"Unable to determine the service associated with URI: {uri}. "
+            "Valid schemes are es:// or es+aws://"
+        )
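+
+# Example usage (an illustrative sketch, not part of the CLI below; the domain
+# name is hypothetical and the es+aws:// form assumes AWS credentials are
+# available on the system):
+#
+#   tracker = AutoPerformanceTracker.from_uri("es+aws://my-domain.us-east-1.es.amazonaws.com")
+#   record = PerformanceRecord.latency("e2e_latency_ms_p90", 123.4, {"engine": "tgi"})
+#   tracker.push("my_collection", record)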
" + "Valid schemas are es:// or es+aws://" + ) + + +def main(): + parser = argparse.ArgumentParser( + prog='text-generation-inference-benchmark-optimum', + description='Pushes benchmark results to an OpenSearch instance' + ) + parser.add_argument( + '--uri', + type=str, + required=False, + help='URI to the OpenSearch instance where to push the benchmark results', + default='"es+aws://search-optimum-benchmarks-kb3meoztyufprqul537nq7deny.us-east-1.es.amazonaws.com"' + ) + parser.add_argument( + '--collection', + type=str, + required=False, + help='Collection name where to push the benchmark results', + default='ci_tgi_performances_tracker' + ) + parser.add_argument( + '--meta', + action='append', + required=False, + help='Meta information to store alongside the benchmark results, use multiple times for multiple values', + nargs='?' + ) + parser.add_argument( + 'results', + type=str, + help='File containing the benchmark results to push', + ) + args = parser.parse_args() + meta = flatten(args.meta) + bench_id = hashlib.md5(open(args.results, 'rb').read()).hexdigest() + meta['bench_id'] = bench_id + + with open(args.results, 'r') as f: + data = json.load(f) + + tracker=AutoPerformanceTracker.from_uri("es+aws://search-optimum-benchmarks-kb3meoztyufprqul537nq7deny.us-east-1.es.amazonaws.com") + filtered_results = [result for result in data['results'] if + result['id'] != 'warmup' and result['id'] != 'throughput'] + latency_metrics_to_push = ['inter_token_latency_ms_p90', 'time_to_first_token_ms_p90', 'e2e_latency_ms_p90'] + throughput_metrics_to_push = ['token_throughput_secs'] + start_time = data['start_time'] + for result in filtered_results: + for metric in latency_metrics_to_push: + record = PerformanceRecord.latency(metric, result[metric], {**meta, 'qps': result['config']['rate']}, + when=start_time) + print(record) + tracker.push("ci_tgi_performances_tracker", record) + for metric in throughput_metrics_to_push: + record = PerformanceRecord.throughput(metric, result[metric], {**meta, 'qps': result['config']['rate']}, + when=start_time) + print(record) + tracker.push("ci_tgi_performances_tracker", record) + + # record=PerformanceRecord.latency("TIME_TO_FIRST_TOKEN", 100,{}) + + +def flatten(l: list[str]) -> dict[str, str]: + d = {} + for e in l: + e = e.split('=') + d[e[0]] = e[1] + return d + + +if __name__ == '__main__': + main() diff --git a/plot.py b/plot.py index fa5f9ac..cd3d000 100644 --- a/plot.py +++ b/plot.py @@ -53,13 +53,13 @@ def plot_inner(x_title, x_key, results, chart_title): labels = ['Time (ms)', 'Time (ms)', 'Time (ms)', 'Tokens/s', 'Count', '%'] - colors = ['#2F5BA1', '#FF9D00'] + colors = ['#2F5BA1'] # Plot each metric in its respective subplot for ax, metric, title, label in zip(axs.flatten(), metrics, titles, labels): for i, engine in enumerate(results['engine'].unique()): df_sorted = results[results['engine'] == engine].sort_values(by=x_key) - ax.plot(df_sorted[x_key], df_sorted[metric], marker='o', markersize=2, color=colors[i % len(colors)], + ax.plot(df_sorted[x_key], df_sorted[metric], marker='o', markersize=2, color=colors[i % len(colors)] if engine!='tgi' else '#FF9D00', label=f"{engine}") ax.set_title(title) ax.tick_params(axis='x', rotation=0) diff --git a/poetry.lock b/poetry.lock index d3ac9af..ead7ba6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,153 @@ # This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
+[[package]] +name = "boto3" +version = "1.35.21" +description = "The AWS SDK for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "boto3-1.35.21-py3-none-any.whl", hash = "sha256:247f88eedce9ae4e014a8fc14a9473759bb8e391460d49396a3b600fb649f33b"}, + {file = "boto3-1.35.21.tar.gz", hash = "sha256:db5fbbd10248db060f2ccce3ae17764f1641c99c8b9f51d422c26ebe25703a1e"}, +] + +[package.dependencies] +botocore = ">=1.35.21,<1.36.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.10.0,<0.11.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.35.21" +description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">=3.8" +files = [ + {file = "botocore-1.35.21-py3-none-any.whl", hash = "sha256:3db9ddfe521edc0753fc8c68caef71c7806e1d2d21ce8cbabc2065b7d79192f2"}, + {file = "botocore-1.35.21.tar.gz", hash = "sha256:db917e7d7b3a2eed1310c6496784bc813c91f020a021c2ab5f9df7d28cdb4f1d"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.21.5)"] + +[[package]] +name = "certifi" +version = "2024.8.30" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, + {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + [[package]] name = "contourpy" version = "1.3.0" @@ -99,6 +247,16 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "events" +version = "0.5" +description = "Bringing the elegance of C# EventHandler to Python" +optional = false +python-versions = "*" +files = [ + {file = "Events-0.5-py3-none-any.whl", hash = "sha256:a7286af378ba3e46640ac9825156c93bdba7502174dd696090fdfcd4d80a1abd"}, +] + [[package]] name = "fonttools" version = "4.53.1" @@ -164,6 +322,31 @@ ufo = ["fs (>=2.2.0,<3)"] unicode = ["unicodedata2 (>=15.1.0)"] woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] +[[package]] +name = "idna" +version = "3.10" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.6" +files = [ + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, +] + +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + [[package]] name = "kiwisolver" version = "1.4.7" @@ -412,6 +595,30 @@ files = [ {file = "numpy-2.1.1.tar.gz", hash = "sha256:d0cf7d55b1051387807405b3898efafa862997b4cba8aa5dbe657be794afeafd"}, ] +[[package]] +name = "opensearch-py" +version = "2.7.1" +description = "Python client for OpenSearch" +optional = false +python-versions = "<4,>=3.8" +files = [ + {file = "opensearch_py-2.7.1-py3-none-any.whl", hash = "sha256:5417650eba98a1c7648e502207cebf3a12beab623ffe0ebbf55f9b1b4b6e44e9"}, + {file = "opensearch_py-2.7.1.tar.gz", hash = "sha256:67ab76e9373669bc71da417096df59827c08369ac3795d5438c9a8be21cbd759"}, +] + +[package.dependencies] +certifi = ">=2024.07.04" +Events = "*" +python-dateutil = "*" +requests = ">=2.32.0,<3.0.0" +urllib3 = {version = ">=1.26.19,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1,<3", markers = "python_version >= \"3.10\""} + +[package.extras] +async = ["aiohttp (>=3.9.4,<4)"] +develop = ["black (>=24.3.0)", "botocore", "coverage (<8.0.0)", "jinja2", "myst-parser", "pytest (>=3.0.0)", "pytest-cov", "pytest-mock (<4.0.0)", "pytz", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx", "sphinx-copybutton", 
"sphinx-rtd-theme"] +docs = ["aiohttp (>=3.9.4,<4)", "myst-parser", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] +kerberos = ["requests-kerberos"] + [[package]] name = "packaging" version = "24.1" @@ -631,6 +838,44 @@ files = [ {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, ] +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "s3transfer" +version = "0.10.2" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">=3.8" +files = [ + {file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"}, + {file = "s3transfer-0.10.2.tar.gz", hash = "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6"}, +] + +[package.dependencies] +botocore = ">=1.33.2,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] + [[package]] name = "scienceplots" version = "2.1.1" @@ -667,7 +912,24 @@ files = [ {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] +[[package]] +name = "urllib3" +version = "2.2.3" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"},
+    {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"},
+]
+
+[package.extras]
+brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+h2 = ["h2 (>=4,<5)"]
+socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
+zstd = ["zstandard (>=0.18.0)"]
+
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "17fb9a870caeb68360c0fd12584fcc9a2fc6e9a032172a768bccdc0f01c5a034"
+content-hash = "6f8b913b00011cd3a4557203881c595b3e208215d1bfc7b9ede53cd79a2634ba"
diff --git a/pyproject.toml b/pyproject.toml
index fcbf265..8e63389 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,8 @@ python = "^3.11"
 matplotlib = "^3.9.2"
 scienceplots = "^2.1.1"
 pandas = "^2.2.2"
+opensearch-py = "^2.7.1"
+boto3 = "^1.35.21"
 
 [build-system]
diff --git a/src/main.rs b/src/main.rs
index ad9db2c..a7b78cb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -16,7 +16,7 @@ struct Args {
     #[clap(default_value = "128", short, long, env)]
     max_vus: u64,
     /// The duration of each benchmark step
-    #[clap(default_value = "10s", short, long, env)]
+    #[clap(default_value = "60s", short, long, env)]
    #[arg(value_parser = parse_duration)]
     duration: Duration,
     /// The rate of requests to send per second (only valid for the ConstantArrivalRate benchmark)
diff --git a/src/requests.rs b/src/requests.rs
index 1bcbfca..cce5227 100644
--- a/src/requests.rs
+++ b/src/requests.rs
@@ -448,13 +448,13 @@ impl TextGenerationAggregatedResponse {
 
     pub fn time_to_first_token(&self) -> Option<Duration> {
         match self.start_time {
-            Some(start_time) => {
+            Some(_) => {
                 match self.times_to_tokens.first() {
                     Some(time_to_first_token) => {
                         Some(time_to_first_token.clone())
                     }
                     None => {
-                        Some(start_time.elapsed())
+                        None
                     }
                 }
             }
@@ -498,4 +498,4 @@ impl TextGenerationAggregatedResponse {
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/results.rs b/src/results.rs
index fdb85f2..ada359a 100644
--- a/src/results.rs
+++ b/src/results.rs
@@ -113,14 +113,8 @@ impl BenchmarkResults {
     }
 
     pub fn e2e_latency_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
-        if self.is_ready() {
-            let mut times: Vec<Duration> = self.get_successful_responses().iter().map(|response| response.e2e_latency().unwrap_or_default()).collect();
-            times.sort();
-            let index = (percentile * times.len() as f64) as usize;
-            Ok(times[index])
-        } else {
-            Err(anyhow::anyhow!(NoResponses))
-        }
+        let quantile = self.quantile_duration(self.get_successful_responses().iter().map(|response| response.e2e_latency().unwrap_or_default()).collect(), percentile)?;
+        Ok(Duration::from_secs_f64(quantile))
     }
 
     pub fn time_to_first_token_avg(&self) -> anyhow::Result<Duration> {
@@ -135,18 +129,9 @@ impl BenchmarkResults {
         }
     }
 
-    pub fn time_to_first_token_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
-        if self.is_ready() {
-            let mut times: Vec<Duration> = self.get_successful_responses().iter().map(|response| response.time_to_first_token().unwrap_or_default()).collect();
-            times.sort();
-            let index = (percentile * times.len() as f64) as usize;
-            if index >= times.len() {
-                return Err(anyhow::anyhow!(NoResponses));
-            }
-            Ok(times[index])
-        } else {
-            Err(anyhow::anyhow!(NoResponses))
-        }
+    pub fn time_to_first_token_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
+        let quantile = self.quantile_duration(self.get_successful_responses().iter().map(|response| response.time_to_first_token().unwrap_or_default()).collect(), percentile)?;
+        Ok(Duration::from_secs_f64(quantile))
     }
 
     pub fn inter_token_latency_avg(&self) -> anyhow::Result<Duration> {
@@ -161,15 +146,9 @@ impl BenchmarkResults {
         }
     }
 
-    pub fn inter_token_latency_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
-        if self.is_ready() {
-            let mut times: Vec<Duration> = self.get_successful_responses().iter().map(|response| response.inter_token_latency().unwrap_or_default()).collect();
-            times.sort();
-            let index = (percentile * times.len() as f64) as usize;
-            Ok(times[index])
-        } else {
-            Err(anyhow::anyhow!(NoResponses))
-        }
+    pub fn inter_token_latency_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
+        let quantile = self.quantile_duration(self.get_successful_responses().iter().map(|response| response.inter_token_latency().unwrap_or_default()).collect(), percentile)?;
+        Ok(Duration::from_secs_f64(quantile))
     }
 
     pub fn executor_type(&self) -> ExecutorType {
@@ -187,6 +166,23 @@ impl BenchmarkResults {
     pub fn get_responses(&self) -> Vec<TextGenerationAggregatedResponse> {
         self.aggregated_responses.clone()
     }
+
+    /// Calculate the quantile of a given data set using the interpolation method
+    /// Results are similar to `numpy.percentile`
+    fn quantile_duration(&self, mut data: Vec<Duration>, quantile: f64) -> anyhow::Result<f64> {
+        if self.is_ready() && !data.is_empty() {
+            data.sort();
+            let i = (quantile * (data.len() - 1) as f64).floor() as usize;
+            let delta = (data.len() - 1) as f64 * quantile - i as f64;
+            if i >= data.len() {
+                return Err(anyhow::anyhow!(NoResponses));
+            }
+            // Clamp the upper index so a quantile of 1.0 does not index past the end
+            let upper = (i + 1).min(data.len() - 1);
+            Ok((1. - delta) * data[i].as_secs_f64() + delta * data[upper].as_secs_f64())
+        } else {
+            Err(anyhow::anyhow!(NoResponses))
+        }
+    }
 }
 
 impl Debug for BenchmarkResults {
@@ -251,4 +247,56 @@ impl BenchmarkReport {
     pub fn end_time(&self) -> Option<DateTime<Utc>> {
         self.end_time
     }
+}
+
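+// Worked example of the interpolation above, using the data from the test below:
+// data = [100, 600, 1100, 1600] ms and quantile = 0.9 give
+// i = floor(0.9 * 3) = 2 and delta = 2.7 - 2 = 0.7, hence
+// 0.3 * 1100 + 0.7 * 1600 = 1450 ms, matching `numpy.percentile`.
+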
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_time_to_first_token_percentile() {
+        let mut response1 = TextGenerationAggregatedResponse::default();
+        response1.start_time = Some(std::time::Instant::now());
+        response1.end_time = Some(std::time::Instant::now() + std::time::Duration::from_millis(100));
+        response1.num_prompt_tokens = 10;
+        response1.num_generated_tokens = 100;
+        response1.failed = false;
+        response1.times_to_tokens = vec![Duration::from_millis(100), Duration::from_millis(200), Duration::from_millis(300), Duration::from_millis(400), Duration::from_millis(500)];
+
+        let mut response2 = TextGenerationAggregatedResponse::default();
+        response2.start_time = Some(std::time::Instant::now());
+        response2.end_time = Some(std::time::Instant::now() + std::time::Duration::from_millis(200));
+        response2.num_prompt_tokens = 10;
+        response2.num_generated_tokens = 100;
+        response2.failed = false;
+        response2.times_to_tokens = vec![Duration::from_millis(600), Duration::from_millis(700), Duration::from_millis(800), Duration::from_millis(900), Duration::from_millis(1000)];
+
+        let mut response3 = TextGenerationAggregatedResponse::default();
+        response3.start_time = Some(std::time::Instant::now());
+        response3.end_time = Some(std::time::Instant::now() + std::time::Duration::from_millis(300));
+        response3.num_prompt_tokens = 10;
+        response3.num_generated_tokens = 100;
+        response3.failed = false;
+        response3.times_to_tokens = vec![Duration::from_millis(1100), Duration::from_millis(1200), Duration::from_millis(1300), Duration::from_millis(1400), Duration::from_millis(1500)];
+
+        let mut response4 = TextGenerationAggregatedResponse::default();
+        response4.start_time = Some(std::time::Instant::now());
+        response4.end_time = Some(std::time::Instant::now() + std::time::Duration::from_millis(300));
+        response4.num_prompt_tokens = 10;
+        response4.num_generated_tokens = 100;
+        response4.failed = false;
+        response4.times_to_tokens = vec![Duration::from_millis(1600), Duration::from_millis(1700), Duration::from_millis(1800), Duration::from_millis(1900), Duration::from_millis(2000)];
+
+        let mut results = BenchmarkResults::new("test".to_string(), ExecutorType::ConstantArrivalRate, ExecutorConfig {
+            max_vus: 0,
+            duration: Default::default(),
+            rate: None,
+        });
+        results.add_response(response1);
+        results.add_response(response2);
+        results.add_response(response3);
+        results.add_response(response4);
+
+        assert_eq!(results.time_to_first_token_percentile(0.9).unwrap(), Duration::from_millis(1450));
+        assert_eq!(results.time_to_first_token_percentile(0.5).unwrap(), Duration::from_millis(850));
+    }
+}
\ No newline at end of file
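
For reference, here is a hypothetical invocation of the new `optimum.py` push script, assembled from the arguments its parser declares. The `--meta` key/value pairs are made-up examples; `--uri` is omitted, so the script falls back to the default `es+aws://` endpoint defined in the parser, which assumes AWS credentials are available on the system:

```shell
python optimum.py \
  --collection ci_tgi_performances_tracker \
  --meta engine=tgi --meta model=llama-3.1-8b \
  results.json
```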