diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..b1161a0
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,6 @@
+build:
+	cargo build --release --package text-generation-inference-benchmark --bin text-generation-inference-benchmark
+
+# Pass extra CLI flags through ARGS, e.g. `make run ARGS="--rate 10"`
+run: build
+	cargo run --package text-generation-inference-benchmark --bin text-generation-inference-benchmark -- $(ARGS)
\ No newline at end of file
diff --git a/README.md b/README.md
index ac97680..b857ca2 100644
--- a/README.md
+++ b/README.md
@@ -1,20 +1,39 @@
 # Text Generation Inference benchmarking tool
 
-A lightweight benchmarking tool for inference servers.
+A lightweight benchmarking tool for LLM inference servers.
+It benchmarks using either a constant arrival rate or a constant virtual user count.
+
+![ui.png](assets/ui.png)
 
+## Table of contents
+
+* [Text Generation Inference benchmarking tool](#text-generation-inference-benchmarking-tool)
+  * [Table of contents](#table-of-contents)
+  * [TODO](#todo)
+  * [Get started](#get-started)
+    * [Run a benchmark](#run-a-benchmark)
+    * [Configure your benchmark](#configure-your-benchmark)
+  * [Development](#development)
+  * [Frequently Asked Questions](#frequently-asked-questions)
+
 ## TODO
 
 - [X] Customizable token count and variance
 - [ ] Check results
-- [X] Allow for multiturn prompts for prefix caching
+- [X] Allow for system prompts for prefix caching
+- [ ] Allow for multi-turn prompts
 - [ ] Push results to Optimum benchmark backend
-- [ ] Script to generate plots from results
+- [X] Script to generate plots from results
 
-## Running a benchmark
-```
+## Get started
+
+### Run a benchmark
+
+Run a benchmark using the Docker image:
+
+```shell
 # start a TGI/vLLM server somewhere, then run benchmark...
 # ... we mount results to the current directory
 $ docker run \
@@ -33,4 +52,81 @@ $ docker run \
     --decode-options "num_tokens=50,max_tokens=60,min_tokens=40,variance=10"
 ```
 
-Results will be saved in `results.json` in current directory.
\ No newline at end of file
+Results will be saved in `results.json` in the current directory.
+
+
+### Configure your benchmark
+
+#### Benchmark mode
+
+In the default mode, the tool runs a `sweep` benchmark. It first runs a throughput test to find the maximum throughput,
+then sweeps over QPS values up to that maximum.
+
+Available modes:
+- `sweep`: runs a sweep benchmark
+- `rate`: runs a benchmark at a fixed request rate
+- `throughput`: runs a benchmark at a fixed throughput (constant VUs)
+
+
+#### Dataset configuration
+
+Prompts are sampled from a Hugging Face dataset file, using a [subset of ShareGPT
+by default](https://huggingface.co/datasets/hlarcher/share_gpt_small). You can specify a different dataset file using the
+`--dataset` and `--dataset-file` options.
+
+The dataset is expected to be JSON with the following format:
+```json
+[
+  {
+    "conversations": [
+      {
+        "role": "user",
+        "content": "rewrite that entire paragraph in the same style like this one: "
+      }
+    ]
+  }
+]
+```
+
+To benchmark with prefix caching, you can use a system prompt that will be sent with each request of a conversation.
+```json
+[
+  {
+    "conversations": [
+      {
+        "role": "system",
+        "content": "You are a helpful assistant that makes jokes at each response."
+      },
+      {
+        "role": "user",
+        "content": "rewrite that entire paragraph in the same style like this one:"
+      }
+    ]
+  }
+]
+```
+
+
+#### Prompt configuration
+For consistent results you can configure the token count and its variance. The tool will then sample prompts whose
+token counts are drawn from a normal distribution with the specified variance.
+
+```shell
+--prompt-options "num_tokens=50,max_tokens=60,min_tokens=40,variance=10"
+```
+
+
+## Development
+
+You need [Rust](https://rustup.rs/) installed to build the benchmarking tool.
+```shell
+$ make build
+```
+
+
+## Frequently Asked Questions
+* **What's the difference between constant arrival rate and constant virtual user count?**
+    * **Constant virtual user count** means that the number of virtual users is fixed. Each virtual user sends a single request and waits for the server's response before sending the next one. It simulates a fixed number of users querying the server.
+    * **Constant arrival rate** means that the rate of requests is fixed and the number of virtual users is adjusted to maintain that rate. Requests hit the server independently of response times.
+
+    **Constant virtual user count** is a closed-loop model where the server's response time dictates the number of iterations. **Constant arrival rate** is an open-loop model that is more representative of real-life workloads: for example, at a constant arrival rate of 10 requests/s with an average response time of 2 s, about 20 requests are in flight at any time (Little's law), however slow the server gets.
diff --git a/optimum.py b/optimum.py
new file mode 100644
index 0000000..d468fcf
--- /dev/null
+++ b/optimum.py
@@ -0,0 +1,252 @@
+import argparse
+import hashlib
+import json
+import re
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import Any, Dict, Protocol, Optional
+from urllib.parse import urlparse
+
+from opensearchpy import OpenSearch
+
+PERFORMANCE_RECORD_LATENCY_MS = "latency"
+PERFORMANCE_RECORD_THROUGHPUT_SAMPLE_PER_SEC = "throughput"
+
+
+@dataclass
+class PerformanceRecord:
+    metric: str
+    kind: str
+    value: Any
+
+    when: datetime = field(default_factory=lambda: datetime.now())
+    meta: Dict[str, Any] = field(default_factory=dict)
+
+    @staticmethod
+    def latency(metric: str, value_ms: float, meta: Optional[Dict[str, Any]] = None, when: Optional[datetime] = None):
+        r"""
+        Create a PerformanceRecord tracking latency information
+        Args:
+            `metric` (`str`):
+                Metric identifier
+            `value_ms` (`float`):
+                The recorded latency, in milliseconds, for the underlying metric record
+            `meta` (`Optional[Dict[str, Any]]`, defaults to `{}`):
+                Information relative to the recorded metric to store alongside the metric readout
+            `when` (`Optional[datetime]`, defaults to `datetime.now()`):
+                Indicates when the underlying metric was recorded
+        Returns:
+            The performance record for the target metric representing latency
+        """
+        return PerformanceRecord(
+            # Fall back to the documented defaults when the optional arguments are omitted
+            metric=metric, kind=PERFORMANCE_RECORD_LATENCY_MS, value=value_ms,
+            when=when or datetime.now(), meta=meta or {}
+        )
+
+    @staticmethod
+    def throughput(metric: str, value_sample_per_sec: float, meta: Optional[Dict[str, Any]] = None,
+                   when: Optional[datetime] = None):
+        r"""
+        Create a PerformanceRecord tracking throughput information
+        Args:
+            `metric` (`str`):
+                Metric identifier
+            `value_sample_per_sec` (`float`):
+                The recorded throughput, in samples per second, for the underlying metric record
+            `meta` (`Optional[Dict[str, Any]]`, defaults to `{}`):
+                Information relative to the recorded metric to store alongside the metric readout
+            `when` (`Optional[datetime]`, defaults to `datetime.now()`):
+                Indicates when the underlying metric was recorded
+        Returns:
+            The performance record for the target metric representing throughput
+        """
+        return PerformanceRecord(
+            metric=metric,
+            kind=PERFORMANCE_RECORD_THROUGHPUT_SAMPLE_PER_SEC,
+            value=value_sample_per_sec,
+            when=when or datetime.now(),
+            meta=meta or {}
+        )
+
+    def as_document(self) -> Dict[str, Any]:
+        r"""
+        Convert the actual `PerformanceRecord` to a dictionary-based representation compatible with document storage
+        Returns:
+            Dictionary with string keys holding the information stored in this record
+        """
+        parcel = {"date": self.when.timestamp(), "metric": self.metric, "kind": self.kind, "value": self.value}
+        return parcel | self.meta
+
+
+class PerformanceTrackerStore(Protocol):
+    r"""
+    Base interface defining a performance tracker tool
+    """
+
+    @staticmethod
+    def from_uri(uri: str) -> "PerformanceTrackerStore":
+        r"""
+        Create the `PerformanceTrackerStore` from the provided URI information
+        Args:
+            `uri` (`str`):
+                URI specifying the protocol and the location where the record(s) will be stored
+        Returns:
+            Instance of a `PerformanceTrackerStore` whose configuration is inferred from the specified URI
+        """
+        pass
+
+    def push(self, collection: str, record: "PerformanceRecord"):
+        r"""
+        Attempt to append the provided record to the specified collection of the underlying tracker
+        Args:
+            `collection` (`str`):
+                Name of the bucket the specified record should be pushed to
+            `record` (`PerformanceRecord`):
+                The materialized record to push
+        """
+        pass
+
+
+class OpenSearchPerformanceTrackerStore(PerformanceTrackerStore):
+    r"""
+    Amazon Web Services (AWS) OpenSearch based PerformanceTrackerStore
+    Supported URIs are as follows:
+    - es://<username>:<password>@<host>
+    - es+aws://<access_key_id>:<secret_access_key>@<host>
+    - es+aws://<host> - will use the AWS credentials stored on the system
+    """
+
+    # Extract region and service from an AWS URL (ex: us-east-1.es.amazonaws.com)
+    AWS_URL_RE = re.compile(r"([a-z]+-[a-z]+-[0-9])\.(.*)?\.amazonaws\.com")
+
+    def __init__(self, url: str, auth):
+        uri = urlparse(url)
+        self._client = OpenSearch(
+            [{"host": uri.hostname, "port": uri.port or 443}],
+            http_auth=auth,
+            http_compress=True,
+            use_ssl=True
+        )
+
+        # Sanity check
+        self._client.info()
+
+    @staticmethod
+    def from_uri(uri: str) -> "PerformanceTrackerStore":
+        if not (_uri := urlparse(uri)).scheme.startswith("es"):
+            raise ValueError(f"Invalid URI {uri}: should start with es:// or es+aws://")
+
+        if _uri.scheme == "es+aws":
+            from boto3 import Session as AwsSession
+            from botocore.credentials import Credentials as AwsCredentials
+            from opensearchpy import Urllib3AWSV4SignerAuth
+
+            # Create the AWS session from the (eventual) credentials
+            if not _uri.username and not _uri.password:
+                session = AwsSession()
+                creds = session.get_credentials()
+            else:
+                creds = AwsCredentials(_uri.username, _uri.password)
+
+            # Parse the URL to extract region and service
+            if len(match := re.findall(OpenSearchPerformanceTrackerStore.AWS_URL_RE, _uri.netloc)) != 1:
+                raise ValueError(f"Failed to parse AWS es service URL {uri}")
+
+            region, service = match[0]
+            auth = Urllib3AWSV4SignerAuth(creds, region, service)
+        else:
+            auth = (_uri.username, _uri.password)
+
+        return OpenSearchPerformanceTrackerStore(uri, auth)
+
+    def _ensure_collection_exists(self, collection: str):
+        if not self._client.indices.exists(collection):
+            self._client.indices.create(collection)
+
+    def push(self, collection: str, record: "PerformanceRecord"):
+        self._ensure_collection_exists(collection)
+        self._client.index(collection, record.as_document())
+
+
+class AutoPerformanceTracker:
+
+    @staticmethod
+    def from_uri(uri: str) -> "PerformanceTrackerStore":
+        if uri.startswith("es://") or uri.startswith("es+aws://"):
+            return OpenSearchPerformanceTrackerStore.from_uri(uri)
+
+        raise ValueError(
+            f"Unable to determine the service associated with URI: {uri}. "
+            "Valid schemes are es:// or es+aws://"
+        )
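+
+# Example usage (an illustrative sketch, not part of the CLI below; the domain
+# name is hypothetical and the es+aws:// form assumes AWS credentials are
+# available on the system):
+#
+#   tracker = AutoPerformanceTracker.from_uri("es+aws://my-domain.us-east-1.es.amazonaws.com")
+#   record = PerformanceRecord.latency("e2e_latency_ms_p90", 123.4, {"engine": "tgi"})
+#   tracker.push("my_collection", record)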
" + "Valid schemas are es:// or es+aws://" + ) + + +def main(): + parser = argparse.ArgumentParser( + prog='text-generation-inference-benchmark-optimum', + description='Pushes benchmark results to an OpenSearch instance' + ) + parser.add_argument( + '--uri', + type=str, + required=False, + help='URI to the OpenSearch instance where to push the benchmark results', + default='"es+aws://search-optimum-benchmarks-kb3meoztyufprqul537nq7deny.us-east-1.es.amazonaws.com"' + ) + parser.add_argument( + '--collection', + type=str, + required=False, + help='Collection name where to push the benchmark results', + default='ci_tgi_performances_tracker' + ) + parser.add_argument( + '--meta', + action='append', + required=False, + help='Meta information to store alongside the benchmark results, use multiple times for multiple values', + nargs='?' + ) + parser.add_argument( + 'results', + type=str, + help='File containing the benchmark results to push', + ) + args = parser.parse_args() + meta = flatten(args.meta) + bench_id = hashlib.md5(open(args.results, 'rb').read()).hexdigest() + meta['bench_id'] = bench_id + + with open(args.results, 'r') as f: + data = json.load(f) + + tracker=AutoPerformanceTracker.from_uri("es+aws://search-optimum-benchmarks-kb3meoztyufprqul537nq7deny.us-east-1.es.amazonaws.com") + filtered_results = [result for result in data['results'] if + result['id'] != 'warmup' and result['id'] != 'throughput'] + latency_metrics_to_push = ['inter_token_latency_ms_p90', 'time_to_first_token_ms_p90', 'e2e_latency_ms_p90'] + throughput_metrics_to_push = ['token_throughput_secs'] + start_time = data['start_time'] + for result in filtered_results: + for metric in latency_metrics_to_push: + record = PerformanceRecord.latency(metric, result[metric], {**meta, 'qps': result['config']['rate']}, + when=start_time) + print(record) + tracker.push("ci_tgi_performances_tracker", record) + for metric in throughput_metrics_to_push: + record = PerformanceRecord.throughput(metric, result[metric], {**meta, 'qps': result['config']['rate']}, + when=start_time) + print(record) + tracker.push("ci_tgi_performances_tracker", record) + + # record=PerformanceRecord.latency("TIME_TO_FIRST_TOKEN", 100,{}) + + +def flatten(l: list[str]) -> dict[str, str]: + d = {} + for e in l: + e = e.split('=') + d[e[0]] = e[1] + return d + + +if __name__ == '__main__': + main() diff --git a/plot.py b/plot.py index fa5f9ac..cd3d000 100644 --- a/plot.py +++ b/plot.py @@ -53,13 +53,13 @@ def plot_inner(x_title, x_key, results, chart_title): labels = ['Time (ms)', 'Time (ms)', 'Time (ms)', 'Tokens/s', 'Count', '%'] - colors = ['#2F5BA1', '#FF9D00'] + colors = ['#2F5BA1'] # Plot each metric in its respective subplot for ax, metric, title, label in zip(axs.flatten(), metrics, titles, labels): for i, engine in enumerate(results['engine'].unique()): df_sorted = results[results['engine'] == engine].sort_values(by=x_key) - ax.plot(df_sorted[x_key], df_sorted[metric], marker='o', markersize=2, color=colors[i % len(colors)], + ax.plot(df_sorted[x_key], df_sorted[metric], marker='o', markersize=2, color=colors[i % len(colors)] if engine!='tgi' else '#FF9D00', label=f"{engine}") ax.set_title(title) ax.tick_params(axis='x', rotation=0) diff --git a/poetry.lock b/poetry.lock index d3ac9af..ead7ba6 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,5 +1,153 @@ # This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
+[[package]] +name = "boto3" +version = "1.35.21" +description = "The AWS SDK for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "boto3-1.35.21-py3-none-any.whl", hash = "sha256:247f88eedce9ae4e014a8fc14a9473759bb8e391460d49396a3b600fb649f33b"}, + {file = "boto3-1.35.21.tar.gz", hash = "sha256:db5fbbd10248db060f2ccce3ae17764f1641c99c8b9f51d422c26ebe25703a1e"}, +] + +[package.dependencies] +botocore = ">=1.35.21,<1.36.0" +jmespath = ">=0.7.1,<2.0.0" +s3transfer = ">=0.10.0,<0.11.0" + +[package.extras] +crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] + +[[package]] +name = "botocore" +version = "1.35.21" +description = "Low-level, data-driven core of boto 3." +optional = false +python-versions = ">=3.8" +files = [ + {file = "botocore-1.35.21-py3-none-any.whl", hash = "sha256:3db9ddfe521edc0753fc8c68caef71c7806e1d2d21ce8cbabc2065b7d79192f2"}, + {file = "botocore-1.35.21.tar.gz", hash = "sha256:db917e7d7b3a2eed1310c6496784bc813c91f020a021c2ab5f9df7d28cdb4f1d"}, +] + +[package.dependencies] +jmespath = ">=0.7.1,<2.0.0" +python-dateutil = ">=2.1,<3.0.0" +urllib3 = {version = ">=1.25.4,<2.2.0 || >2.2.0,<3", markers = "python_version >= \"3.10\""} + +[package.extras] +crt = ["awscrt (==0.21.5)"] + +[[package]] +name = "certifi" +version = "2024.8.30" +description = "Python package for providing Mozilla's CA Bundle." +optional = false +python-versions = ">=3.6" +files = [ + {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, + {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, +] + +[[package]] +name = "charset-normalizer" +version = "3.3.2" +description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." 
+optional = false +python-versions = ">=3.7.0" +files = [ + {file = "charset-normalizer-3.3.2.tar.gz", hash = "sha256:f30c3cb33b24454a82faecaf01b19c18562b1e89558fb6c56de4d9118a032fd5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:25baf083bf6f6b341f4121c2f3c548875ee6f5339300e08be3f2b2ba1721cdd3"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:06435b539f889b1f6f4ac1758871aae42dc3a8c0e24ac9e60c2384973ad73027"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9063e24fdb1e498ab71cb7419e24622516c4a04476b17a2dab57e8baa30d6e03"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6897af51655e3691ff853668779c7bad41579facacf5fd7253b0133308cf000d"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1d3193f4a680c64b4b6a9115943538edb896edc190f0b222e73761716519268e"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:cd70574b12bb8a4d2aaa0094515df2463cb429d8536cfb6c7ce983246983e5a6"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8465322196c8b4d7ab6d1e049e4c5cb460d0394da4a27d23cc242fbf0034b6b5"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a9a8e9031d613fd2009c182b69c7b2c1ef8239a0efb1df3f7c8da66d5dd3d537"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:beb58fe5cdb101e3a055192ac291b7a21e3b7ef4f67fa1d74e331a7f2124341c"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:e06ed3eb3218bc64786f7db41917d4e686cc4856944f53d5bdf83a6884432e12"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_ppc64le.whl", hash = "sha256:2e81c7b9c8979ce92ed306c249d46894776a909505d8f5a4ba55b14206e3222f"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_s390x.whl", hash = "sha256:572c3763a264ba47b3cf708a44ce965d98555f618ca42c926a9c1616d8f34269"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fd1abc0d89e30cc4e02e4064dc67fcc51bd941eb395c502aac3ec19fab46b519"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win32.whl", hash = "sha256:3d47fa203a7bd9c5b6cee4736ee84ca03b8ef23193c0d1ca99b5089f72645c73"}, + {file = "charset_normalizer-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:10955842570876604d404661fbccbc9c7e684caf432c09c715ec38fbae45ae09"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:802fe99cca7457642125a8a88a084cef28ff0cf9407060f7b93dca5aa25480db"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:573f6eac48f4769d667c4442081b1794f52919e7edada77495aaed9236d13a96"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:549a3a73da901d5bc3ce8d24e0600d1fa85524c10287f6004fbab87672bf3e1e"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f27273b60488abe721a075bcca6d7f3964f9f6f067c8c4c605743023d7d3944f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1ceae2f17a9c33cb48e3263960dc5fc8005351ee19db217e9b1bb15d28c02574"}, + {file = 
"charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:65f6f63034100ead094b8744b3b97965785388f308a64cf8d7c34f2f2e5be0c4"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:753f10e867343b4511128c6ed8c82f7bec3bd026875576dfd88483c5c73b2fd8"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4a78b2b446bd7c934f5dcedc588903fb2f5eec172f3d29e52a9096a43722adfc"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:e537484df0d8f426ce2afb2d0f8e1c3d0b114b83f8850e5f2fbea0e797bd82ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:eb6904c354526e758fda7167b33005998fb68c46fbc10e013ca97f21ca5c8887"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_ppc64le.whl", hash = "sha256:deb6be0ac38ece9ba87dea880e438f25ca3eddfac8b002a2ec3d9183a454e8ae"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_s390x.whl", hash = "sha256:4ab2fe47fae9e0f9dee8c04187ce5d09f48eabe611be8259444906793ab7cbce"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:80402cd6ee291dcb72644d6eac93785fe2c8b9cb30893c1af5b8fdd753b9d40f"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win32.whl", hash = "sha256:7cd13a2e3ddeed6913a65e66e94b51d80a041145a026c27e6bb76c31a853c6ab"}, + {file = "charset_normalizer-3.3.2-cp311-cp311-win_amd64.whl", hash = "sha256:663946639d296df6a2bb2aa51b60a2454ca1cb29835324c640dafb5ff2131a77"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:0b2b64d2bb6d3fb9112bafa732def486049e63de9618b5843bcdd081d8144cd8"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:ddbb2551d7e0102e7252db79ba445cdab71b26640817ab1e3e3648dad515003b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:55086ee1064215781fff39a1af09518bc9255b50d6333f2e4c74ca09fac6a8f6"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8f4a014bc36d3c57402e2977dada34f9c12300af536839dc38c0beab8878f38a"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a10af20b82360ab00827f916a6058451b723b4e65030c5a18577c8b2de5b3389"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8d756e44e94489e49571086ef83b2bb8ce311e730092d2c34ca8f7d925cb20aa"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:90d558489962fd4918143277a773316e56c72da56ec7aa3dc3dbbe20fdfed15b"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6ac7ffc7ad6d040517be39eb591cac5ff87416c2537df6ba3cba3bae290c0fed"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:7ed9e526742851e8d5cc9e6cf41427dfc6068d4f5a3bb03659444b4cabf6bc26"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:8bdb58ff7ba23002a4c5808d608e4e6c687175724f54a5dade5fa8c67b604e4d"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_ppc64le.whl", hash = "sha256:6b3251890fff30ee142c44144871185dbe13b11bab478a88887a639655be1068"}, + {file = 
"charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_s390x.whl", hash = "sha256:b4a23f61ce87adf89be746c8a8974fe1c823c891d8f86eb218bb957c924bb143"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:efcb3f6676480691518c177e3b465bcddf57cea040302f9f4e6e191af91174d4"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win32.whl", hash = "sha256:d965bba47ddeec8cd560687584e88cf699fd28f192ceb452d1d7ee807c5597b7"}, + {file = "charset_normalizer-3.3.2-cp312-cp312-win_amd64.whl", hash = "sha256:96b02a3dc4381e5494fad39be677abcb5e6634bf7b4fa83a6dd3112607547001"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:95f2a5796329323b8f0512e09dbb7a1860c46a39da62ecb2324f116fa8fdc85c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c002b4ffc0be611f0d9da932eb0f704fe2602a9a949d1f738e4c34c75b0863d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a981a536974bbc7a512cf44ed14938cf01030a99e9b3a06dd59578882f06f985"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3287761bc4ee9e33561a7e058c72ac0938c4f57fe49a09eae428fd88aafe7bb6"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:42cb296636fcc8b0644486d15c12376cb9fa75443e00fb25de0b8602e64c1714"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0a55554a2fa0d408816b3b5cedf0045f4b8e1a6065aec45849de2d6f3f8e9786"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c083af607d2515612056a31f0a8d9e0fcb5876b7bfc0abad3ecd275bc4ebc2d5"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:87d1351268731db79e0f8e745d92493ee2841c974128ef629dc518b937d9194c"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_ppc64le.whl", hash = "sha256:bd8f7df7d12c2db9fab40bdd87a7c09b1530128315d047a086fa3ae3435cb3a8"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_s390x.whl", hash = "sha256:c180f51afb394e165eafe4ac2936a14bee3eb10debc9d9e4db8958fe36afe711"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:8c622a5fe39a48f78944a87d4fb8a53ee07344641b0562c540d840748571b811"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win32.whl", hash = "sha256:db364eca23f876da6f9e16c9da0df51aa4f104a972735574842618b8c6d999d4"}, + {file = "charset_normalizer-3.3.2-cp37-cp37m-win_amd64.whl", hash = "sha256:86216b5cee4b06df986d214f664305142d9c76df9b6512be2738aa72a2048f99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:6463effa3186ea09411d50efc7d85360b38d5f09b870c48e4600f63af490e56a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:6c4caeef8fa63d06bd437cd4bdcf3ffefe6738fb1b25951440d80dc7df8c03ac"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:37e55c8e51c236f95b033f6fb391d7d7970ba5fe7ff453dad675e88cf303377a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb69256e180cb6c8a894fee62b3afebae785babc1ee98b81cdf68bbca1987f33"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:ae5f4161f18c61806f411a13b0310bea87f987c7d2ecdbdaad0e94eb2e404238"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2b0a0c0517616b6869869f8c581d4eb2dd83a4d79e0ebcb7d373ef9956aeb0a"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:45485e01ff4d3630ec0d9617310448a8702f70e9c01906b0d0118bdf9d124cf2"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:eb00ed941194665c332bf8e078baf037d6c35d7c4f3102ea2d4f16ca94a26dc8"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:2127566c664442652f024c837091890cb1942c30937add288223dc895793f898"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:a50aebfa173e157099939b17f18600f72f84eed3049e743b68ad15bd69b6bf99"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_ppc64le.whl", hash = "sha256:4d0d1650369165a14e14e1e47b372cfcb31d6ab44e6e33cb2d4e57265290044d"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_s390x.whl", hash = "sha256:923c0c831b7cfcb071580d3f46c4baf50f174be571576556269530f4bbd79d04"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:06a81e93cd441c56a9b65d8e1d043daeb97a3d0856d177d5c90ba85acb3db087"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win32.whl", hash = "sha256:6ef1d82a3af9d3eecdba2321dc1b3c238245d890843e040e41e470ffa64c3e25"}, + {file = "charset_normalizer-3.3.2-cp38-cp38-win_amd64.whl", hash = "sha256:eb8821e09e916165e160797a6c17edda0679379a4be5c716c260e836e122f54b"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:c235ebd9baae02f1b77bcea61bce332cb4331dc3617d254df3323aa01ab47bd4"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:5b4c145409bef602a690e7cfad0a15a55c13320ff7a3ad7ca59c13bb8ba4d45d"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:68d1f8a9e9e37c1223b656399be5d6b448dea850bed7d0f87a8311f1ff3dabb0"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22afcb9f253dac0696b5a4be4a1c0f8762f8239e21b99680099abd9b2b1b2269"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e27ad930a842b4c5eb8ac0016b0a54f5aebbe679340c26101df33424142c143c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1f79682fbe303db92bc2b1136016a38a42e835d932bab5b3b1bfcfbf0640e519"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b261ccdec7821281dade748d088bb6e9b69e6d15b30652b74cbbac25e280b796"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:122c7fa62b130ed55f8f285bfd56d5f4b4a5b503609d181f9ad85e55c89f4185"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:d0eccceffcb53201b5bfebb52600a5fb483a20b61da9dbc885f8b103cbe7598c"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:9f96df6923e21816da7e0ad3fd47dd8f94b2a5ce594e00677c0013018b813458"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_ppc64le.whl", hash = 
"sha256:7f04c839ed0b6b98b1a7501a002144b76c18fb1c1850c8b98d458ac269e26ed2"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_s390x.whl", hash = "sha256:34d1c8da1e78d2e001f363791c98a272bb734000fcef47a491c1e3b0505657a8"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ff8fa367d09b717b2a17a052544193ad76cd49979c805768879cb63d9ca50561"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win32.whl", hash = "sha256:aed38f6e4fb3f5d6bf81bfa990a07806be9d83cf7bacef998ab1a9bd660a581f"}, + {file = "charset_normalizer-3.3.2-cp39-cp39-win_amd64.whl", hash = "sha256:b01b88d45a6fcb69667cd6d2f7a9aeb4bf53760d7fc536bf679ec94fe9f3ff3d"}, + {file = "charset_normalizer-3.3.2-py3-none-any.whl", hash = "sha256:3e4d1f6587322d2788836a99c69062fbb091331ec940e02d12d179c1d53e25fc"}, +] + [[package]] name = "contourpy" version = "1.3.0" @@ -99,6 +247,16 @@ files = [ docs = ["ipython", "matplotlib", "numpydoc", "sphinx"] tests = ["pytest", "pytest-cov", "pytest-xdist"] +[[package]] +name = "events" +version = "0.5" +description = "Bringing the elegance of C# EventHandler to Python" +optional = false +python-versions = "*" +files = [ + {file = "Events-0.5-py3-none-any.whl", hash = "sha256:a7286af378ba3e46640ac9825156c93bdba7502174dd696090fdfcd4d80a1abd"}, +] + [[package]] name = "fonttools" version = "4.53.1" @@ -164,6 +322,31 @@ ufo = ["fs (>=2.2.0,<3)"] unicode = ["unicodedata2 (>=15.1.0)"] woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] +[[package]] +name = "idna" +version = "3.10" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.6" +files = [ + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, +] + +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + +[[package]] +name = "jmespath" +version = "1.0.1" +description = "JSON Matching Expressions" +optional = false +python-versions = ">=3.7" +files = [ + {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, + {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, +] + [[package]] name = "kiwisolver" version = "1.4.7" @@ -412,6 +595,30 @@ files = [ {file = "numpy-2.1.1.tar.gz", hash = "sha256:d0cf7d55b1051387807405b3898efafa862997b4cba8aa5dbe657be794afeafd"}, ] +[[package]] +name = "opensearch-py" +version = "2.7.1" +description = "Python client for OpenSearch" +optional = false +python-versions = "<4,>=3.8" +files = [ + {file = "opensearch_py-2.7.1-py3-none-any.whl", hash = "sha256:5417650eba98a1c7648e502207cebf3a12beab623ffe0ebbf55f9b1b4b6e44e9"}, + {file = "opensearch_py-2.7.1.tar.gz", hash = "sha256:67ab76e9373669bc71da417096df59827c08369ac3795d5438c9a8be21cbd759"}, +] + +[package.dependencies] +certifi = ">=2024.07.04" +Events = "*" +python-dateutil = "*" +requests = ">=2.32.0,<3.0.0" +urllib3 = {version = ">=1.26.19,<2.2.0 || >2.2.0,<2.2.1 || >2.2.1,<3", markers = "python_version >= \"3.10\""} + +[package.extras] +async = ["aiohttp (>=3.9.4,<4)"] +develop = ["black (>=24.3.0)", "botocore", "coverage (<8.0.0)", "jinja2", "myst-parser", "pytest (>=3.0.0)", "pytest-cov", "pytest-mock (<4.0.0)", "pytz", "pyyaml", "requests (>=2.0.0,<3.0.0)", "sphinx", "sphinx-copybutton", 
"sphinx-rtd-theme"] +docs = ["aiohttp (>=3.9.4,<4)", "myst-parser", "sphinx", "sphinx-copybutton", "sphinx-rtd-theme"] +kerberos = ["requests-kerberos"] + [[package]] name = "packaging" version = "24.1" @@ -631,6 +838,44 @@ files = [ {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, ] +[[package]] +name = "requests" +version = "2.32.3" +description = "Python HTTP for Humans." +optional = false +python-versions = ">=3.8" +files = [ + {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, + {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, +] + +[package.dependencies] +certifi = ">=2017.4.17" +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" +urllib3 = ">=1.21.1,<3" + +[package.extras] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] + +[[package]] +name = "s3transfer" +version = "0.10.2" +description = "An Amazon S3 Transfer Manager" +optional = false +python-versions = ">=3.8" +files = [ + {file = "s3transfer-0.10.2-py3-none-any.whl", hash = "sha256:eca1c20de70a39daee580aef4986996620f365c4e0fda6a86100231d62f1bf69"}, + {file = "s3transfer-0.10.2.tar.gz", hash = "sha256:0711534e9356d3cc692fdde846b4a1e4b0cb6519971860796e6bc4c7aea00ef6"}, +] + +[package.dependencies] +botocore = ">=1.33.2,<2.0a.0" + +[package.extras] +crt = ["botocore[crt] (>=1.33.2,<2.0a.0)"] + [[package]] name = "scienceplots" version = "2.1.1" @@ -667,7 +912,24 @@ files = [ {file = "tzdata-2024.1.tar.gz", hash = "sha256:2674120f8d891909751c38abcdfd386ac0a5a1127954fbc332af6b5ceae07efd"}, ] +[[package]] +name = "urllib3" +version = "2.2.3" +description = "HTTP library with thread-safe connection pooling, file post, and more." 
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "urllib3-2.2.3-py3-none-any.whl", hash = "sha256:ca899ca043dcb1bafa3e262d73aa25c465bfb49e0bd9dd5d59f1d0acba2f8fac"},
+    {file = "urllib3-2.2.3.tar.gz", hash = "sha256:e7d814a81dad81e6caf2ec9fdedb284ecc9c73076b62654547cc64ccdcae26e9"},
+]
+
+[package.extras]
+brotli = ["brotli (>=1.0.9)", "brotlicffi (>=0.8.0)"]
+h2 = ["h2 (>=4,<5)"]
+socks = ["pysocks (>=1.5.6,!=1.5.7,<2.0)"]
+zstd = ["zstandard (>=0.18.0)"]
+
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.11"
-content-hash = "17fb9a870caeb68360c0fd12584fcc9a2fc6e9a032172a768bccdc0f01c5a034"
+content-hash = "6f8b913b00011cd3a4557203881c595b3e208215d1bfc7b9ede53cd79a2634ba"
diff --git a/pyproject.toml b/pyproject.toml
index fcbf265..8e63389 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -10,6 +10,8 @@ python = "^3.11"
 matplotlib = "^3.9.2"
 scienceplots = "^2.1.1"
 pandas = "^2.2.2"
+opensearch-py = "^2.7.1"
+boto3 = "^1.35.21"
 
 [build-system]
diff --git a/src/main.rs b/src/main.rs
index ad9db2c..a7b78cb 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -16,7 +16,7 @@ struct Args {
     #[clap(default_value = "128", short, long, env)]
     max_vus: u64,
     /// The duration of each benchmark step
-    #[clap(default_value = "10s", short, long, env)]
+    #[clap(default_value = "60s", short, long, env)]
    #[arg(value_parser = parse_duration)]
     duration: Duration,
     /// The rate of requests to send per second (only valid for the ConstantArrivalRate benchmark)
diff --git a/src/requests.rs b/src/requests.rs
index 1bcbfca..cce5227 100644
--- a/src/requests.rs
+++ b/src/requests.rs
@@ -448,13 +448,13 @@ impl TextGenerationAggregatedResponse {
 
     pub fn time_to_first_token(&self) -> Option<Duration> {
         match self.start_time {
-            Some(start_time) => {
+            Some(_) => {
                 match self.times_to_tokens.first() {
                     Some(time_to_first_token) => {
                         Some(time_to_first_token.clone())
                     }
                     None => {
-                        Some(start_time.elapsed())
+                        None
                     }
                 }
             }
@@ -498,4 +498,4 @@ impl TextGenerationAggregatedResponse {
             }
         }
     }
-}
\ No newline at end of file
+}
diff --git a/src/results.rs b/src/results.rs
index fdb85f2..ada359a 100644
--- a/src/results.rs
+++ b/src/results.rs
@@ -113,14 +113,8 @@ impl BenchmarkResults {
     }
 
     pub fn e2e_latency_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
-        if self.is_ready() {
-            let mut times: Vec<Duration> = self.get_successful_responses().iter().map(|response| response.e2e_latency().unwrap_or_default()).collect();
-            times.sort();
-            let index = (percentile * times.len() as f64) as usize;
-            Ok(times[index])
-        } else {
-            Err(anyhow::anyhow!(NoResponses))
-        }
+        let quantile = self.quantile_duration(self.get_successful_responses().iter().map(|response| response.e2e_latency().unwrap_or_default()).collect(), percentile)?;
+        Ok(Duration::from_secs_f64(quantile))
     }
 
     pub fn time_to_first_token_avg(&self) -> anyhow::Result<Duration> {
@@ -135,18 +129,9 @@ impl BenchmarkResults {
         }
     }
 
-    pub fn time_to_first_token_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
-        if self.is_ready() {
-            let mut times: Vec<Duration> = self.get_successful_responses().iter().map(|response| response.time_to_first_token().unwrap_or_default()).collect();
-            times.sort();
-            let index = (percentile * times.len() as f64) as usize;
-            if index >= times.len() {
-                return Err(anyhow::anyhow!(NoResponses));
-            }
-            Ok(times[index])
-        } else {
-            Err(anyhow::anyhow!(NoResponses))
-        }
+    pub fn time_to_first_token_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
+        let quantile = self.quantile_duration(self.get_successful_responses().iter().map(|response| response.time_to_first_token().unwrap_or_default()).collect(), percentile)?;
+        Ok(Duration::from_secs_f64(quantile))
     }
 
     pub fn inter_token_latency_avg(&self) -> anyhow::Result<Duration> {
@@ -161,15 +146,9 @@ impl BenchmarkResults {
         }
     }
 
-    pub fn inter_token_latency_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
-        if self.is_ready() {
-            let mut times: Vec<Duration> = self.get_successful_responses().iter().map(|response| response.inter_token_latency().unwrap_or_default()).collect();
-            times.sort();
-            let index = (percentile * times.len() as f64) as usize;
-            Ok(times[index])
-        } else {
-            Err(anyhow::anyhow!(NoResponses))
-        }
+    pub fn inter_token_latency_percentile(&self, percentile: f64) -> anyhow::Result<Duration> {
+        let quantile = self.quantile_duration(self.get_successful_responses().iter().map(|response| response.inter_token_latency().unwrap_or_default()).collect(), percentile)?;
+        Ok(Duration::from_secs_f64(quantile))
     }
 
     pub fn executor_type(&self) -> ExecutorType {
@@ -187,6 +166,23 @@ impl BenchmarkResults {
     pub fn get_responses(&self) -> Vec<TextGenerationAggregatedResponse> {
         self.aggregated_responses.clone()
     }
+
+    /// Calculate the quantile of a given data set using the interpolation method
+    /// Results are similar to `numpy.percentile`
+    fn quantile_duration(&self, mut data: Vec<Duration>, quantile: f64) -> anyhow::Result<f64> {
+        if self.is_ready() && !data.is_empty() {
+            data.sort();
+            let i = (quantile * (data.len() - 1) as f64).floor() as usize;
+            let delta = (data.len() - 1) as f64 * quantile - i as f64;
+            if i >= data.len() {
+                return Err(anyhow::anyhow!(NoResponses));
+            }
+            // Clamp the upper index so a quantile of 1.0 does not index past the end
+            let upper = (i + 1).min(data.len() - 1);
+            Ok((1. - delta) * data[i].as_secs_f64() + delta * data[upper].as_secs_f64())
+        } else {
+            Err(anyhow::anyhow!(NoResponses))
+        }
+    }
 }
 
 impl Debug for BenchmarkResults {
@@ -251,4 +247,56 @@ impl BenchmarkReport {
     pub fn end_time(&self) -> Option<DateTime<Utc>> {
         self.end_time
     }
+}
+
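+// Worked example of the interpolation above, using the data from the test below:
+// data = [100, 600, 1100, 1600] ms and quantile = 0.9 give
+// i = floor(0.9 * 3) = 2 and delta = 2.7 - 2 = 0.7, hence
+// 0.3 * 1100 + 0.7 * 1600 = 1450 ms, matching `numpy.percentile`.
+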
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn test_time_to_first_token_percentile() {
+        let mut response1 = TextGenerationAggregatedResponse::default();
+        response1.start_time = Some(std::time::Instant::now());
+        response1.end_time = Some(std::time::Instant::now() + std::time::Duration::from_millis(100));
+        response1.num_prompt_tokens = 10;
+        response1.num_generated_tokens = 100;
+        response1.failed = false;
+        response1.times_to_tokens = vec![Duration::from_millis(100), Duration::from_millis(200), Duration::from_millis(300), Duration::from_millis(400), Duration::from_millis(500)];
+
+        let mut response2 = TextGenerationAggregatedResponse::default();
+        response2.start_time = Some(std::time::Instant::now());
+        response2.end_time = Some(std::time::Instant::now() + std::time::Duration::from_millis(200));
+        response2.num_prompt_tokens = 10;
+        response2.num_generated_tokens = 100;
+        response2.failed = false;
+        response2.times_to_tokens = vec![Duration::from_millis(600), Duration::from_millis(700), Duration::from_millis(800), Duration::from_millis(900), Duration::from_millis(1000)];
+
+        let mut response3 = TextGenerationAggregatedResponse::default();
+        response3.start_time = Some(std::time::Instant::now());
+        response3.end_time = Some(std::time::Instant::now() + std::time::Duration::from_millis(300));
+        response3.num_prompt_tokens = 10;
+        response3.num_generated_tokens = 100;
+        response3.failed = false;
+        response3.times_to_tokens = vec![Duration::from_millis(1100), Duration::from_millis(1200), Duration::from_millis(1300), Duration::from_millis(1400), Duration::from_millis(1500)];
+
+        let mut response4 = TextGenerationAggregatedResponse::default();
+        response4.start_time = Some(std::time::Instant::now());
+        response4.end_time = Some(std::time::Instant::now() + std::time::Duration::from_millis(300));
+        response4.num_prompt_tokens = 10;
+        response4.num_generated_tokens = 100;
+        response4.failed = false;
+        response4.times_to_tokens = vec![Duration::from_millis(1600), Duration::from_millis(1700), Duration::from_millis(1800), Duration::from_millis(1900), Duration::from_millis(2000)];
+
+        let mut results = BenchmarkResults::new("test".to_string(), ExecutorType::ConstantArrivalRate, ExecutorConfig {
+            max_vus: 0,
+            duration: Default::default(),
+            rate: None,
+        });
+        results.add_response(response1);
+        results.add_response(response2);
+        results.add_response(response3);
+        results.add_response(response4);
+
+        assert_eq!(results.time_to_first_token_percentile(0.9).unwrap(), Duration::from_millis(1450));
+        assert_eq!(results.time_to_first_token_percentile(0.5).unwrap(), Duration::from_millis(850));
+    }
+}
\ No newline at end of file
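
For reference, here is a hypothetical invocation of the new `optimum.py` push script, assembled from the arguments its parser declares. The `--meta` key/value pairs are made-up examples; `--uri` is omitted, so the script falls back to the default `es+aws://` endpoint defined in the parser, which assumes AWS credentials are available on the system:

```shell
python optimum.py \
  --collection ci_tgi_performances_tracker \
  --meta engine=tgi --meta model=llama-3.1-8b \
  results.json
```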