Skip to content

Commit

Permalink
Merge pull request #4 from andrewrreed/app-v1
Browse files Browse the repository at this point in the history
App v1
  • Loading branch information
andrewrreed authored Sep 17, 2024
2 parents 0617225 + 09c7826 commit 4abb859
Show file tree
Hide file tree
Showing 5 changed files with 267 additions and 27 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ sdk: gradio
sdk_version: "4.44.0"
app_file: app/ui.py
pinned: false
hf_oauth: true

---

# HF IE AutoBench
Expand Down
10 changes: 10 additions & 0 deletions app/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,22 @@ def get_go_bin():
# Install Go
def setup_k6():

# check if k6 is already installed
success, _ = run_command("k6 --version")
if not success:
logger.error("k6 is not installed. Installing k6...")
else:
logger.success("k6 is already installed")
return True

# check if go is installed
success, _ = run_command("go version")
if not success:
logger.error("Go is not installed. Installing Go...")
if not install_go():
sys.exit(1)
else:
logger.success("Go is already installed")

# Get the Go bin directory
go_bin = get_go_bin()
Expand Down
215 changes: 203 additions & 12 deletions app/ui.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,213 @@
import os
import gradio as gr


import pandas as pd
from dotenv import load_dotenv
from autobench.compute_manager import ComputeManager
from autobench.scheduler import run_scheduler
from autobench.logging_config import setup_logging
from autobench.report import gather_results, plot_metrics
from huggingface_hub import whoami
from app.setup import setup_k6


# Install Go and k6-sse via setup_k6 (imported from app.setup).
# NOTE(review): the unconditional setup_k6() call and the gr.NO_RELOAD-guarded
# call below look like the old and new sides of the same diff hunk — confirm
# which one the file really contains; keeping both would run setup_k6() twice.
setup_k6()
if gr.NO_RELOAD:
setup_k6()
setup_logging()
# override=True is passed so .env values take precedence over variables
# already present in the process environment.
load_dotenv(override=True)

# Module-level ComputeManager shared by the UI: the widgets below read
# cm.options and the click handler calls its instance lookup methods.
cm = ComputeManager()


def format_viable_instances(viable_instances):
    """Flatten viable instance/TGI configurations into a sorted display table.

    Args:
        viable_instances: Mapping whose values are dicts holding an
            ``"instance_config"`` object (attributes ``architecture``,
            ``num_gpus``, ``vendor``, ``region``, ``gpu_memory_in_gb``,
            ``price_per_hour``) and a ``"tgi_config"`` object (attributes
            ``max_input_length``, ``max_total_tokens``,
            ``max_batch_prefill_tokens``, ``estimated_memory_in_gigabytes``).
            The mapping keys are ignored. ``None`` or an empty mapping is
            accepted.

    Returns:
        A ``pandas.DataFrame`` with one row per instance, sorted by
        ``num_gpus`` and then ``gpu_memory_in_gb``. When there are no
        instances, an empty frame with the same columns is returned.
    """
    # (output column, attribute on instance_config)
    instance_columns = (
        ("gpu_type", "architecture"),
        ("num_gpus", "num_gpus"),
        ("vendor", "vendor"),
        ("region", "region"),
        ("gpu_memory_in_gb", "gpu_memory_in_gb"),
        ("price_per_hour", "price_per_hour"),
    )
    # TGI columns use the attribute name as the column name.
    tgi_columns = (
        "max_input_length",
        "max_total_tokens",
        "max_batch_prefill_tokens",
        "estimated_memory_in_gigabytes",
    )

    rows = []
    for details in (viable_instances or {}).values():
        instance_config = details["instance_config"]
        tgi_config = details["tgi_config"]
        row = {
            column: getattr(instance_config, attr)
            for column, attr in instance_columns
        }
        row.update({column: getattr(tgi_config, column) for column in tgi_columns})
        rows.append(row)

    # Passing the columns explicitly keeps the sort keys present even when
    # there are no rows; otherwise sort_values raises KeyError on an empty frame.
    columns = [column for column, _ in instance_columns] + list(tgi_columns)
    table = pd.DataFrame(rows, columns=columns)
    return table.sort_values(by=["num_gpus", "gpu_memory_in_gb"])


# Gradio Blocks UI: header text, login/namespace/model inputs, vendor, region
# and GPU selectors, plus result widgets that start hidden and are revealed by
# the event handlers defined further down.
with gr.Blocks() as demo:
gr.HTML("<h1>IE AutoBench</h1>")
gr.HTML(
"<p>IE AutoBench is a tool for benchmarking the performance of large language models (LLMs) on various compute providers. This tool is currently in development and not all features are available.</p>"
)

# Session storage: written by get_viable_instances and read by run_benchmark.
session_state = gr.State()

with gr.Row(variant="panel"):
login_button = gr.LoginButton()
# Hidden until load_demo fills it in for a logged-in user.
namespace_selector = gr.Dropdown(label="Namespace", visible=False)
model_selector = gr.Textbox(
label="Model ID",
info="The ID of the model to benchmark. Must be a model supported by TGI.",
value="meta-llama/Meta-Llama-3-8B-Instruct",
)

with gr.Row():
with gr.Column():
preferred_vendor_selector = gr.Dropdown(
label="Preferred Vendor",
choices=cm.options.vendor.unique().tolist(),
value="aws",
)
with gr.Column():
# Choices are the first two characters of each unique region value.
# NOTE(review): the prefixes themselves are not de-duplicated, so the list
# may contain repeated entries — confirm whether that is intended.
preferred_region_selector = gr.Dropdown(
label="Preferred Region",
choices=[region[:2] for region in cm.options.region.unique().tolist()],
value="us",
)

# Each choice is a unique (architecture, instance_type) tuple.
# NOTE(review): presumably Gradio treats the tuple as (label, value), so the
# handler receives instance_type values — confirm against the Gradio docs.
gpu_option_selector = gr.CheckboxGroup(
label="GPU Type",
choices=cm.options[["architecture", "instance_type"]]
.apply(tuple, axis=1)
.unique()
.tolist(),
)
validate_compute_instances_button = gr.Button("Validate Compute Options")

with gr.Row():
# Hidden until get_viable_instances returns a table for it.
viable_compute_instances = gr.Dataframe(
label="Viable Compute Instances", visible=False
)

with gr.Row():
# Hidden until get_viable_instances makes it visible.
run_benchmark_button = gr.Button("Run Benchmark", visible=False)

with gr.Row():
# Receives the same mapping that get_viable_instances stores in session_state.
state_display = gr.JSON(label="State")

with gr.Row():
# Receives the image returned by run_benchmark.
session_test = gr.Image(label="Session Test")

# NOTE: this handler must stay inside the `with gr.Blocks() as demo:` context.
@gr.on(
    triggers=demo.load,
    inputs=[],
    outputs=[namespace_selector],
)
def load_demo(oauth_token: gr.OAuthToken | None):
    """Populate the namespace dropdown for the logged-in user on page load.

    Args:
        oauth_token: OAuth token of the current user, or ``None`` when nobody
            is logged in. It is not listed in ``inputs``; the parameter is
            declared through its type annotation only.

    Returns:
        ``None`` when no user is logged in (the dropdown stays as it is),
        otherwise a visible, interactive ``gr.Dropdown`` listing the
        namespaces that can pay, with the first one preselected.

    Raises:
        gr.Error: If the user has no personal or organisation namespace that
            can pay for compute.
    """
    if not oauth_token:
        return None

    user_details = whoami(oauth_token.token)

    # Organisations that can pay for compute.
    namespace_options = [
        org["name"] for org in user_details["orgs"] if org.get("canPay", False)
    ]
    # The personal namespace, when billable, is listed first.
    if user_details.get("canPay", False):
        namespace_options.insert(0, user_details["name"])

    if not namespace_options:
        # The error must be raised: merely constructing gr.Error shows nothing
        # and would let namespace_options[0] below fail with IndexError.
        raise gr.Error(
            "You do not have access to any namespaces that can pay for compute. Please add billing to your account or org."
        )

    return gr.Dropdown(
        choices=namespace_options,
        value=namespace_options[0],
        visible=True,
        interactive=True,
    )

# NOTE: this handler must stay inside the `with gr.Blocks() as demo:` context.
@gr.on(
    triggers=validate_compute_instances_button.click,
    inputs=[
        model_selector,
        preferred_vendor_selector,
        preferred_region_selector,
        gpu_option_selector,
    ],
    outputs=[
        session_state,
        state_display,
        viable_compute_instances,
        viable_compute_instances,
        preferred_vendor_selector,
        preferred_region_selector,
        gpu_option_selector,
        model_selector,
        validate_compute_instances_button,
        run_benchmark_button,
    ],
)
def get_viable_instances(
    model_id, preferred_vendor, preferred_region_prefix, gpu_types
):
    """Look up the instances that can serve ``model_id`` and update the UI.

    Args:
        model_id: Model ID entered in the model textbox.
        preferred_vendor: Vendor selected in the vendor dropdown.
        preferred_region_prefix: Region prefix selected in the region dropdown.
        gpu_types: Values ticked in the GPU checkbox group.

    Returns:
        A 10-tuple matching ``outputs`` in order: the id-keyed instance
        mapping (for the session state and again for the JSON display), the
        formatted table, an update revealing that table, five updates that
        make the input widgets and the validate button non-interactive, and
        an update revealing the run button.
    """
    candidates = cm.get_instance_details(
        gpu_types=gpu_types,
        preferred_vendor=preferred_vendor,
        preferred_region_prefix=preferred_region_prefix,
    )

    # Key each viable configuration by the id of its instance config.
    viable_by_id = {}
    for config in cm.get_viable_instance_configs(
        model_id=model_id, instances=candidates
    ):
        viable_by_id[config["instance_config"].id] = config

    table = format_viable_instances(viable_by_id)

    # One update each for: vendor, region, GPU selector, model textbox and
    # the validate button.
    freeze_inputs = [gr.update(interactive=False) for _ in range(5)]

    return (
        viable_by_id,
        viable_by_id,
        table,
        gr.update(visible=True),
        *freeze_inputs,
        gr.update(visible=True),
    )

# NOTE: this handler must stay inside the `with gr.Blocks() as demo:` context.
@gr.on(
    triggers=run_benchmark_button.click,
    inputs=[
        session_state,
        namespace_selector,
    ],
    outputs=[session_test],
)
def run_benchmark(session_state, namespace):
    """Benchmark the first validated instance and return the report image.

    Args:
        session_state: Id-keyed mapping of viable instances stored by
            ``get_viable_instances``.
        namespace: Namespace selected in the namespace dropdown.

    Returns:
        A ``gr.Image`` pointing at ``benchmark_report.png`` inside the
        scheduler's output directory.

    Raises:
        gr.Error: If no viable instances are stored in the session state.
    """
    if not session_state:
        # Without this guard a missing or empty state fails with an opaque
        # AttributeError/IndexError instead of a message for the user.
        raise gr.Error(
            "No viable compute instances available. Validate compute options before running the benchmark."
        )

    # Only the first viable instance is benchmarked.
    first_instance = next(iter(session_state.values()))
    scheduler = run_scheduler(
        viable_instances=[first_instance],
        namespace=namespace,
        output_dir=os.path.join(os.path.dirname(__file__), "benchmark_results"),
    )

    results_df = gather_results(scheduler.output_dir)

    plot_path = os.path.join(scheduler.output_dir, "benchmark_report")
    plot_metrics(
        df=results_df,
        file_name=plot_path,
    )

    # presumably plot_metrics writes "<file_name>.png" — TODO confirm
    return gr.Image(plot_path + ".png")


# Launch only when executed as a script, so importing this module does not
# start the server.
if __name__ == "__main__":
    demo.launch()
65 changes: 51 additions & 14 deletions autobench/compute_manager.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from loguru import logger
import requests
import pandas as pd
from typing import Dict, List
from typing import Dict, List, Literal
from urllib.parse import urlencode

from autobench.config import TGIConfig, ComputeInstanceConfig
Expand Down Expand Up @@ -126,27 +126,64 @@ def _filter_options(df):
logger.info(f"Filtered {len(filtered_df)} available GPU options")
return filtered_df

def get_instance_details(
    self,
    gpu_types: List[str],
    preferred_vendor: str = "aws",
    preferred_region_prefix: Literal["us", "eu"] = "us",
):
    """Return one instance option per (num_gpus, instance_type) for the given GPU types.

    Rows of ``self.options`` whose ``instance_type`` is in ``gpu_types`` are
    ranked and de-duplicated so that each (num_gpus, instance_type) pair
    keeps only its best-ranked row.

    Args:
        gpu_types (List[str]): Instance types to keep.
        preferred_vendor (str, optional): Vendor ranked ahead of all others.
            Defaults to "aws".
        preferred_region_prefix (Literal["us", "eu"], optional): Regions
            starting with this prefix are ranked ahead of all others.
            Defaults to "us".

    Returns:
        List[Dict]: One record (``DataFrame.to_dict(orient="records")``) per
        remaining row.

    Note:
        Sort order is: number of GPUs, instance type, vendor preference,
        region preference, then price per hour. The row kept for each
        (num_gpus, instance_type) pair is therefore the cheapest one within
        the most-preferred vendor/region available, which is not necessarily
        the cheapest row overall.
    """
    logger.info(
        f"Getting instance details for gpu_types={gpu_types}, preferred_vendor={preferred_vendor}, preferred_region_prefix={preferred_region_prefix}"
    )
    df = self.options[self.options["instance_type"].isin(gpu_types)]

    def _preference_key(col):
        # sort_values calls the key once per sort column: vendor and region
        # become a 0 (preferred) / 1 (other) rank, other columns sort as-is.
        if col.name == "vendor":
            return col.map(lambda vendor: 0 if vendor == preferred_vendor else 1)
        if col.name == "region":
            return col.map(
                lambda region: 0 if region.startswith(preferred_region_prefix) else 1
            )
        return col

    df_sorted = df.sort_values(
        by=[
            "num_gpus",
            "instance_type",
            "vendor",
            "region",
            "price_per_hour",
        ],
        key=_preference_key,
    )

    # After sorting, the first row of each group is the best-ranked one.
    df_deduplicated = df_sorted.drop_duplicates(
        subset=["num_gpus", "instance_type"], keep="first"
    )

    return df_deduplicated.to_dict(orient="records")

@staticmethod
def get_tgi_config(model_id: str, gpu_memory: int, num_gpus: int):
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@ tqdm==4.66.4
python-dotenv==1.0.0
matplotlib==3.9.2
tenacity==9.0.0
gradio==4.44.0
gradio[oauth]==4.44.0

0 comments on commit 4abb859

Please sign in to comment.