Merge pull request #711 from basetenlabs/bump-version-0.7.14
Release 0.7.14
squidarth authored Oct 26, 2023
2 parents ee53336 + 7a73cc4 commit dec7081
Showing 24 changed files with 529 additions and 308 deletions.
36 changes: 24 additions & 12 deletions bin/generate_truss_examples.py
@@ -6,6 +6,12 @@
```
$ poetry run python bin/generate_truss_examples.py
```
Development:
Run this on a branch of the truss-examples repo with:
$ poetry run python bin/generate_truss_examples.py $BRANCH_NAME
"""
import enum
import json
@@ -20,6 +26,7 @@

DOC_CONFIGURATION_FILE = "doc.yaml"
TRUSS_EXAMPLES_REPO = "https://github.com/basetenlabs/truss-examples"
DEFAULT_BRANCH = "main"
DESTINATION_DIR = "truss-examples"
MINT_CONFIG_PATH = "docs/mint.json"

@@ -29,7 +36,7 @@ class FileType(enum.Enum):
    PYTHON = "python"


def clone_repo():
def clone_repo(branch: str):
"""
If the destination directory exists, remove it.
Then, clone the given repo into the specified directory.
@@ -41,6 +48,7 @@ def clone_repo():
        subprocess.run(
            ["git", "clone", TRUSS_EXAMPLES_REPO, DESTINATION_DIR], check=True
        )
        subprocess.run(["git", "checkout", branch], cwd=DESTINATION_DIR, check=True)
        print(f"Successfully cloned {TRUSS_EXAMPLES_REPO} to {DESTINATION_DIR}")
    except subprocess.CalledProcessError as e:
        print(f"Error cloning the repo: {e}")
@@ -71,9 +79,9 @@ def _get_example_destination(truss_directory: str) -> Path:
    Get the destination directory for the example.
    """
    original_path = Path(truss_directory)
    folder, example = original_path.parts[1:]
    example_file = f"{example}.mdx"
    return Path("docs/examples") / folder / example_file
    example_path = "/".join(original_path.parts[1:])
    example_file_path = f"{example_path}.mdx"
    return Path("docs/examples") / example_file_path


def _get_file_type(file_path: str) -> FileType:
@@ -260,7 +268,9 @@ def update_toc(example_dirs: List[str]):
    """

    # Exclude the root directory ("truss_examples") from the path
    transformed_example_paths = [Path(example).parts[1:] for example in example_dirs]
    transformed_example_paths = [
        "/".join(Path(example).parts[1:]) for example in example_dirs
    ]

    mint_config = json.loads(fetch_file_contents(MINT_CONFIG_PATH))
    navigation = mint_config["navigation"]
@@ -269,24 +279,21 @@

    # Sort examples by the group name
    examples_section["pages"] = [
        f"examples/{example_path[0]}/{example_path[1]}"
        for example_path in sorted(
            transformed_example_paths, key=lambda example: example[0]
        )
        f"examples/{example_path}" for example_path in sorted(transformed_example_paths)
    ]

    serialized_mint_config = json.dumps(mint_config, indent=2)
    Path(MINT_CONFIG_PATH).write_text(serialized_mint_config)

def generate_truss_examples():
def generate_truss_examples(branch: str = DEFAULT_BRANCH):
"""
Walk through the Truss examples repo, and for each
of the examples in the repo, generate documentation.
Finish the process by updating the table of contents.
"""
clone_repo()
clone_repo(branch)

example_dirs = _fetch_example_dirs(DESTINATION_DIR)
for truss_directory in example_dirs:
@@ -296,4 +303,9 @@ def generate_truss_examples():


if __name__ == "__main__":
    generate_truss_examples()
    # The first arg is optionally the branch name
    # of the truss-examples repo to use.
    if len(sys.argv) > 1:
        generate_truss_examples(sys.argv[1])
    else:
        generate_truss_examples()
231 changes: 209 additions & 22 deletions docs/_snippets/config-params.mdx
@@ -1,7 +1,35 @@
<ParamField body="description" type="str">
### `model_name`

Name of your model
### `description`

Describe your model for documentation purposes.
</ParamField>
<ParamField body="environment_variables" type="Dict[str, str]">
### `model_class_name`
(default: `Model`)

The name of the class that defines your Truss model. Note that this class must implement
at least a `predict` method.
### `model_module_dir`
(default: `model`)

Folder in the Truss where the model class can be found.
### `data_dir`
(default: `data/`)

Folder where data files are placed in your Truss. Note that you can access this within your model like so:


```python model/model.py
class Model:
    def __init__(self, **kwargs):
        data_dir = kwargs["data_dir"]

        ...
```
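
Taken together, the top-level model fields might look like this in `config.yaml` (all values are illustrative):

```yaml
model_name: My First Model
description: A short description of what the model does
model_class_name: Model
model_module_dir: model
data_dir: data/
```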


### `environment_variables`

<Warning>
Do not store secret values directly in environment variables (or anywhere in the config file). See the `secrets` arg for information on properly managing secrets.
</Warning>
@@ -13,19 +41,19 @@ environment_variables:
  ENVIRONMENT: Staging
  DB_URL: https://my_database.example.com/
```
</ParamField>
<ParamField body="model_metadata" type="Dict[str, str]">
### `model_metadata`
Set any additional metadata in this catch-all field. The entire contents of the config file are available to the model at runtime, so this is a good place to store any custom information that the model needs. For example, scikit-learn models include a flag here that indicates whether the model supports returning probabilities alongside predictions.

```yaml
model_metadata:
  supports_predict_proba: true
```
</ParamField>
<ParamField body="model_name" type="str">
The model's name, for documentation purposes.
</ParamField>
<ParamField body="requirements" type="List[str]">

This is also where display metadata can be stored.

### `requirements`

List the Python dependencies that the model depends on. The requirements should be provided in the [pip requirements file format](https://pip.pypa.io/en/stable/reference/requirements-file-format/), but as a yaml list.

We strongly recommend pinning versions in your requirements.
@@ -38,19 +66,46 @@ requirements:
- numpy==1.20.3
- scipy==1.7.3
```
</ParamField>
<ParamField body="resources" type="Dict[str, str]">
Specify model server runtime resources such as CPU, RAM and GPU.

### `resources`

The `resources` section is where you specify the compute resources that your model needs. This includes CPU, memory, and GPU resources.
If you need a GPU, you must also set `resources.use_gpu` to `true`.

#### `resources.cpu`

CPU resources needed, expressed either as a raw number or in "millicpus". For example, `1000m` and `1` are equivalent.
Fractional CPU amounts can be requested using millicpus. For example, `500m` is half of a CPU core.

#### `resources.memory`

CPU RAM needed, expressed as a number with units. Acceptable units include "Gi" (Gibibytes), "G" (Gigabytes), "Mi" (Mebibytes), and "M" (Megabytes). For example, `1Gi` and `1024Mi` are equivalent.
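
For instance, a minimal sketch of a CPU-only resource configuration using millicpus (values are illustrative):

```yaml
resources:
  cpu: 500m     # half of one CPU core
  memory: 512Mi # equivalent to 0.5Gi
  use_gpu: false
```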

#### `resources.use_gpu`

Whether or not a GPU is required for this model.

#### `resources.accelerator`

Which GPU you would like for your instance. Nvidia GPUs supported in Truss include:
* T4
* L4
* A10G
* V100
* A100

Note that if you need multiple GPUs to serve your model, you can use the `:` operator to request multiple
GPUs on your instance, e.g.:

```yaml
resources:
  cpu: "3"
  memory: 14Gi
  use_gpu: true
  accelerator: A10G
  ...
  accelerator: A10G:2 # Requests 2 A10Gs
```
</ParamField>
<ParamField body="secrets" type="Dict[str, str]">


### `secrets`
<Warning>
This field can be used to specify the keys for such secrets and dummy default
values. ***Never store actual secret values in the config***. Dummy default
@@ -66,8 +121,8 @@ information from s3 and may need access to AWS credentials for that.
secrets:
  hf_access_token: "ACCESS TOKEN"
```
</ParamField>
<ParamField body="system_packages" type="List[str]">

### `system_packages`
Specify any system packages that you would typically install using `apt` on a Debian operating system.

```yaml
@@ -76,4 +131,136 @@ system_packages:
- libsm6
- libxext6
```
</ParamField>

### `python_version`

Which version of Python you'd like to use. Supported versions include:

* py39
* py310
* py311
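
For example, to build the model's environment with Python 3.11:

```yaml
python_version: py311
```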

### `base_image`

The `base_image` option is used if you need to bring your own custom base image.
Custom base images are useful if there are scripts that need to run at build time, or dependencies
that are complicated to install. After creating a custom base image, you can specify it
in this field.

See [Custom Base Images](guides/base-images) for more detail on how to use these.

#### `base_image.image`

A path to the Docker image you'd like to use. For example, `nvcr.io/nvidia/nemo:23.03`.

#### `base_image.python_executable_path`

A path to the Python executable on the image. For instance, `/usr/bin/python`.

Tying it together, a custom base image configuration might look
like this:

```yaml
base_image:
  image: nvcr.io/nvidia/nemo:23.03
  python_executable_path: /usr/bin/python
```
### `runtime`

Runtime settings for your model instance.

#### `runtime.predict_concurrency`
(default: `1`)

This field governs how many requests can run concurrently in the `predict` method of your model. This is useful
if you have a model that supports parallelism, and you'd like to take advantage of that.
By default, this value is set to 1, implying that `predict` can only run for one request at a time.
This protects the GPU from being over-utilized, and is a good default for many models.

See [How to configure concurrency](guides/concurrency) for more detail on how to set this value.
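
As a sketch, a model that can safely handle four requests in parallel might set (the value is illustrative):

```yaml
runtime:
  predict_concurrency: 4
```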
### `external_data`

Use `external_data` if you have data that you want to be bundled in your image at build time.
This is useful if you have a large amount of data that you want to be available to your model.
By including it at build-time, you reduce the cold-start time of your instance, as the data is
already available in the image. You can use it like so:

```yaml config.yaml
external_data:
- url: https://my-bucket.s3.amazonaws.com/my-data.tar.gz
  local_data_path: data/my-data.tar.gz
  name: my-data
```
#### `external_data.<list_item>.url`

The URL to download data from.
#### `external_data.<list_item>.local_data_path`

The path on the image where the data will be downloaded to.
#### `external_data.<list_item>.name`

You can set a name for the data, which is useful for readability. Not required.
### `build`

The `build` section is used to define options for custom servers.
The two main model servers we support are `TGI` and `vLLM`. These are
highly optimized servers that are built to support specific LLMs.

See the following examples for how to use each of these:
* [TGI](examples/07-high-performance-tgi)
* [vLLM](examples/08-high-performance-vllm)

Example configuration for TGI, running Falcon-7B:

```yaml config.yaml
build:
  arguments:
    endpoint: generate_stream
    model_id: tiiuae/falcon-7b
  model_server: TGI
```

#### `build.model_server`

Either `VLLM` for vLLM, or `TGI` for TGI.

#### `build.arguments`

The arguments for the model server. This includes information such as which model you intend to load, and
which endpoint from the server you'd like to use.

### `hf_cache`

The `hf_cache` section is used for caching model weights at build-time. This is one of the biggest levers
for decreasing cold start times, as downloading weights can be one of the lengthiest parts of starting a new
model instance. Using this section ensures that model weights are cached at _build_ time.

See the [model cache guide](guides/model-cache) for the full details on how to use this field.

<Note>
Although this field is called `hf_cache`, multiple backends are supported, not just Hugging Face. You can
also cache weights stored on GCS, for instance.
</Note>
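
A minimal sketch, caching a single Hugging Face repo (the revision and patterns here are illustrative):

```yaml
hf_cache:
- repo_id: madebyollin/sdxl-vae-fp16-fix
  revision: main
  allow_patterns:
  - "*.safetensors"
```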

#### `hf_cache.<list_item>.repo_id`

The endpoint for your cloud bucket. Currently, we support Hugging Face and Google Cloud Storage.

Example: `madebyollin/sdxl-vae-fp16-fix` for a Hugging Face repo, or `gcs://path-to-my-bucket` for
a GCS bucket.

#### `hf_cache.<list_item>.revision`

Points to the revision of the repo that you want to cache, such as a branch name or commit SHA. By default, it refers to `main`.

#### `hf_cache.<list_item>.allow_patterns`

Only cache files that match specified patterns. Utilize Unix shell-style wildcards to denote these patterns.
By default, all paths are included.

#### `hf_cache.<list_item>.ignore_patterns`

Conversely, you can also specify file patterns to ignore, streamlining the caching process.
By default, nothing is ignored.
@@ -6,7 +6,7 @@ description: "Building your first Truss"

<Card
  title="View on Github"
  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/1_introduction/getting-started-bert">
  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/01-getting-started-bert">
</Card>

In this example, we go through building your first Truss model. We'll be using the HuggingFace transformers
2 changes: 1 addition & 1 deletion docs/examples/3_LLMs/llm.mdx → docs/examples/02-llm.mdx
@@ -6,7 +6,7 @@ description: "Building an LLM"

<Card
  title="View on Github"
  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/3_LLMs/llm">
  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/02-llm">
</Card>

In this example, we go through a Truss that serves an LLM. We
@@ -6,7 +6,7 @@ description: "Building an LLM with streaming output"

<Card
  title="View on Github"
  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/3_LLMs/llm-with-streaming">
  icon="github" href="https://github.com/basetenlabs/truss-examples/tree/main/03-llm-with-streaming">
</Card>

In this example, we go through a Truss that serves an LLM, and streams the output to the client.
