Skip to content

Commit

Permalink
add turboboost_tps param
Browse files Browse the repository at this point in the history
  • Loading branch information
zainhas committed Jan 2, 2025
1 parent 5cd3742 commit acc31b2
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/together/resources/chat/completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def create(
response_format: Dict[str, str | Dict[str, Any]] | None = None,
tools: List[Dict[str, Any]] | None = None,
tool_choice: str | Dict[str, str | Dict[str, str]] | None = None,
turboboost_tps: float | None = None,
**kwargs: Any,
) -> ChatCompletionResponse | Iterator[ChatCompletionChunk]:
"""
Expand Down Expand Up @@ -103,6 +104,13 @@ def create(
via {"type": "function", "function": {"name": "my_function"}} forces the model to call that function.
Sets to `auto` if None.
Defaults to None.
turboboost_tps (float, optional): A parameter that controls the speed-quality tradeoff between
the draft model (faster, lower quality) and target model (slower, higher quality).
Values range from 0.0 (regular speculative decoding with target model quality) to
1.0 (draft model speed with 100% acceptance rate). Higher values increase speed
while potentially reducing quality.
                Defaults to None (the server treats None as 0.0).
Returns:
ChatCompletionResponse | Iterator[ChatCompletionChunk]: Object containing the completions
Expand Down Expand Up @@ -135,6 +143,7 @@ def create(
response_format=response_format,
tools=tools,
tool_choice=tool_choice,
turboboost_tps=turboboost_tps,
**kwargs,
).model_dump(exclude_none=True)

Expand Down Expand Up @@ -183,6 +192,7 @@ async def create(
response_format: Dict[str, Any] | None = None,
tools: Dict[str, str | Dict[str, str | Dict[str, Any]]] | None = None,
tool_choice: str | Dict[str, str | Dict[str, str]] | None = None,
turboboost_tps: float | None = None,
**kwargs: Any,
) -> AsyncGenerator[ChatCompletionChunk, None] | ChatCompletionResponse:
"""
Expand Down Expand Up @@ -245,6 +255,12 @@ async def create(
via {"type": "function", "function": {"name": "my_function"}} forces the model to call that function.
Sets to `auto` if None.
Defaults to None.
turboboost_tps (float, optional): A parameter that controls the speed-quality tradeoff between
the draft model (faster, lower quality) and target model (slower, higher quality).
Values range from 0.0 (regular speculative decoding with target model quality) to
1.0 (draft model speed with 100% acceptance rate). Higher values increase speed
while potentially reducing quality.
                Defaults to None (the server treats None as 0.0).
Returns:
AsyncGenerator[ChatCompletionChunk, None] | ChatCompletionResponse: Object containing the completions
Expand Down Expand Up @@ -277,6 +293,7 @@ async def create(
response_format=response_format,
tools=tools,
tool_choice=tool_choice,
turboboost_tps=turboboost_tps,
**kwargs,
).model_dump(exclude_none=True)

Expand Down
16 changes: 16 additions & 0 deletions src/together/resources/completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def create(
echo: bool | None = None,
n: int | None = None,
safety_model: str | None = None,
turboboost_tps: float | None = None,
**kwargs: Any,
) -> CompletionResponse | Iterator[CompletionChunk]:
"""
Expand Down Expand Up @@ -88,6 +89,12 @@ def create(
safety_model (str, optional): A moderation model to validate tokens. Choice between available moderation
models found [here](https://docs.together.ai/docs/inference-models#moderation-models).
Defaults to None.
turboboost_tps (float, optional): A parameter that controls the speed-quality tradeoff between
the draft model (faster, lower quality) and target model (slower, higher quality).
Values range from 0.0 (regular speculative decoding with target model quality) to
1.0 (draft model speed with 100% acceptance rate). Higher values increase speed
while potentially reducing quality.
                Defaults to None (the server treats None as 0.0).
Returns:
CompletionResponse | Iterator[CompletionChunk]: Object containing the completions
Expand Down Expand Up @@ -117,6 +124,7 @@ def create(
echo=echo,
n=n,
safety_model=safety_model,
turboboost_tps=turboboost_tps,
**kwargs,
).model_dump(exclude_none=True)

Expand Down Expand Up @@ -162,6 +170,7 @@ async def create(
echo: bool | None = None,
n: int | None = None,
safety_model: str | None = None,
turboboost_tps: float | None = None,
**kwargs: Any,
) -> AsyncGenerator[CompletionChunk, None] | CompletionResponse:
"""
Expand Down Expand Up @@ -212,6 +221,12 @@ async def create(
safety_model (str, optional): A moderation model to validate tokens. Choice between available moderation
models found [here](https://docs.together.ai/docs/inference-models#moderation-models).
Defaults to None.
turboboost_tps (float, optional): A parameter that controls the speed-quality tradeoff between
the draft model (faster, lower quality) and target model (slower, higher quality).
Values range from 0.0 (regular speculative decoding with target model quality) to
1.0 (draft model speed with 100% acceptance rate). Higher values increase speed
while potentially reducing quality.
                Defaults to None (the server treats None as 0.0).
Returns:
AsyncGenerator[CompletionChunk, None] | CompletionResponse: Object containing the completions
Expand Down Expand Up @@ -241,6 +256,7 @@ async def create(
echo=echo,
n=n,
safety_model=safety_model,
turboboost_tps=turboboost_tps,
**kwargs,
).model_dump(exclude_none=True)

Expand Down

0 comments on commit acc31b2

Please sign in to comment.