Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pin IREE to 3.1.0rc20241220 #734

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci-libshortfin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ jobs:
repository: iree-org/iree
path: ${{ env.IREE_REPO_DIR }}
submodules: false
ref: iree-3.1.0rc20241204
ref: iree-3.1.0rc20241220

- name: Initalize IREE submodules
working-directory: ${{ env.IREE_REPO_DIR }}
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/ci-llama-large-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,11 @@ jobs:

# Install nightly IREE packages.
# We could also pin to a known working or stable version.
# Pinned to 3.1.0rc20241220 by Xida on Dec 30 (TODO: link the issue that motivated the pin).
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler \
iree-base-runtime \
iree-turbine
iree-base-compiler==3.1.0rc20241220 \
iree-base-runtime==3.1.0rc20241220 \
iree-turbine==3.1.0rc20241220

pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/

Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/ci-llama-quick-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,11 @@ jobs:

# Install nightly IREE packages.
# We could also pin to a known working or stable version.
# Pinned to 3.1.0rc20241220 by Xida on Dec 30 (TODO: link the issue that motivated the pin).
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler \
iree-base-runtime \
iree-turbine
iree-base-compiler==3.1.0rc20241220 \
iree-base-runtime==3.1.0rc20241220 \
iree-turbine==3.1.0rc20241220

pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
pip freeze
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/ci-sglang-integration-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ jobs:

# Use newest possible releases to be able to track commits that may
# cause errors.
# Pinned to 3.1.0rc20241220 by Xida on Dec 30 (TODO: link the issue that motivated the pin).
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler \
iree-base-runtime \
iree-turbine \
iree-base-compiler==3.1.0rc20241220 \
iree-base-runtime==3.1.0rc20241220 \
iree-turbine==3.1.0rc20241220 \
"numpy<2.0"

pip install --no-compile -r requirements.txt -e sharktank/ shortfin/
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/ci-shark-ai.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,11 @@ jobs:

# Install nightly IREE packages.
# We could also pin to a known working or stable version.
# Pinned to 3.1.0rc20241220 by Xida on Dec 30 (TODO: link the issue that motivated the pin).
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler \
iree-base-runtime \
iree-turbine
iree-base-compiler==3.1.0rc20241220 \
iree-base-runtime==3.1.0rc20241220 \
iree-turbine==3.1.0rc20241220

pip install --no-compile -r requirements.txt -e sharktank/ shortfin/

Expand Down
25 changes: 14 additions & 11 deletions .github/workflows/ci-sharktank.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,10 +78,11 @@ jobs:

# Install nightly IREE packages.
# We could also pin to a known working or stable version.
pip install -f https://iree.dev/pip-release-links.html --pre \
iree-base-compiler \
iree-base-runtime \
iree-turbine
# Pinned to 3.1.0rc20241220 by Xida on Dec 30 (TODO: link the issue that motivated the pin).
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler==3.1.0rc20241220 \
iree-base-runtime==3.1.0rc20241220 \
iree-turbine==3.1.0rc20241220

pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/

Expand Down Expand Up @@ -128,10 +129,11 @@ jobs:

# Install nightly IREE packages.
# We could also pin to a known working or stable version.
# Pinned to 3.1.0rc20241220 by Xida on Dec 30 (TODO: link the issue that motivated the pin).
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler \
iree-base-runtime \
iree-turbine
iree-base-compiler==3.1.0rc20241220 \
iree-base-runtime==3.1.0rc20241220 \
iree-turbine==3.1.0rc20241220

pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/

Expand Down Expand Up @@ -186,10 +188,11 @@ jobs:

# Install nightly IREE packages.
# We could also pin to a known working or stable version.
pip install -f https://iree.dev/pip-release-links.html --pre \
iree-base-compiler \
iree-base-runtime \
iree-turbine
# Pinned to 3.1.0rc20241220 by Xida on Dec 30 (TODO: link the issue that motivated the pin).
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler==3.1.0rc20241220 \
iree-base-runtime==3.1.0rc20241220 \
iree-turbine==3.1.0rc20241220

pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/

Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/ci_eval.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,11 @@ jobs:

# Install nightly IREE packages.
# We could also pin to a known working or stable version.
# Pinned to 3.1.0rc20241220 by Xida on Dec 30 (TODO: link the issue that motivated the pin).
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler \
iree-base-runtime \
iree-turbine
iree-base-compiler==3.1.0rc20241220 \
iree-base-runtime==3.1.0rc20241220 \
iree-turbine==3.1.0rc20241220

pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
pip freeze
Expand Down
7 changes: 4 additions & 3 deletions .github/workflows/ci_eval_short.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,11 @@ jobs:

# Install nightly IREE packages.
# We could also pin to a known working or stable version.
# Pinned to 3.1.0rc20241220 by Xida on Dec 30 (TODO: link the issue that motivated the pin).
pip install -f https://iree.dev/pip-release-links.html --pre --upgrade \
iree-base-compiler \
iree-base-runtime \
iree-turbine
iree-base-compiler==3.1.0rc20241220 \
iree-base-runtime==3.1.0rc20241220 \
iree-turbine==3.1.0rc20241220

pip install --no-compile -r requirements.txt -r sharktank/requirements-tests.txt -e sharktank/
pip freeze
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_linux_x64_asan-libshortfin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ jobs:
repository: iree-org/iree
path: ${{ env.IREE_SOURCE_DIR }}
submodules: false
ref: iree-3.1.0rc20241204
ref: iree-3.1.0rc20241220

- name: Initalize IREE submodules
working-directory: ${{ env.IREE_SOURCE_DIR }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci_linux_x64_nogil-libshortfin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ jobs:
repository: iree-org/iree
path: ${{ env.IREE_REPO_DIR }}
submodules: false
ref: iree-3.1.0rc20241204
ref: iree-3.1.0rc20241220

- name: Initalize IREE submodules
working-directory: ${{ env.IREE_REPO_DIR }}
Expand Down
103 changes: 68 additions & 35 deletions app_tests/integration_tests/llm/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import subprocess
import sys
import time
from typing import Union

import requests
from transformers import AutoTokenizer
Expand All @@ -22,22 +23,68 @@ class AccuracyValidationException(RuntimeError):
pass


def run_subprocess(cmd, **kwargs):
    """
    Run a command to completion with stderr merged into stdout.

    The combined output is captured instead of being streamed; it is only
    written to the log when the command fails.

    Args:
        cmd: Command to run (list or string)
        **kwargs: Additional arguments forwarded to subprocess.run.
            ``stdout``, ``stderr`` and ``universal_newlines`` are always
            overridden, and ``check`` is ignored because a non-zero exit
            status always raises.

    Returns:
        subprocess.CompletedProcess whose ``stdout`` holds the combined
        output as text and whose ``stderr`` is None.

    Raises:
        subprocess.CalledProcessError: If the command returns non-zero exit status
        Exception: Any error raised while launching or waiting for the
            command (e.g. OSError for a missing executable) is logged and
            re-raised unchanged.
    """
    # The exit status is checked explicitly below so the captured output can
    # be logged before raising. Dropping ``check`` keeps call sites that were
    # ported from ``subprocess.run(..., check=True)`` working.
    kwargs.pop("check", None)

    # Ensure we always capture output and combine streams
    kwargs.update(
        {
            "stdout": subprocess.PIPE,
            "stderr": subprocess.STDOUT,
            "universal_newlines": True,
        }
    )

    try:
        # subprocess.run kills and reaps the child if waiting on it is
        # interrupted, which a bare Popen + communicate() does not do.
        completed = subprocess.run(cmd, **kwargs)
    except Exception as e:
        logger.error(f"Failed to execute command: {e}")
        logger.error(f"Command was: {cmd}")
        raise

    if completed.returncode != 0:
        logger.error(f"Command failed with exit code {completed.returncode}")
        logger.error(f"Command output:\n{completed.stdout}")
        raise subprocess.CalledProcessError(
            completed.returncode, cmd, completed.stdout
        )

    return completed


def download_huggingface_model(local_dir, repo_id, model_file):
model_path = local_dir / model_file
logger.info(f"Preparing model_path: {model_path}..")
if not os.path.exists(model_path):
logger.info(f"Downloading model {repo_id} {model_file} from Hugging Face...")
subprocess.run(
run_subprocess(
f"huggingface-cli download --local-dir {local_dir} {repo_id} {model_file}",
shell=True,
check=True,
)
logger.info(f"Model downloaded to {model_path}")
else:
logger.info("Using cached model")


def download_with_hf_datasets(local_dir: Path | str, model_name: str):
def download_with_hf_datasets(local_dir: Union[Path, str], model_name: str):
"""Download a model using `sharktank.utils.hf_datasets` script.

Args:
Expand All @@ -48,16 +95,15 @@ def download_with_hf_datasets(local_dir: Path | str, model_name: str):
local_dir = str(local_dir)

logger.info(f"Download model {model_name} with `hf_datasets` to {local_dir}...")
subprocess.run(
run_subprocess(
[
"python",
"-m",
"sharktank.utils.hf_datasets",
model_name,
"--local-dir",
local_dir,
],
check=True,
]
)
logger.info(f"Model {model_name} successfully downloaded.")

Expand Down Expand Up @@ -85,7 +131,7 @@ def export_paged_llm_v1(mlir_path, config_path, model_path, batch_sizes):
f" Config Path: {config_path}\n"
f" Batch Sizes: {bs_string}"
)
subprocess.run(
run_subprocess(
[
"python",
"-m",
Expand All @@ -95,23 +141,21 @@ def export_paged_llm_v1(mlir_path, config_path, model_path, batch_sizes):
f"--output-mlir={mlir_path}",
f"--output-config={config_path}",
f"--bs={bs_string}",
],
check=True,
]
)
logger.info(f"Model successfully exported to {mlir_path}")


def compile_model(mlir_path, vmfb_path, device_settings):
logger.info(f"Compiling model to {vmfb_path}")
subprocess.run(
run_subprocess(
[
"iree-compile",
mlir_path,
"-o",
vmfb_path,
]
+ device_settings["device_flags"],
check=True,
+ device_settings["device_flags"]
)
logger.info(f"Model successfully compiled to {vmfb_path}")

Expand Down Expand Up @@ -174,35 +218,24 @@ def start_llm_server(
multi=False,
):
logger.info("Starting LLM server...")
cmd_args = _start_llm_server_args(
tokenizer_path,
model_config_path,
vmfb_path,
parameters_path,
settings,
port,
)

if multi:
server_process = multiprocessing.Process(
target=subprocess.Popen(
_start_llm_server_args(
tokenizer_path,
model_config_path,
vmfb_path,
parameters_path,
settings,
port,
),
)
target=run_subprocess, args=(cmd_args,)
)
server_process.start()

else:
# Start the server
server_process = subprocess.Popen(
_start_llm_server_args(
tokenizer_path,
model_config_path,
vmfb_path,
parameters_path,
settings,
port,
)
)
server_process = run_subprocess(cmd_args)

logger.info("Process started... waiting for server")
# Wait for server to start
wait_for_server(f"http://localhost:{port}", timeout)
return server_process

Expand Down
2 changes: 1 addition & 1 deletion shortfin/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ add_compile_options("$<$<C_COMPILER_ID:MSVC>:/utf-8>")
add_compile_options("$<$<CXX_COMPILER_ID:MSVC>:/utf-8>")

# Pins
set(SHORTFIN_IREE_GIT_TAG "iree-3.1.0rc20241204")
set(SHORTFIN_IREE_GIT_TAG "iree-3.1.0rc20241220")

# build options
option(SHORTFIN_BUILD_PYTHON_BINDINGS "Builds Python Bindings" OFF)
Expand Down
4 changes: 2 additions & 2 deletions shortfin/requirements-iree-compiler.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Keep in sync with "ref: iree-" in .github/workflows/* and GIT_TAG in CMakeLists.txt
-f https://iree.dev/pip-release-links.html
iree-base-compiler==3.1.0rc20241204
iree-base-runtime==3.1.0rc20241204
iree-base-compiler==3.1.0rc20241220
iree-base-runtime==3.1.0rc20241220
3 changes: 2 additions & 1 deletion shortfin/src/shortfin/array/storage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,8 @@ storage storage::subspan(iree_device_size_t byte_offset,
iree_device_size_t byte_length) {
storage new_storage(device_, {}, timeline_resource_);
SHORTFIN_THROW_IF_ERROR(iree_hal_buffer_subspan(
buffer_, byte_offset, byte_length, new_storage.buffer_.for_output()));
buffer_, byte_offset, byte_length, iree_allocator_system(),
new_storage.buffer_.for_output()));
return new_storage;
}

Expand Down
3 changes: 2 additions & 1 deletion shortfin/src/shortfin/local/program.cc
Original file line number Diff line number Diff line change
Expand Up @@ -663,7 +663,8 @@ void StaticProgramParameters::Load(std::filesystem::path file_path,

// Parse.
SHORTFIN_THROW_IF_ERROR(iree_io_parse_file_index(
to_iree_string_view(options.format), file_handle.get(), index_.get()));
to_iree_string_view(options.format), file_handle.get(), index_.get(),
iree_allocator_system()));
}

// -------------------------------------------------------------------------- //
Expand Down
Loading