[UPDATE]: update to oneapi toolkit 2024 and torch version 2.1.0 (#239)
Update to the oneAPI 2024 toolkit and to torch 2.1.0.
They have to be updated at the same time because the ipex 1.13 package
dynamically links against libraries from oneAPI 2023.
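The coupling is visible in the shared-library dependencies of the IPEX native binaries. A minimal sketch of how one might confirm it (not part of this commit; the lib directory layout and the name filters are assumptions):

# Sketch: list the oneAPI runtime libraries that the installed IPEX
# binaries dynamically link against. Library names/paths vary by release.
import pathlib
import subprocess

import intel_extension_for_pytorch as ipex  # assumed installed

lib_dir = pathlib.Path(ipex.__file__).parent / "lib"
for lib in sorted(lib_dir.glob("*.so*")):
    ldd = subprocess.run(["ldd", str(lib)], capture_output=True, text=True)
    deps = [line.strip() for line in ldd.stdout.splitlines()
            if "sycl" in line or "mkl" in line]
    if deps:
        print(lib.name, deps)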

---------

Co-authored-by: Pavel Chekin <[email protected]>
Co-authored-by: Ettore Tiotto <[email protected]>
3 people authored Jan 17, 2024
1 parent d33dcc9 commit 47e7035
Showing 24 changed files with 88 additions and 46 deletions.
11 changes: 3 additions & 8 deletions .github/dockerfiles/runner-base/Dockerfile
@@ -1,4 +1,4 @@
-ARG INSTALLER_IMAGE=docker-registry.docker-registry.svc.cluster.local:5000/oneapi-basekit:2023.2.0
+ARG INSTALLER_IMAGE=docker-registry.docker-registry.svc.cluster.local:5000/oneapi-basekit:2024.0.1
 
 FROM $INSTALLER_IMAGE as installer
 
@@ -8,17 +8,12 @@ USER root
 
 RUN set -ex; \
     export DEBIAN_FRONTEND=noninteractive; \
-    echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/graphics/ubuntu jammy max' > /etc/apt/sources.list.d/intel-graphics.list; \
-    curl -s https://repositories.intel.com/graphics/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg; \
+    echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy unified' > /etc/apt/sources.list.d/intel-gpu-jammy.list; \
+    curl -s https://repositories.intel.com/gpu/intel-graphics.key | gpg --dearmor --output /usr/share/keyrings/intel-graphics.gpg; \
    apt-get update -y; \
    apt-get install -y --no-install-recommends --fix-missing \
        intel-opencl-icd \
        clinfo \
-    ; \
-    echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" > /etc/apt/sources.list.d/intel-oneapi.list; \
-    curl -s https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | gpg --dearmor --output /usr/share/keyrings/oneapi-archive-keyring.gpg; \
-    apt-get update -y; \
-    apt-get install -y --no-install-recommends --fix-missing \
        intel-level-zero-gpu \
        level-zero \
        level-zero-dev \
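Net effect: the deprecated repositories.intel.com/graphics feed is replaced by the unified repositories.intel.com/gpu feed, and the separate oneAPI apt repository is dropped, since the 2024.0.1 basekit image already provides the toolkit; everything now installs in a single apt-get transaction. A quick in-container check one could add (hypothetical, not in the commit):

# Sketch: confirm the container's OpenCL userspace is visible.
import subprocess

# `clinfo -l` lists the OpenCL platforms/devices provided by intel-opencl-icd.
print(subprocess.run(["clinfo", "-l"], capture_output=True, text=True).stdout)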
11 changes: 9 additions & 2 deletions .github/workflows/build_and_test.yml
@@ -73,7 +73,7 @@ jobs:
     runs-on:
       - glados
       - spr
-      - pvc
+      - oneapi-2024.0.1
     strategy:
       matrix:
         python:
@@ -156,7 +156,14 @@ jobs:
       - name: Run core tests
        run: |
          pip install pytest pytest-xdist
-          pip install torch==1.13.0a0+git6c9b55e intel_extension_for_pytorch==1.13.120+xpu -f https://developer.intel.com/ipex-whl-stable-xpu
+          pip install torch==2.1.0a0+cxx11.abi intel_extension_for_pytorch==2.1.10+xpu -f https://developer.intel.com/ipex-whl-stable-xpu
+          wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.14828.8/intel-igc-core_1.0.14828.8_amd64.deb
+          wget https://github.com/intel/intel-graphics-compiler/releases/download/igc-1.0.14828.8/intel-igc-opencl_1.0.14828.8_amd64.deb
+          sudo dpkg -i ./intel-igc-core_1.0.14828.8_amd64.deb ./intel-igc-opencl_1.0.14828.8_amd64.deb
+          rm ./intel-igc-core_1.0.14828.8_amd64.deb ./intel-igc-opencl_1.0.14828.8_amd64.deb
+          echo 'deb [arch=amd64 signed-by=/usr/share/keyrings/intel-graphics.gpg] https://repositories.intel.com/gpu/ubuntu jammy unified' | sudo tee -a /etc/apt/sources.list.d/intel-gpu-jammy.list
+          sudo apt update -y
+          sudo apt-get install -y --no-install-recommends --allow-downgrades --fix-missing libigc1=1.0.14828.26-736~22.04
          cd python/test/unit
          python3 -m pytest -n 8 --verbose --device xpu language/ --ignore=language/test_line_info.py --ignore=language/test_subprocess.py
          # run runtime tests serially to avoid race condition with cache handling.
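With the runner relabeled for oneAPI 2024.0.1 and the wheels bumped, a hedged sanity check of the resulting environment (not part of the workflow) could look like:

# Sketch: verify the torch / IPEX pairing and XPU visibility on the runner.
import torch
import intel_extension_for_pytorch as ipex  # registers the 'xpu' backend

print(torch.__version__)         # expected to start with "2.1.0a0"
print(ipex.__version__)          # expected "2.1.10+xpu"
print(torch.xpu.is_available())  # True when driver and IGC match the pin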
2 changes: 1 addition & 1 deletion .github/workflows/docker-runner-base.yaml
@@ -20,7 +20,7 @@ jobs:
        run: |
          docker build .github/dockerfiles/runner-base/ \
            --tag $REGISTRY/$TAG \
-           --build-arg INSTALLER_IMAGE=$REGISTRY/oneapi-basekit:2023.2.0
+           --build-arg INSTALLER_IMAGE=$REGISTRY/oneapi-basekit:2024.0.1
      - name: Push image
        run: |
6 changes: 3 additions & 3 deletions python/test/backend/third_party_backends/test_xpu_backend.py
@@ -1,8 +1,5 @@
 import torch
 
-import triton
-import triton.language as tl
-
 
 def test_xpu_backend(cmdopt):
     if cmdopt == "xpu":
@@ -14,6 +11,9 @@ def test_xpu_backend(cmdopt):
         except Exception:
             has_ipex = False
 
+        import triton
+        import triton.language as tl
+
         @triton.jit()
         def kernel(x_ptr, y_ptr, out_ptr):
             pid = tl.program_id(axis=0)
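The imports are moved below the IPEX probe so that importing triton, which may initialize the XPU backend, happens only on the xpu path and only after the probe has recorded whether IPEX is importable. The resulting shape of the test, in miniature (body abbreviated):

# Sketch of the reordered pattern: probe IPEX first, import triton after.
def test_xpu_backend(cmdopt):
    if cmdopt == "xpu":
        try:
            import intel_extension_for_pytorch  # noqa: F401
            has_ipex = True
        except Exception:
            has_ipex = False

        # Deferred: triton is only pulled in once the probe has run.
        import triton
        import triton.language as tl  # noqa: F401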
1 change: 1 addition & 0 deletions python/test/unit/language/assert_helper.py
@@ -1,6 +1,7 @@
 import sys
 
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 from torch.testing import assert_close
 
 import triton
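This import, repeated across the test files below, is used purely for its side effect: loading intel_extension_for_pytorch registers Intel's XPU device with PyTorch, which is why it carries # noqa: F401 (it is intentionally "unused"). An illustration of the side effect (requires an XPU device to actually run):

import torch
import intel_extension_for_pytorch  # type: ignore # noqa: F401

# The module is never referenced again, but importing it is what makes
# the "xpu" device type available to plain torch calls:
x = torch.ones(4, device="xpu")
print(x.device)  # xpu:0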
1 change: 1 addition & 0 deletions python/test/unit/language/print_helper.py
@@ -2,6 +2,7 @@
 import uuid
 
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 from torch.testing import assert_close
 
 import triton
1 change: 1 addition & 0 deletions python/test/unit/language/test_annotations.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
 
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
1 change: 1 addition & 0 deletions python/test/unit/language/test_block_pointer.py
@@ -1,5 +1,6 @@
 import pytest
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
1 change: 1 addition & 0 deletions python/test/unit/language/test_conversions.py
@@ -3,6 +3,7 @@
 
 import numpy as np
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 import pytest
 import triton
 import triton.language as tl
1 change: 1 addition & 0 deletions python/test/unit/language/test_core.py
@@ -6,6 +6,7 @@
 import numpy as np
 import pytest
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 from numpy.random import RandomState
 
 import triton
23 changes: 12 additions & 11 deletions python/test/unit/language/test_line_info.py
@@ -3,6 +3,7 @@
 
 import pytest
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
@@ -152,27 +153,27 @@ def test_line_info(func: str):
     file_lines = extract_file_lines(kernel_info.asm["spv"])
 
     if func == "single":
-        assert (check_file_lines(file_lines, "test_line_info.py", 16))
         assert (check_file_lines(file_lines, "test_line_info.py", 17))
+        assert (check_file_lines(file_lines, "test_line_info.py", 18))
     elif func == "call":
-        assert (check_file_lines(file_lines, "test_line_info.py", 29))
-        assert (check_file_lines(file_lines, "test_line_info.py", 22))
-        assert (check_file_lines(file_lines, "test_line_info.py", 31))
+        assert (check_file_lines(file_lines, "test_line_info.py", 30))
+        assert (check_file_lines(file_lines, "test_line_info.py", 23))
+        assert (check_file_lines(file_lines, "test_line_info.py", 32))
     elif func == "call_noinline":
-        assert (check_file_lines(file_lines, "test_line_info.py", 43))
-        assert (check_file_lines(file_lines, "test_line_info.py", 36))
+        assert (check_file_lines(file_lines, "test_line_info.py", 44))
         assert (check_file_lines(file_lines, "test_line_info.py", 37))
         assert (check_file_lines(file_lines, "test_line_info.py", 38))
+        assert (check_file_lines(file_lines, "test_line_info.py", 39))
     elif func == "multi_files":
-        assert (check_file_lines(file_lines, "test_line_info.py", 48))
-        assert (check_file_lines(file_lines, "test_line_info.py", 50))
+        assert (check_file_lines(file_lines, "test_line_info.py", 49))
+        assert (check_file_lines(file_lines, "test_line_info.py", 51))
         assert (check_file_lines(file_lines, "standard.py", 33))
         assert (check_file_lines(file_lines, "standard.py", 34))
         assert (check_file_lines(file_lines, "standard.py", 36))
     elif func == "autotune":
-        assert (check_file_lines(file_lines, "test_line_info.py", 61))
         assert (check_file_lines(file_lines, "test_line_info.py", 62))
         assert (check_file_lines(file_lines, "test_line_info.py", 63))
+        assert (check_file_lines(file_lines, "test_line_info.py", 64))
     elif func == "dot_combine":
-        assert (check_file_lines(file_lines, "test_line_info.py", 73))
-        assert (check_file_lines(file_lines, "test_line_info.py", 74, should_contain=False))
+        assert (check_file_lines(file_lines, "test_line_info.py", 74))
+        assert (check_file_lines(file_lines, "test_line_info.py", 75, should_contain=False))
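All of these updates follow mechanically from the one-line import added at the top of the file: every kernel statement in test_line_info.py shifts down by exactly one line, while the standard.py expectations (a different, unchanged file) stay put. In miniature:

# Each expected line number for test_line_info.py is bumped by exactly one,
# matching the single import line added at the top of the file.
old_to_new = {16: 17, 29: 30, 22: 23, 31: 32, 43: 44, 61: 62, 73: 74}
assert all(new == old + 1 for old, new in old_to_new.items())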
1 change: 1 addition & 0 deletions python/test/unit/language/test_random.py
@@ -2,6 +2,7 @@
 import pytest
 import scipy.stats
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
1 change: 1 addition & 0 deletions python/test/unit/operators/test_blocksparse.py
@@ -1,5 +1,6 @@
 import pytest
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.ops
1 change: 1 addition & 0 deletions python/test/unit/operators/test_cross_entropy.py
@@ -1,5 +1,6 @@
 import pytest
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.ops
1 change: 1 addition & 0 deletions python/test/unit/operators/test_flash_attention.py
@@ -1,5 +1,6 @@
 import pytest
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.ops
1 change: 1 addition & 0 deletions python/test/unit/operators/test_inductor.py
@@ -1,5 +1,6 @@
 import pytest
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
1 change: 1 addition & 0 deletions python/test/unit/operators/test_matmul.py
@@ -2,6 +2,7 @@
 
 import pytest
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
1 change: 1 addition & 0 deletions python/test/unit/runtime/test_autotuner.py
@@ -1,4 +1,5 @@
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
1 change: 1 addition & 0 deletions python/test/unit/runtime/test_cache.py
@@ -5,6 +5,7 @@
 
 import pytest
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
1 change: 1 addition & 0 deletions python/test/unit/runtime/test_driver.py
@@ -1,4 +1,5 @@
 import sys
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 
1 change: 1 addition & 0 deletions python/test/unit/runtime/test_launch.py
@@ -8,6 +8,7 @@
 import tracemalloc
 
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
1 change: 1 addition & 0 deletions python/test/unit/runtime/test_subproc.py
@@ -3,6 +3,7 @@
 import shutil
 
 import torch
+import intel_extension_for_pytorch # type: ignore # noqa: F401
 
 import triton
 import triton.language as tl
2 changes: 1 addition & 1 deletion scripts/test-triton.sh
@@ -47,7 +47,7 @@ export TRITON_PROJ_BUILD=$TRITON_PROJ/python/build
 
 python3 -m pip install lit
 python3 -m pip install pytest
-python3 -m pip install torch==1.13.0a0+git6c9b55e intel_extension_for_pytorch==1.13.120+xpu -f https://developer.intel.com/ipex-whl-stable-xpu
+python3 -m pip install torch==2.1.0a0+cxx11.abi intel_extension_for_pytorch==2.1.10+xpu -f https://developer.intel.com/ipex-whl-stable-xpu
 if [ $? -ne 0 ]; then
     echo "FAILED: return code $?"
     exit $?
62 changes: 42 additions & 20 deletions third_party/xpu/backend/driver.c
@@ -144,15 +144,15 @@ bool update(sycl::queue sycl_queue) {
   // Get l0-context
   auto sycl_context = sycl_queue.get_context();
   ze_context_handle_t hCtxt =
-      get_native<sycl::backend::level_zero>(sycl_context);
+      get_native<sycl::backend::ext_oneapi_level_zero>(sycl_context);
   // Get l0-device
   std::vector<sycl::device> sycl_devices = sycl_context.get_devices();
   ze_device_handle_t hDev =
-      get_native<sycl::backend::level_zero>(sycl_devices[0]);
+      get_native<sycl::backend::ext_oneapi_level_zero>(sycl_devices[0]);
   // Get l0-queue
   bool immediate_cmd_list = false;
   std::variant<ze_command_queue_handle_t, ze_command_list_handle_t> queue_var =
-      get_native<sycl::backend::level_zero>(sycl_queue);
+      get_native<sycl::backend::ext_oneapi_level_zero>(sycl_queue);
   auto l0_queue = std::get_if<ze_command_queue_handle_t>(&queue_var);
   if (l0_queue == nullptr) {
     auto imm_cmd_list = std::get_if<ze_command_list_handle_t>(&queue_var);
@@ -170,15 +170,19 @@ bool update(sycl::queue sycl_queue) {
   context = sycl_queue_map[sycl_queue].context;
   uint32_t deviceCount = std::min(sycl_devices.size(), devices.size());
   for (uint32_t i = 0; i < deviceCount; ++i) {
-    devices[i] = sycl::get_native<sycl::backend::level_zero>(sycl_devices[i]);
+    devices[i] =
+        sycl::get_native<sycl::backend::ext_oneapi_level_zero>(sycl_devices[i]);
   }
 
   return true;
 }
 
 static PyObject *initContext(PyObject *self, PyObject *args) {
-  void *queue;
-  if (!PyArg_ParseTuple(args, "K", &queue))
+  PyObject *cap;
+  void *queue = NULL;
+  if (!PyArg_ParseTuple(args, "O", &cap))
+    return NULL;
+  if (!(queue = PyCapsule_GetPointer(cap, PyCapsule_GetName(cap))))
     return NULL;
   sycl::queue *sycl_queue = static_cast<sycl::queue *>(queue);
   if (sycl_queue_map.find(*sycl_queue) == sycl_queue_map.end()) {
@@ -202,8 +206,11 @@ static PyObject *initEventPool(PyObject *self, PyObject *args) {
 }
 
 static PyObject *initDevices(PyObject *self, PyObject *args) {
-  void *queue;
-  if (!PyArg_ParseTuple(args, "K", &queue))
+  PyObject *cap;
+  void *queue = NULL;
+  if (!PyArg_ParseTuple(args, "O", &cap))
+    return NULL;
+  if (!(queue = PyCapsule_GetPointer(cap, PyCapsule_GetName(cap))))
     return NULL;
   sycl::queue *sycl_queue = static_cast<sycl::queue *>(queue);
 
@@ -215,16 +222,19 @@ static PyObject *initDevices(PyObject *self, PyObject *args) {
   // Retrieve devices
   uint32_t deviceCount = sycl_devices.size();
   for (uint32_t i = 0; i < deviceCount; ++i) {
-    devices.push_back(
-        sycl::get_native<sycl::backend::level_zero>(sycl_devices[i]));
+    devices.push_back(sycl::get_native<sycl::backend::ext_oneapi_level_zero>(
+        sycl_devices[i]));
   }
 
   return Py_BuildValue("(i)", deviceCount);
 }
 
 static PyObject *getL0ImmCommandList(PyObject *self, PyObject *args) {
-  void *queue;
-  if (!PyArg_ParseTuple(args, "K", &queue))
+  PyObject *cap;
+  void *queue = NULL;
+  if (!PyArg_ParseTuple(args, "O", &cap))
+    return NULL;
+  if (!(queue = PyCapsule_GetPointer(cap, PyCapsule_GetName(cap))))
     return NULL;
   sycl::queue *sycl_queue = static_cast<sycl::queue *>(queue);
 
@@ -234,8 +244,11 @@ static PyObject *getL0ImmCommandList(PyObject *self, PyObject *args) {
   return Py_BuildValue("(K)", (uint64_t)(sycl_queue_map[*sycl_queue].cmd_list));
 }
 static PyObject *getL0Queue(PyObject *self, PyObject *args) {
-  void *queue;
-  if (!PyArg_ParseTuple(args, "K", &queue))
+  PyObject *cap;
+  void *queue = NULL;
+  if (!PyArg_ParseTuple(args, "O", &cap))
+    return NULL;
+  if (!(queue = PyCapsule_GetPointer(cap, PyCapsule_GetName(cap))))
     return NULL;
   sycl::queue *sycl_queue = static_cast<sycl::queue *>(queue);
   if (sycl_queue_map.find(*sycl_queue) == sycl_queue_map.end()) {
@@ -244,8 +257,11 @@ static PyObject *getL0Queue(PyObject *self, PyObject *args) {
   return Py_BuildValue("(K)", (uint64_t)(sycl_queue_map[*sycl_queue].queue));
 }
 static PyObject *getL0DevPtr(PyObject *self, PyObject *args) {
-  void *queue;
-  if (!PyArg_ParseTuple(args, "K", &queue))
+  PyObject *cap;
+  void *queue = NULL;
+  if (!PyArg_ParseTuple(args, "O", &cap))
+    return NULL;
+  if (!(queue = PyCapsule_GetPointer(cap, PyCapsule_GetName(cap))))
     return NULL;
   sycl::queue *sycl_queue = static_cast<sycl::queue *>(queue);
   if (sycl_queue_map.find(*sycl_queue) == sycl_queue_map.end()) {
@@ -254,8 +270,11 @@ static PyObject *getL0DevPtr(PyObject *self, PyObject *args) {
   return Py_BuildValue("(K)", (uint64_t)(sycl_queue_map[*sycl_queue].device));
 }
 static PyObject *getL0CtxtPtr(PyObject *self, PyObject *args) {
-  void *queue;
-  if (!PyArg_ParseTuple(args, "K", &queue))
+  PyObject *cap;
+  void *queue = NULL;
+  if (!PyArg_ParseTuple(args, "O", &cap))
+    return NULL;
+  if (!(queue = PyCapsule_GetPointer(cap, PyCapsule_GetName(cap))))
     return NULL;
   sycl::queue *sycl_queue = static_cast<sycl::queue *>(queue);
   if (sycl_queue_map.find(*sycl_queue) == sycl_queue_map.end()) {
@@ -264,8 +283,11 @@ static PyObject *getL0CtxtPtr(PyObject *self, PyObject *args) {
   return Py_BuildValue("(K)", (uint64_t)(sycl_queue_map[*sycl_queue].context));
 }
 static PyObject *isUsingICL(PyObject *self, PyObject *args) {
-  void *queue;
-  if (!PyArg_ParseTuple(args, "K", &queue))
+  PyObject *cap;
+  void *queue = NULL;
+  if (!PyArg_ParseTuple(args, "O", &cap))
+    return NULL;
+  if (!(queue = PyCapsule_GetPointer(cap, PyCapsule_GetName(cap))))
    return NULL;
   sycl::queue *sycl_queue = static_cast<sycl::queue *>(queue);
   if (sycl_queue_map.find(*sycl_queue) == sycl_queue_map.end()) {
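Two mechanical changes repeat through this file. First, sycl::backend::level_zero becomes sycl::backend::ext_oneapi_level_zero, the enumerator name used by the 2024 DPC++ compiler. Second, every entry point stops parsing the queue argument as a bare integer ("K", i.e. unsigned long long) and instead receives a PyObject and unwraps it with PyCapsule_GetPointer, which type-checks the capsule rather than blindly reinterpreting an integer as a sycl::queue pointer. A hedged sketch of the Python-side counterpart (module and function names here are illustrative, not taken from the commit):

# Sketch: wrap a raw sycl::queue pointer in a PyCapsule before passing it
# to the extension. "queue_ptr" and "xpu_driver" are illustrative names.
import ctypes

PyCapsule_New = ctypes.pythonapi.PyCapsule_New
PyCapsule_New.restype = ctypes.py_object
PyCapsule_New.argtypes = [ctypes.c_void_p, ctypes.c_char_p, ctypes.c_void_p]

def make_queue_capsule(queue_ptr: int):
    # Unnamed capsule, no destructor: the C side retrieves the pointer via
    # PyCapsule_GetPointer(cap, PyCapsule_GetName(cap)).
    return PyCapsule_New(queue_ptr, None, None)

# Usage (hypothetical): xpu_driver.initContext(make_queue_capsule(queue_ptr))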
