Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][Offload] Add SYCLBIN format and dump tool #16873

Draft
wants to merge 9 commits into
base: sycl
Choose a base branch
from
Draft
107 changes: 92 additions & 15 deletions clang/tools/clang-linker-wrapper/ClangLinkerWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "llvm/Object/IRObjectFile.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hopefully we're planning on upstreaming this?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Absolutely! I am told there are some refactorings coming in for ClangLinkerWrapper, so I wanted to get some pieces in for now so development of other parts, like the driver, can go ahead in parallel.

#include "llvm/Object/ObjectFile.h"
#include "llvm/Object/OffloadBinary.h"
#include "llvm/Object/SYCLBIN.h"
#include "llvm/Option/ArgList.h"
#include "llvm/Option/OptTable.h"
#include "llvm/Option/Option.h"
Expand Down Expand Up @@ -156,6 +157,8 @@ static std::optional<llvm::module_split::IRSplitMode> SYCLModuleSplitMode;

static bool UseSYCLPostLinkTool;

static bool OutputSYCLBIN;

SmallString<128> SPIRVDumpDir;

using OffloadingImage = OffloadBinary::OffloadingImage;
Expand Down Expand Up @@ -1184,6 +1187,52 @@ static Expected<StringRef> runCompile(StringRef &InputFile,
return *OutputFileOrErr;
}

// Produce SYCLBIN data from a split module
static Expected<StringRef>
packageSYCLBIN(const SmallVector<SYCLBIN::ModuleDesc> &Modules) {
auto ErrorOrSYCLBIN = SYCLBIN::write(Modules);
if (!ErrorOrSYCLBIN)
return ErrorOrSYCLBIN.takeError();

OffloadingImage Image{};
Image.TheImageKind = IMG_SYCLBIN;
Image.TheOffloadKind = OFK_SYCL;
Image.Image = MemoryBuffer::getMemBufferCopy(*ErrorOrSYCLBIN);

std::unique_ptr<MemoryBuffer> Binary = MemoryBuffer::getMemBufferCopy(
OffloadBinary::write(Image), Image.Image->getBufferIdentifier());

auto OutFileOrErr =
createOutputFile(sys::path::filename(ExecutableName), "syclbin");
if (!OutFileOrErr)
return OutFileOrErr.takeError();

Expected<std::unique_ptr<FileOutputBuffer>> OutputOrErr =
FileOutputBuffer::create(*OutFileOrErr, Binary->getBufferSize());
if (!OutputOrErr)
return OutputOrErr.takeError();
std::unique_ptr<FileOutputBuffer> Output = std::move(*OutputOrErr);
llvm::copy(Binary->getBuffer(), Output->getBufferStart());
if (Error E = Output->commit())
return std::move(E);

return *OutFileOrErr;
}

Error mergeSYCLBIN(ArrayRef<StringRef> Files, const ArgList &Args) {
// Fast path for the general case where there's only one file. In this case we
// do not need to parse it and can instead simply copy it.
if (Files.size() == 1) {
if (std::error_code EC = sys::fs::copy_file(Files[0], ExecutableName))
return createFileError(ExecutableName, EC);
return Error::success();
}
// TODO: Merge SYCLBIN files here and write to ExecutableName output.
// Use the first file as the base and modify.
assert(Files.size() == 1);
return Error::success();
}

// Run wrapping library and clang
static Expected<StringRef>
runWrapperAndCompile(std::vector<module_split::SplitModule> &SplitModules,
Expand Down Expand Up @@ -2334,6 +2383,11 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
// object file.
SmallVector<StringRef> WrappedOutput;

// When creating SYCLBIN files, we need to store the compiled modules for
// combined packaging.
std::mutex SYCLBINModulesMtx;
SmallVector<SYCLBIN::ModuleDesc> SYCLBINModules;

// Initialize the images with any overriding inputs.
if (Args.hasArg(OPT_override_image))
if (Error Err = handleOverrideImages(Args, Images))
Expand Down Expand Up @@ -2447,18 +2501,26 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
}
}

// TODO(NOM7): Remove this call and use community flow for bundle/wrap
auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs);
if (!OutputFile)
return OutputFile.takeError();

// SYCL offload kind images are all ready to be sent to host linker.
// TODO: Currently, device code wrapping for SYCL offload happens in a
// separate path inside 'linkDevice' call seen above.
// This will eventually be refactored to use the 'common' wrapping logic
// that is used for other offload kinds.
std::scoped_lock Guard(ImageMtx);
WrappedOutput.push_back(*OutputFile);
if (OutputSYCLBIN) {
SYCLBIN::ModuleDesc MD;
MD.ArchString = LinkerArgs.getLastArgValue(OPT_arch_EQ);
MD.SplitModules = std::move(SplitModules);
std::scoped_lock Guard(SYCLBINModulesMtx);
SYCLBINModules.emplace_back(std::move(MD));
} else {
// TODO(NOM7): Remove this call and use community flow for bundle/wrap
auto OutputFile = sycl::runWrapperAndCompile(SplitModules, LinkerArgs);
if (!OutputFile)
return OutputFile.takeError();

// SYCL offload kind images are all ready to be sent to host linker.
// TODO: Currently, device code wrapping for SYCL offload happens in a
// separate path inside 'linkDevice' call seen above.
// This will eventually be refactored to use the 'common' wrapping logic
// that is used for other offload kinds.
std::scoped_lock Guard(ImageMtx);
WrappedOutput.push_back(*OutputFile);
}
}
if (HasNonSYCLOffloadKinds) {
// First link and remove all the input files containing bitcode.
Expand Down Expand Up @@ -2510,6 +2572,13 @@ Expected<SmallVector<StringRef>> linkAndWrapDeviceFiles(
if (Err)
return std::move(Err);

if (OutputSYCLBIN) {
auto OutputOrErr = sycl::packageSYCLBIN(SYCLBINModules);
if (!OutputOrErr)
return OutputOrErr.takeError();
WrappedOutput.push_back(*OutputOrErr);
}

for (auto &[Kind, Input] : Images) {
if (Kind == OFK_SYCL)
continue;
Expand Down Expand Up @@ -2965,6 +3034,9 @@ int main(int Argc, char **Argv) {
"-no-use-sycl-post-link-tool options can't "
"be used together."));

OutputSYCLBIN = Args.hasArg(OPT_syclbin);
// TODO: Check conflicting options: sycl_embed_ir

if (Args.hasArg(OPT_sycl_module_split_mode_EQ)) {
if (UseSYCLPostLinkTool)
reportError(createStringError(
Expand Down Expand Up @@ -3005,9 +3077,14 @@ int main(int Argc, char **Argv) {
if (!FilesOrErr)
reportError(FilesOrErr.takeError());

// Run the host linking job with the rendered arguments.
if (Error Err = runLinker(*FilesOrErr, Args))
reportError(std::move(Err));
if (OutputSYCLBIN) {
if (Error Err = sycl::mergeSYCLBIN(*FilesOrErr, Args))
reportError(std::move(Err));
} else {
// Run the host linking job with the rendered arguments.
if (Error Err = runLinker(*FilesOrErr, Args))
reportError(std::move(Err));
}
}

if (const opt::Arg *Arg = Args.getLastArg(OPT_wrapper_time_trace_eq)) {
Expand Down
5 changes: 5 additions & 0 deletions clang/tools/clang-linker-wrapper/LinkerWrapperOpts.td
Original file line number Diff line number Diff line change
Expand Up @@ -248,3 +248,8 @@ def sycl_dump_device_code_EQ : Joined<["--", "-"], "sycl-dump-device-code=">,
def sycl_allow_device_image_dependencies : Flag<["--", "-"], "sycl-allow-device-image-dependencies">,
Flags<[WrapperOnlyOption, HelpHidden]>,
HelpText<"Allow dependencies between device code images">;

// Options to force the output to be of the SYCLBIN format.
def syclbin : Flag<["--", "-"], "syclbin">,
Flags<[WrapperOnlyOption]>,
HelpText<"Output in the SYCLBIN binary format">;
4 changes: 4 additions & 0 deletions llvm/include/llvm/Object/Binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class Binary {
ID_GOFF,
ID_Wasm,

ID_SYCLBIN, // SYCLBIN binary file.
steffenlarsen marked this conversation as resolved.
Show resolved Hide resolved

ID_EndObjects
};

Expand Down Expand Up @@ -138,6 +140,8 @@ class Binary {

bool isOffloadFile() const { return TypeID == ID_Offload; }

bool isSYCLBINFile() const { return TypeID == ID_SYCLBIN; }

bool isCOFFImportFile() const {
return TypeID == ID_COFFImportFile;
}
Expand Down
1 change: 1 addition & 0 deletions llvm/include/llvm/Object/OffloadBinary.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ enum ImageKind : uint16_t {
IMG_Cubin,
IMG_Fatbinary,
IMG_PTX,
IMG_SYCLBIN,
IMG_LAST,
};

Expand Down
86 changes: 86 additions & 0 deletions llvm/include/llvm/Object/SYCLBIN.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
//===- SYCLBIN.h - SYCLBIN binary format support ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_OBJECT_SYCLBIN_H
#define LLVM_OBJECT_SYCLBIN_H

#include "llvm/ADT/SmallString.h"
#include "llvm/Object/Binary.h"
#include "llvm/SYCLLowerIR/ModuleSplitter.h"
#include "llvm/Support/MemoryBuffer.h"
#include <string>

namespace llvm {

namespace object {

class SYCLBIN : public Binary {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main question I have here is why don't we re-use existing OffloadBinary subclass?

class OffloadBinary : public Binary {

If we need a hierarchical structure, then we just make it nested by putting one OffloadBinary into each other, or concatenating a few of them together to represent a list.
We can subclass if we want to provide some custom getters like getArch.

OpenMP offloading emits this data structure and libompoffload (that is going to be turned into liboffload and eventually is what we will use instead of UR once we are in upstream) expects OffloadBinary.

I also think that whatever we produce for regular SYCL app compilation, for online compilation from SYCL sources and for SYCLBIN should have the same format for simplicity and uniformity of handling it in SYCL RT.
From that point of view, I vote for re-using OffloadBinary even more, because whilst I can at least somehow justify a custom format for SYCLBIN, I can't justify us not using OffloadBinary for regular SYCL compilation flow because it provides everything we have in our existing legacy custom format.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree, we should eventually switch even the common structure of kernel code over to this new format.

However, I think making this a subclass of OffloadBinary makes the abstraction a little less obvious. The current structure is that the SYCLBIN binary is contained inside the image of an offload binary, so they are not related formats but rather nested formats.

We can subclass if we want to provide some custom getters like getArch.

SYCLBIN does not have architecture at the top-level, as different contained modules can target different device architectures, so I worry that merging the two would make getters like it confusing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The current structure is that the SYCLBIN binary is contained inside the image of an offload binary, so they are not related formats but rather nested formats.

Oh, so the idea is to put SYCLBIN into OffloadBinary::OffloadingImage::Image, right? If so, then I withdraw my objections. In that case we can still have single top-level structure as an interface with SYCLBIN being a specialized version of the underlying content.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should add a comment to this class declaration to describe that it is supposed to be used as a more complicated from of OffloadBinary::OffloadingImage::Image to simplify working with it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added a comment and removed its inheritance from Binary.

public:
SYCLBIN(MemoryBufferRef Source);

enum class BundleState : uint8_t { Input = 0, Object = 1, Executable = 2 };
enum class IRType : uint8_t { SPIRV = 0, PTX = 1, AMDGCN = 2 };

struct ModuleDesc {
BundleState State;
std::string ArchString;
std::vector<module_split::SplitModule> SplitModules;
};

/// The current version of the binary used for backwards compatibility.
static constexpr uint32_t Version = 1;

/// Magic number used to identify SYCLBIN files.
static constexpr uint8_t MagicNumber[4] = {0x53, 0x59, 0x42, 0x49};

/// Serialize the contents of \p ModuleDescs to a binary buffer to be read
/// later.
static Expected<SmallString<0>> write(const SmallVector<ModuleDesc> &);

static Expected<std::unique_ptr<SYCLBIN>> read(MemoryBufferRef Source);
Comment on lines +46 to +48
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Implementation of read/write is quite complex, it has to be exhaustively tested. I'm not saying that all the testing should be here from day 1, but we at least need to lay some groundwork for it. For example what you can do is to have a unit-test which does write, then read and checks that result is the same as the initial input. See e471ba3 for inspiration.


static uint64_t getAlignment() { return 8; }
steffenlarsen marked this conversation as resolved.
Show resolved Hide resolved

static bool classof(const Binary *V) { return V->isSYCLBINFile(); }

struct IRModule {
IRType Type;
SmallVector<char> RawIRBytes;
};
struct NativeDeviceCodeImage {
SmallString<0> ArchString;
SmallVector<char> RawDeviceCodeImageBytes;
};

struct AbstractModule {
SmallVector<SmallString<0>> KernelNames;
SmallVector<SmallString<0>> ImportedSymbols;
SmallVector<SmallString<0>> ExportedSymbols;
std::unique_ptr<llvm::util::PropertySetRegistry> Properties;
steffenlarsen marked this conversation as resolved.
Show resolved Hide resolved

SmallVector<IRModule> IRModules;
SmallVector<NativeDeviceCodeImage> NativeDeviceCodeImages;
};

struct {
uint8_t Magic[4];
uint32_t Version;
BundleState State;
} Header;

SmallVector<AbstractModule, 4> AbstractModules;

private:
SYCLBIN(const SYCLBIN &Other) = delete;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need it to be declared as private to delete it? If we explicitly deleting it, then I think it is better to put it next to other constructors so that they are all next to each other.

};

} // namespace object

} // namespace llvm

#endif
2 changes: 1 addition & 1 deletion llvm/include/llvm/SYCLLowerIR/ModuleSplitter.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Module.h"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: do we need this instead of forward declaring?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We do. Unlike shared_ptr, unique_ptr insists on the contained type not being incomplete.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

But why do we need this in a header file which is otherwise unchanged? It seems like either this #include should be moved straight into .cpp file, or into another .h file

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to be a dependency bug that just wasn't noticed because Module.h was included before ModuleSplitter.h in all other cases.

#include "llvm/Support/Error.h"
#include "llvm/Support/PropertySetIO.h"

Expand All @@ -29,7 +30,6 @@
namespace llvm {

class Function;
class Module;

namespace cl {
class OptionCategory;
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Support/PropertySetIO.h
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ class PropertyValue {
}
}

const char *data() const { return reinterpret_cast<const char *>(&Val); }

private:
template <typename T> T &getValueRef();
void copy(const PropertyValue &P);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Object/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ add_llvm_component_library(LLVMObject
OffloadBinary.cpp
RecordStreamer.cpp
RelocationResolver.cpp
SYCLBIN.cpp
SymbolicFile.cpp
SymbolSize.cpp
TapiFile.cpp
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Object/OffloadBinary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,7 @@ ImageKind object::getImageKind(StringRef Name) {
.Case("cubin", IMG_Cubin)
.Case("fatbin", IMG_Fatbinary)
.Case("s", IMG_PTX)
.Case("syclbin", IMG_SYCLBIN)
.Default(IMG_None);
}

Expand All @@ -342,6 +343,8 @@ StringRef object::getImageKindName(ImageKind Kind) {
return "fatbin";
case IMG_PTX:
return "s";
case IMG_SYCLBIN:
return "syclbin";
default:
return "";
}
Expand Down
Loading