-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
ARM Backend using ruy for fp32 and int8 #79
Changes from 55 commits
79d7b33
2ac7cbc
93b841b
9674973
f3e7818
5250b9e
26d3ba2
b7969b0
b271b70
efa5a85
179f239
0d189c8
8951261
49beb50
3cf85f7
4edc8ef
a414b60
e2069bf
1b4049a
e522e6c
90858a5
557de0c
d10009f
3a37966
418a7ce
b7412c3
071e0d4
ec886bd
4df1998
1defce6
c4be980
b181847
6e4c561
53636cf
be9e153
876a915
d399a35
06b6dd9
e310f73
3bf1133
5c8b1d2
9dd1eff
b055c11
d006196
39b7237
3a6c515
46db01b
63fea9a
82a15e1
4a8c0da
e17a5dd
3c8a149
800402c
9d648d0
d19a312
1b38e01
9027ea4
a0ee527
6285f28
8895fda
c6c3ac6
3baf620
aa1842c
8eae08b
9a541c4
4b80399
ac8de91
38b608a
86c8d44
861e31d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
name: ARM | ||
'on': | ||
push: | ||
branches: | ||
- main | ||
- ci-sandbox | ||
pull_request: | ||
branches: | ||
- '**' | ||
env: | ||
ccache_basedir: ${{ github.workspace }} | ||
ccache_dir: "${{ github.workspace }}/.ccache" | ||
ccache_compilercheck: content | ||
ccache_compress: 'true' | ||
ccache_compresslevel: 9 | ||
ccache_maxsize: 200M | ||
ccache_cmake: -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache | ||
ndk: "${{ github.workspace }}/android-ndk-r23b" | ||
abi: "arm64-v8a" | ||
minsdk_version : 28 | ||
android_platform: 28 | ||
|
||
jobs: | ||
ubuntu: | ||
name: "arm-v8a cross-compile via Android NDK" | ||
runs-on: ubuntu-latest | ||
|
||
steps: | ||
- name: Checkout | ||
uses: actions/checkout@v2 | ||
with: | ||
submodules: recursive | ||
|
||
- name: Install prerequisites | ||
run: | | ||
# Download and unpack the Android NDK r23b archive used for the cross-compile. | ||
wget -c --quiet https://dl.google.com/android/repository/android-ndk-r23b-linux.zip | ||
unzip -qq android-ndk-r23b-linux.zip | ||
# Refresh the package index first so the install does not fail on a stale mirror listing. | ||
sudo apt-get update | ||
sudo apt-get -y install ccache cmake | ||
|
||
- name: Generate ccache_vars for ccache based on machine | ||
shell: bash | ||
id: ccache_vars | ||
run: |- | ||
# Publish the step outputs `hash` and `timestamp` used to build the ccache cache key. | ||
# The `::set-output` workflow command is deprecated; outputs are appended to $GITHUB_OUTPUT instead. | ||
echo "hash=$(echo ${{ env.ccache_compilercheck }})" >> "$GITHUB_OUTPUT" | ||
echo "timestamp=$(date '+%Y-%m-%dT%H.%M.%S')" >> "$GITHUB_OUTPUT" | ||
|
||
- name: Cache-op for build-cache through ccache | ||
uses: actions/cache@v2 | ||
with: | ||
path: ${{ env.ccache_dir }} | ||
# This job declares no build matrix, so `matrix.identifier` would expand to an empty string and | ||
# the last restore key would degrade to the bare prefix `ccache-`. Key on the target ABI instead | ||
# so only caches produced by this cross-compile are restored. | ||
key: ccache-android-${{ env.abi }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }}-${{ steps.ccache_vars.outputs.timestamp }} | ||
restore-keys: |- | ||
ccache-android-${{ env.abi }}-${{ steps.ccache_vars.outputs.hash }}-${{ github.ref }} | ||
ccache-android-${{ env.abi }}-${{ steps.ccache_vars.outputs.hash }} | ||
ccache-android-${{ env.abi }} | ||
|
||
- name: ccache environment setup | ||
run: |- | ||
echo "CCACHE_COMPILER_CHECK=${{ env.ccache_compilercheck }}" >> $GITHUB_ENV | ||
echo "CCACHE_BASEDIR=${{ env.ccache_basedir }}" >> $GITHUB_ENV | ||
echo "CCACHE_COMPRESS=${{ env.ccache_compress }}" >> $GITHUB_ENV | ||
echo "CCACHE_COMPRESSLEVEL=${{ env.ccache_compresslevel }}" >> $GITHUB_ENV | ||
echo "CCACHE_DIR=${{ env.ccache_dir }}" >> $GITHUB_ENV | ||
echo "CCACHE_MAXSIZE=${{ env.ccache_maxsize }}" >> $GITHUB_ENV | ||
|
||
- name: ccache prolog | ||
run: |- | ||
ccache -s # Print current cache stats | ||
ccache -z # Zero cache entry | ||
|
||
- name: Generate buildfiles for marian on android via cmake | ||
run: |- | ||
mkdir -p build | ||
cd build | ||
# Pull the workflow-level settings into shell variables used by the cmake invocation below. | ||
NDK=${{ env.ndk }} | ||
ABI=${{ env.abi }} | ||
MINSDK_VERSION=${{ env.minsdk_version }} | ||
ANDROID_PLATFORM=${{ env.android_platform }} | ||
OTHER_ANDROID_ARGS=( | ||
-DANDROID_ARM_NEON=TRUE | ||
) | ||
OTHER_MARIAN_ARGS=( | ||
-DCOMPILE_CUDA=off | ||
-DCOMPILE_CPU=on | ||
-DCMAKE_HAVE_THREADS_LIBRARY=1 | ||
-DCMAKE_USE_WIN32_THREADS_INIT=0 | ||
-DCMAKE_USE_PTHREADS_INIT=1 | ||
-DTHREADS_PREFER_PTHREAD_FLAG=ON | ||
-DBUILD_ARCH=armv8-a | ||
# -DCOMPILE_WITHOUT_EXCEPTIONS=on # Apparently this can reduce the binary size, let's see. | ||
) | ||
# Additionally list variables finally configured. | ||
# ANDROID_NATIVE_API_LEVEL must reference MINSDK_VERSION (with the underscore), the name assigned above; | ||
# the previous spelling expanded to an empty string. | ||
cmake -L \ | ||
-DCMAKE_BUILD_TYPE=Release \ | ||
-DCMAKE_TOOLCHAIN_FILE=$NDK/build/cmake/android.toolchain.cmake \ | ||
-DANDROID_TOOLCHAIN=clang \ | ||
-DANDROID_ABI=$ABI \ | ||
-DANDROID_PLATFORM=$ANDROID_PLATFORM \ | ||
-DANDROID_NATIVE_API_LEVEL=$MINSDK_VERSION \ | ||
-DANDROID_TOOLCHAIN_NAME=arm-linux-androideabi-4.8 \ | ||
-DANDROID_STL=c++_static \ | ||
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DCMAKE_C_COMPILER_LAUNCHER=ccache \ | ||
"${OTHER_ANDROID_ARGS[@]}" "${OTHER_MARIAN_ARGS[@]}" \ | ||
.. | ||
|
||
|
||
- name : Build marian for android | ||
working-directory: build | ||
run: |- | ||
# Only build marian (lib) for now. | ||
make -j2 | ||
|
||
- name: ccache epilog | ||
run: 'ccache -s # Print current cache stats' | ||
|
||
- uses: actions/upload-artifact@v2 | ||
with: | ||
path: ${{github.workspace}}/build/marian-decoder | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,6 +16,19 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) | |
|
||
include(CMakeDependentOption) | ||
|
||
# Architecture detection | ||
include(TargetArch) | ||
|
||
# Query the targeted architecture(s). Exactly one entry yields that entry as the architecture code; | ||
# any other count is treated as a "universal" (multi-architecture) build. | ||
target_architecture(CMAKE_TARGET_ARCHITECTURES) | ||
list(LENGTH CMAKE_TARGET_ARCHITECTURES cmake_target_arch_len) | ||
if(cmake_target_arch_len EQUAL 1) | ||
set(CMAKE_TARGET_ARCHITECTURE_UNIVERSAL FALSE) | ||
set(CMAKE_TARGET_ARCHITECTURE_CODE "${CMAKE_TARGET_ARCHITECTURES}") | ||
else() | ||
set(CMAKE_TARGET_ARCHITECTURE_UNIVERSAL TRUE) | ||
set(CMAKE_TARGET_ARCHITECTURE_CODE "universal") | ||
endif() | ||
|
||
# Custom CMake options | ||
option(COMPILE_CPU "Compile CPU version" ON) | ||
option(COMPILE_CUDA "Compile GPU version" ON) | ||
|
@@ -31,24 +44,51 @@ option(USE_CCACHE "Use ccache compiler cache (https://ccache.dev)" OFF) | |
option(USE_CUDNN "Use CUDNN library" OFF) | ||
option(USE_DOXYGEN "Build documentation with Doxygen" ON) | ||
option(USE_FBGEMM "Use FBGEMM" OFF) | ||
option(USE_INTGEMM "Use INTGEMM" OFF) | ||
option(USE_RUY "Use Ruy" OFF) | ||
option(USE_MKL "Compile with MKL support" ON) | ||
option(USE_MPI "Use MPI library" OFF) | ||
option(USE_NCCL "Use NCCL library" ON) | ||
option(USE_SENTENCEPIECE "Download and compile SentencePiece" ON) | ||
option(USE_STATIC_LIBS "Link statically against non-system libs" OFF) | ||
option(GENERATE_MARIAN_INSTALL_TARGETS "Generate Marian install targets (requires CMake 3.12+)" OFF) | ||
option(M32_BINARIES "Generate 32bit binaries even when building outside of WASM. Useful for testing some WASM specific functionality without the need for the compiling to WASM." OFF) | ||
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
option(COMPILE_WASM "Compile (wasm compatible) marian for WASM target" OFF) | ||
option(USE_WASM_COMPATIBLE_SOURCE "Enable the minimal marian sources that compile to wasm. Useful for debugging wasm failures by building same sources natively" OFF) | ||
|
||
option(USE_SIMD_UTILS "Enable simde to target instruction sets" OFF) | ||
option(USE_RUY_SGEMM "Compile with Ruy SGEMM" OFF) | ||
option(COMPILE_WITHOUT_EXCEPTIONS "Compile without exceptions" OFF) | ||
|
||
# cmake options that are dependent on USE_WASM_COMPATIBLE_SOURCE cmake option | ||
CMAKE_DEPENDENT_OPTION(USE_THREADS "Compile with multi-threading support" OFF | ||
"USE_WASM_COMPATIBLE_SOURCE" ON) | ||
CMAKE_DEPENDENT_OPTION(USE_WASM_COMPATIBLE_BLAS "Compile with wasm compatible blas" ON | ||
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
CMAKE_DEPENDENT_OPTION(USE_ONNX_SGEMM "Compile with wasm compatible blas" ON | ||
"USE_WASM_COMPATIBLE_SOURCE" OFF) | ||
CMAKE_DEPENDENT_OPTION(COMPILE_WITHOUT_EXCEPTIONS "Compile without exceptions" ON | ||
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
"USE_WASM_COMPATIBLE_SOURCE" OFF) | ||
|
||
# Select the GEMM backend from the detected architecture code: Ruy (with simde-based SIMD utils) | ||
# when the code contains "arm", intgemm for every other value (including "universal"). | ||
# The expansion is quoted so that an empty architecture code still forms a valid if() expression. | ||
if("${CMAKE_TARGET_ARCHITECTURE_CODE}" MATCHES "arm") | ||
set(USE_RUY ON) | ||
set(USE_RUY_SGEMM ON) | ||
set(USE_SIMD_UTILS ON) | ||
else() | ||
set(USE_INTGEMM ON) | ||
endif() | ||
|
||
if(USE_INTGEMM) | ||
add_compile_definitions(USE_INTGEMM=1) | ||
endif(USE_INTGEMM) | ||
|
||
if(USE_SIMD_UTILS) | ||
add_compile_definitions(ARM SSE) #added for ARM | ||
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if(MSVC) | ||
add_compile_options(/flax-vector-conversions) | ||
else(MSVC) | ||
add_compile_options(-flax-vector-conversions) | ||
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
endif(MSVC) | ||
endif(USE_SIMD_UTILS) | ||
|
||
|
||
if (USE_WASM_COMPATIBLE_SOURCE) | ||
set(SPM_BUILD_LIBRARY_ONLY ON CACHE BOOL "Build only sentencepiece library (skip building executables)") | ||
add_compile_definitions(WASM_COMPATIBLE_SOURCE) | ||
|
@@ -61,10 +101,11 @@ if (COMPILE_WASM) | |
set(WORMHOLE ON CACHE BOOL "Use WASM wormhole in intgemm https://bugzilla.mozilla.org/show_bug.cgi?id=1672160") | ||
endif() | ||
|
||
if(M32_BINARIES OR COMPILE_WASM) | ||
|
||
if(COMPILE_WASM) | ||
set("BUILD_WIDTH" "-m32") | ||
else(M32_BINARIES OR COMPILE_WASM) | ||
set("BUILD_WIDTH" "-m64") | ||
else(COMPILE_WASM) | ||
set("BUILD_WIDTH" "") | ||
endif() | ||
|
||
if(NOT COMPILE_WASM) | ||
|
@@ -194,7 +235,6 @@ if(MSVC) | |
add_definitions(-DUSE_FBGEMM=1 -DFBGEMM_STATIC=1) | ||
endif(USE_FBGEMM) | ||
else(MSVC) | ||
|
||
# Check we are using at least g++ 5.0 | ||
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS 5.0) | ||
message(FATAL_ERROR "FATAL ERROR: Compiling Marian requires at least g++ 5.0, your version is ${CMAKE_CXX_COMPILER_VERSION}") | ||
|
@@ -249,12 +289,14 @@ else(MSVC) | |
# -msse4.1 once marian can solely be compiled with intgemm ("onnxjs" will be removed in that case) | ||
set(INTRINSICS "-mssse3 -msimd128") | ||
else() | ||
set(INTRINSICS "-msse4.1") | ||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL x86_64 OR CMAKE_SYSTEM_PROCESSOR STREQUAL amd64) | ||
set(INTRINSICS "-msse4.1") | ||
endif () | ||
endif() | ||
|
||
if(USE_FBGEMM) | ||
set(EXT_LIBS ${EXT_LIBS} fbgemm dl) | ||
add_definitions(-DUSE_FBGEMM=1) | ||
add_compile_definitions(USE_FBGEMM=1) | ||
endif(USE_FBGEMM) | ||
|
||
if (CMAKE_CXX_COMPILER_ID MATCHES "Clang" AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 9.0) | ||
|
@@ -266,7 +308,8 @@ else(MSVC) | |
set(DISABLE_GLOBALLY "-Wno-unused-result ${CLANG_IGNORE_UNKNOWN_CUDA} ${CLANG_IGNORE_UNUSED_VALUES}") # This needs to appear here as well to appease clang11+ on linux | ||
|
||
# These are used in src/CMakeLists.txt on a per-target basis | ||
list(APPEND ALL_WARNINGS -Wall; -Werror; -Wextra; -Wno-unused-result; -Wno-deprecated; | ||
list(APPEND ALL_WARNINGS -Wall; # -Werror; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What warning was introduced that made removal of There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Do I do https://stackoverflow.com/a/3308675/4565794 to get around this? I can break it and get around it by something with the headers, I hope. Upstream appears to want to keep There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We should keep There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For the record: https://godbolt.org/z/6Mzhc1Tqq |
||
-Wextra; -Wno-unused-result; -Wno-deprecated; | ||
-Wno-pragmas; -Wno-unused-parameter; -Wno-unused-function; | ||
-Wno-unused-value; -Wno-unknown-pragmas; -Wno-sign-compare; | ||
-Wno-missing-field-initializers; ${CLANG_IGNORE_UNUSED_PRIVATE_FIELD}) | ||
|
@@ -324,6 +367,7 @@ else(MSVC) | |
endif(COMPILE_WASM) | ||
endif(MSVC) | ||
|
||
|
||
# with gcc 7.0 and above we need to mark fallthrough in switch case statements | ||
# that can be done in comments for backcompat, but CCACHE removes comments. | ||
# -C makes gcc keep comments. | ||
|
@@ -544,24 +588,31 @@ endif(USE_MPI) | |
############################################################################### | ||
# Find BLAS library for CPU compilation | ||
if(COMPILE_CPU) | ||
set(EXT_LIBS ${EXT_LIBS} intgemm) # Move the intgemm bits on top since they compile with every single variant | ||
if(USE_INTGEMM) | ||
set(EXT_LIBS ${EXT_LIBS} intgemm) # Move the intgemm bits on top since they compile with every single variant | ||
endif(USE_INTGEMM) | ||
|
||
if(USE_RUY) | ||
set(EXT_LIBS ${EXT_LIBS} ruy) | ||
add_compile_definitions(USE_RUY_SGEMM=1) | ||
endif(USE_RUY) | ||
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
add_definitions(-DCOMPILE_CPU=1) # Move the compile CPU definition on top since we want to compile intgemm when we set compile CPU | ||
# in case a BLAS vendor is not found, we have a runtime error, although we should probably not allow the compilation to go on | ||
# if there are BLAS vendors, we have other runtime checks with sane error messages. | ||
if(USE_WASM_COMPATIBLE_BLAS) | ||
if(USE_ONNX_SGEMM) | ||
## Use a wasm compatible BLAS | ||
## ^ SGEMM != BLAS | ||
set(EXT_LIBS ${EXT_LIBS} onnx-sgemm) | ||
set(BLAS_FOUND TRUE) | ||
set(BLAS_VENDOR "ONNX-SGEMM") | ||
add_definitions(-DBLAS_FOUND=1 -DWASM_COMPATIBLE_BLAS=1) # Might be required in some cmake files further down the line, let's avoid using add_compile_definitions in this codeblock | ||
add_definitions(-DUSE_ONNX_SGEMM=1) # Might be required in some cmake files further down the line, let's avoid using add_compile_definitions in this codeblock | ||
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
elseif(APPLE AND USE_APPLE_ACCELERATE) | ||
set(BLAS_VENDOR "Accelerate") | ||
# see https://developer.apple.com/documentation/accelerate for more info | ||
# you may need to install Xcode command line tools if you don't have them already (https://developer.apple.com/xcode/features/) | ||
include_directories("/Library/Developer/CommandLineTools/SDKs/MacOSX.sdk/System/Library/Frameworks/Accelerate.framework/Frameworks/vecLib.framework/Headers") | ||
set(EXT_LIBS ${EXT_LIBS} "-framework Accelerate") | ||
add_definitions(-DBLAS_FOUND=1) | ||
else(USE_WASM_COMPATIBLE_BLAS) | ||
else(USE_ONNX_SGEMM) | ||
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if(USE_MKL) | ||
find_package(MKL) | ||
endif(USE_MKL) | ||
|
@@ -582,7 +633,8 @@ if(COMPILE_CPU) | |
endif(CBLAS_FOUND) | ||
endif(BLAS_FOUND) | ||
endif(MKL_FOUND) | ||
endif(USE_WASM_COMPATIBLE_BLAS) | ||
endif(USE_ONNX_SGEMM) | ||
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
jerinphilip marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
endif(COMPILE_CPU) | ||
|
||
############################################################################### | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What does this mean, sorry? Is this 32bit vs 64bit? A small clarifying comment?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does this catch the `unknown` arch condition, and is that desirable?