Skip to content

Commit

Permalink
CPU internal utils (part1) (#9)
Browse files Browse the repository at this point in the history
* move more internal utils on cpu

* test gpu build
  • Loading branch information
kaihsin authored Dec 23, 2024
1 parent 46b89e3 commit e7a9530
Show file tree
Hide file tree
Showing 13 changed files with 216 additions and 0 deletions.
10 changes: 10 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ message(STATUS " Build Target: ${CMAKE_SYSTEM_NAME}-${CMAKE_SYSTEM_PROCESSOR}")
message(STATUS " Installation Prefix: ${CMAKE_INSTALL_PREFIX}")
set(CMAKE_POSITION_INDEPENDENT_CODE ON)



# #####################################################################
# Project
# #####################################################################
Expand Down Expand Up @@ -109,6 +111,14 @@ else()
target_link_libraries(${PKG_NAME} PUBLIC ${LAPACK_LIBRARIES})
endif()

# ###########
# Options
# ###########
# USE_CUDA toggles the optional GPU backend; it defaults to OFF (CPU-only build).
option(USE_CUDA "Build using Nvidia CUDA for GPU library" OFF)
# cmake/config_cuda.cmake tests USE_CUDA itself and prints a status line for the
# disabled case. Guarding the include here made that branch unreachable, so the
# file is included unconditionally and decides on its own what to do.
include(cmake/config_cuda.cmake)


## install
include(GNUInstallDirs)
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,10 @@ Running pytest:

* Most of the dependencies can be installed from PyPI.

GPU:
- CUDA Toolkit



## Compile directly the C++ package

Expand Down
38 changes: 38 additions & 0 deletions cmake/config_cuda.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# CUDA configuration for the ${PKG_NAME} target.
#
# Expects USE_CUDA and the ${PKG_NAME} target to be defined by the including
# CMakeLists.txt. When USE_CUDA is ON this enables the CUDA language, locates the
# CUDA Toolkit and attaches the GPU build settings to ${PKG_NAME}; otherwise it
# only reports that CUDA support is disabled.
if(USE_CUDA)
  message(STATUS " Enable CUDA Support")
  set(CYTNX_VARIANT_INFO "${CYTNX_VARIANT_INFO} UNI_CUDA")
  enable_language(CUDA)
  find_package(CUDAToolkit REQUIRED)
  # FindCUDAToolkit publishes its result as CUDAToolkit_FOUND; CUDA_TOOLKIT_FOUND
  # is not set by that module and always expanded to an empty string.
  message(STATUS "CUDA: ${CUDAToolkit_FOUND}")

  if(NOT DEFINED CMAKE_CUDA_STANDARD)
    set(CMAKE_CUDA_STANDARD 17)
    set(CMAKE_CUDA_STANDARD_REQUIRED ON)
    # The CMAKE_CUDA_STANDARD* variables only initialise targets created after
    # they are set. ${PKG_NAME} already exists at this point, so the default
    # has to be applied to the target directly to take effect.
    set_target_properties(${PKG_NAME} PROPERTIES
      CUDA_STANDARD 17
      CUDA_STANDARD_REQUIRED ON
    )
  endif()

  # Relocatable device code, device-symbol resolution at link time, and the GPU
  # architectures to generate code for.
  set_target_properties(${PKG_NAME} PROPERTIES
    CUDA_SEPARABLE_COMPILATION ON
    CUDA_RESOLVE_DEVICE_SYMBOLS ON
    CUDA_ARCHITECTURES "80;86;90"
  )

  # nvcc flags: show diagnostic numbers, embed line info, generate 64-bit code.
  string(APPEND CMAKE_CUDA_FLAGS " -Xcudafe=--display_error_number -lineinfo -m64")

  # UNI_GPU is PUBLIC because the GPU headers gate their declarations on it.
  target_compile_definitions(${PKG_NAME} PUBLIC UNI_GPU)
  target_include_directories(${PKG_NAME} PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
  target_link_libraries(${PKG_NAME} PUBLIC
    CUDA::toolkit
    CUDA::cudart
    CUDA::cublas
    CUDA::cusparse
    CUDA::curand
    CUDA::cusolver
    # NOTE(review): raw linker flag kept from the original; replace with an
    # imported target if the supported CMake versions provide one.
    -lcudadevrt
  )
else()
  message( STATUS " Build CUDA Support: NO")
endif()
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ minimum-version = "build-system.requires"
build-dir = "build/{wheel_tag}"
build.tool-args = ["-j4"]

# CMake definitions applied to the scikit-build driven build. USE_CUDA is the
# option declared in CMakeLists.txt; it is kept OFF here so the packaged build
# does not require the CUDA Toolkit.
[tool.scikit-build.cmake.define]
USE_CUDA = "OFF"

# only use pypi registry
[[tool.uv.index]]
name = "pypi"
Expand Down
20 changes: 20 additions & 0 deletions src/cpp/src/utils_internal/cpu/Alloc_cpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include "Alloc_cpu.hpp"

using namespace std;

namespace cytnx_core {
namespace utils_internal {
void* Calloc_cpu(const cytnx_uint64& N, const cytnx_uint64& perelem_bytes) {
void* tmp = calloc(N, perelem_bytes);
cytnx_error_msg(((tmp == NULL) && (N > 0)), "[ERROR][calloc] Memory allocation failed.%s",
"\n");
return tmp;
}
void* Malloc_cpu(const cytnx_uint64& bytes) {
void* tmp = malloc(bytes);
cytnx_error_msg(((tmp == NULL) && (bytes > 0)), "[ERROR][malloc] Memory allocation failed.%s",
"\n");
return tmp;
}
} // namespace utils_internal
} // namespace cytnx_core
20 changes: 20 additions & 0 deletions src/cpp/src/utils_internal/cpu/Alloc_cpu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#ifndef CYTNX_BACKEND_UTILS_INTERNAL_CPU_ALLOC_CPU_H_
#define CYTNX_BACKEND_UTILS_INTERNAL_CPU_ALLOC_CPU_H_

#include <cstdio>
#include <cstdlib>
#include <stdint.h>
#include <climits>
#include <cytnx_core/Type.hpp>
#include <cytnx_core/errors/cytnx_error.hpp>

namespace cytnx_core {
namespace utils_internal {

/**
 * @brief Allocate zero-initialised host memory for `N` elements of `perelem_bytes` bytes.
 *
 * Implemented with `calloc`; a failed non-empty request is reported through
 * `cytnx_error_msg`.
 *
 * @param N number of elements
 * @param perelem_bytes size of one element in bytes
 * @return pointer to the allocated block
 */
void* Calloc_cpu(const cytnx_uint64& N, const cytnx_uint64& perelem_bytes);

/**
 * @brief Allocate `bytes` bytes of uninitialised host memory.
 *
 * Implemented with `malloc`; a failed non-empty request is reported through
 * `cytnx_error_msg`.
 *
 * @param bytes number of bytes to allocate
 * @return pointer to the allocated block
 */
void* Malloc_cpu(const cytnx_uint64& bytes);

} // namespace utils_internal
} // namespace cytnx_core

#endif // CYTNX_BACKEND_UTILS_INTERNAL_CPU_ALLOC_CPU_H_
5 changes: 5 additions & 0 deletions src/cpp/src/utils_internal/cpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
# CPU implementations of the internal utilities, attached to cytnx_core as
# PRIVATE sources. target_sources_local is a project helper defined elsewhere;
# presumably it resolves these names relative to this directory (TODO confirm).
target_sources_local(cytnx_core
PRIVATE

Alloc_cpu.cpp
Alloc_cpu.hpp
Complexmem_cpu.cpp
Complexmem_cpu.hpp
Fill_cpu.hpp
SetZeros_cpu.cpp
SetZeros_cpu.hpp
)
37 changes: 37 additions & 0 deletions src/cpp/src/utils_internal/cpu/Fill_cpu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#ifndef CYTNX_BACKEND_UTILS_INTERNAL_CPU_FILL_CPU_H_
#define CYTNX_BACKEND_UTILS_INTERNAL_CPU_FILL_CPU_H_

#include <cytnx_core/Type.hpp>

#ifdef UNI_OMP
#include <omp.h>
#endif

namespace cytnx_core {
namespace utils_internal {

/**
* @brief Assign the given value to the first `count` elements in the range beginning at
* `first`.
*
* This function act the same as `std::fill_n`. The execution will be parallelized when OMP is
* enabled.
*
* @tparam DType the data type of the elements in the range
*
* @param first the beginning of the range
* @param value the value to be assigned
* @param count the number of elements to modify
*/
template <typename DType>
void FillCpu(void *first, const DType &value, cytnx_uint64 count) {
DType *typed_first = reinterpret_cast<DType *>(first);
#pragma omp parallel for schedule(static)
for (cytnx_uint64 i = 0; i < count; i++) {
typed_first[i] = value;
}
}
} // namespace utils_internal
} // namespace cytnx_core

#endif // CYTNX_BACKEND_UTILS_INTERNAL_CPU_FILL_CPU_H_
9 changes: 9 additions & 0 deletions src/cpp/src/utils_internal/cpu/SetZeros_cpu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "SetZeros_cpu.hpp"

#include <cstring>

using namespace std;

namespace cytnx_core {
namespace utils_internal {
/**
 * @brief Set the first `bytes` bytes of the buffer at `c_ptr` to zero.
 *
 * @param c_ptr start of the buffer to clear; not touched when `bytes` is 0
 * @param bytes number of bytes to clear
 */
void SetZeros(void* c_ptr, const cytnx_uint64& bytes) {
  // memset needs a valid pointer even for a zero count, and an empty buffer may be
  // represented by a null pointer, so an empty request skips the call entirely.
  if (bytes == 0) {
    return;
  }
  std::memset(c_ptr, 0, bytes);
}
} // namespace utils_internal
} // namespace cytnx_core
19 changes: 19 additions & 0 deletions src/cpp/src/utils_internal/cpu/SetZeros_cpu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#ifndef CYTNX_BACKEND_UTILS_INTERNAL_CPU_SETZEROS_CPU_H_
#define CYTNX_BACKEND_UTILS_INTERNAL_CPU_SETZEROS_CPU_H_

#include <cstdio>
#include <cstdlib>
#include <stdint.h>
#include <climits>
#include <cytnx_core/Type.hpp>
#include <cytnx_core/errors/cytnx_error.hpp>

namespace cytnx_core {
namespace utils_internal {

/**
 * @brief Set the first `bytes` bytes of the host buffer at `c_ptr` to zero.
 *
 * @param c_ptr start of the buffer to clear
 * @param bytes number of bytes to clear
 */
void SetZeros(void* c_ptr, const cytnx_uint64& bytes);

}  // namespace utils_internal
} // namespace cytnx_core

#endif // CYTNX_BACKEND_UTILS_INTERNAL_CPU_SETZEROS_CPU_H_
7 changes: 7 additions & 0 deletions src/cpp/src/utils_internal/gpu/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# CUDA implementations of the internal utilities, attached to cytnx_core as
# PRIVATE sources. The bodies in cuAlloc_gpu.cu are wrapped in #ifdef UNI_GPU.
# NOTE(review): whether this directory is only added when USE_CUDA is ON is not
# visible from here - confirm against the parent CMakeLists.txt.
target_sources_local(cytnx_core
PRIVATE

cuAlloc_gpu.cu
cuAlloc_gpu.hpp

)
24 changes: 24 additions & 0 deletions src/cpp/src/utils_internal/gpu/cuAlloc_gpu.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
#include "cuAlloc_gpu.hpp"

using namespace std;

namespace cytnx_core {
namespace utils_internal {
#ifdef UNI_GPU
// void* Calloc_cpu(const cytnx_uint64 &N, const cytnx_uint64 &perelem_bytes){
// return calloc(M,perelem_bytes);
// }
/**
 * @brief Allocate zero-initialised CUDA managed memory for `N` elements of `perelem_bytes`
 * bytes each.
 *
 * The block is obtained with `cudaMallocManaged` and cleared with `cudaMemset`; CUDA errors
 * are routed through `checkCudaErrors`. An empty request returns a null pointer without
 * calling into CUDA, and a request whose byte count does not fit in `cytnx_uint64` is
 * reported through `cytnx_error_msg`.
 *
 * @param N number of elements
 * @param perelem_bytes size of one element in bytes
 * @return pointer to the zeroed managed block, or nullptr when the request is empty
 */
void* cuCalloc_gpu(const cytnx_uint64& N, const cytnx_uint64& perelem_bytes) {
  const cytnx_uint64 total_bytes = perelem_bytes * N;
  // The product wraps silently if it overflows (cytnx_uint64 is presumably an unsigned
  // 64-bit integer); dividing back detects that before a too-small block is allocated.
  cytnx_error_msg((perelem_bytes != 0) && (total_bytes / perelem_bytes != N),
                  "[ERROR][cuCalloc_gpu] Requested allocation size overflows.%s", "\n");
  // cudaMallocManaged rejects a zero size with cudaErrorInvalidValue, so an empty request
  // is answered with a null pointer instead of an error.
  if (total_bytes == 0) {
    return nullptr;
  }
  void* ptr = nullptr;
  checkCudaErrors(cudaMallocManaged(&ptr, total_bytes));
  checkCudaErrors(cudaMemset(ptr, 0, total_bytes));
  return ptr;
}
/**
 * @brief Allocate `bytes` bytes of uninitialised CUDA managed memory.
 *
 * The block is obtained with `cudaMallocManaged`; CUDA errors are routed through
 * `checkCudaErrors`. A zero-byte request returns a null pointer without calling into CUDA.
 *
 * @param bytes number of bytes to allocate
 * @return pointer to the managed block, or nullptr when `bytes` is 0
 */
void* cuMalloc_gpu(const cytnx_uint64& bytes) {
  // cudaMallocManaged rejects a zero size with cudaErrorInvalidValue; the CPU allocator
  // tolerates empty requests, so do the same here by returning a null pointer.
  if (bytes == 0) {
    return nullptr;
  }
  void* ptr = nullptr;
  checkCudaErrors(cudaMallocManaged(&ptr, bytes));
  return ptr;
}
#endif
} // namespace utils_internal
} // namespace cytnx_core
20 changes: 20 additions & 0 deletions src/cpp/src/utils_internal/gpu/cuAlloc_gpu.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#ifndef CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUALLOC_GPU_H_
#define CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUALLOC_GPU_H_

#include <cstdio>
#include <cstdlib>
#include <stdint.h>
#include <climits>
#include <cytnx_core/Type.hpp>
#include <cytnx_core/errors/cytnx_error.hpp>
namespace cytnx_core {
namespace utils_internal {

// The GPU allocators are only declared when the build defines UNI_GPU.
#ifdef UNI_GPU
/**
 * @brief Allocate zero-initialised CUDA managed memory for `N` elements of
 * `perelem_bytes` bytes each.
 *
 * Implemented with `cudaMallocManaged` followed by `cudaMemset`.
 *
 * @param N number of elements
 * @param perelem_bytes size of one element in bytes
 * @return pointer to the allocated block
 */
void* cuCalloc_gpu(const cytnx_uint64& N, const cytnx_uint64& perelem_bytes);

/**
 * @brief Allocate `bytes` bytes of uninitialised CUDA managed memory.
 *
 * Implemented with `cudaMallocManaged`.
 *
 * @param bytes number of bytes to allocate
 * @return pointer to the allocated block
 */
void* cuMalloc_gpu(const cytnx_uint64& bytes);
#endif
} // namespace utils_internal
} // namespace cytnx_core

#endif // CYTNX_BACKEND_UTILS_INTERNAL_GPU_CUALLOC_GPU_H_

0 comments on commit e7a9530

Please sign in to comment.