diff --git a/CMakeLists.txt b/CMakeLists.txt
index f9066756a..7d5ce032c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -45,6 +45,13 @@ if(CMAKE_COMPILER_IS_GNUCXX)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse2")
     # optimizations
     add_definitions(-O2)
+else()
+    # apple
+    if(APPLE)
+        set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -m64 -msse2")
+        set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -msse2")
+        add_definitions(-O2)
+    endif()
 endif()
 
 # Common
@@ -184,9 +191,17 @@ if (USE_CPU_TROMP)
 endif()
 if (USE_CPU_XENONCAT)
     add_library ( xenoncat_avx1 SHARED IMPORTED GLOBAL )
-    set_target_properties ( xenoncat_avx1 PROPERTIES IMPORTED_LOCATION "../nheqminer/cpu_xenoncat/asm_linux/equihash_avx1.o" )
+    if (APPLE)
+        set_target_properties ( xenoncat_avx1 PROPERTIES IMPORTED_LOCATION "../nheqminer/cpu_xenoncat/asm_mac/equihash_avx1.o" )
+    else()
+        set_target_properties ( xenoncat_avx1 PROPERTIES IMPORTED_LOCATION "../nheqminer/cpu_xenoncat/asm_linux/equihash_avx1.o" )
+    endif(APPLE)
     add_library ( xenoncat_avx2 SHARED IMPORTED GLOBAL )
-    set_target_properties ( xenoncat_avx2 PROPERTIES IMPORTED_LOCATION "../nheqminer/cpu_xenoncat/asm_linux/equihash_avx2.o" )
+    if (APPLE)
+        set_target_properties ( xenoncat_avx2 PROPERTIES IMPORTED_LOCATION "../nheqminer/cpu_xenoncat/asm_mac/equihash_avx2.o" )
+    else()
+        set_target_properties ( xenoncat_avx2 PROPERTIES IMPORTED_LOCATION "../nheqminer/cpu_xenoncat/asm_linux/equihash_avx2.o" )
+    endif(APPLE)
     target_link_libraries(${PROJECT_NAME} cpu_xenoncat xenoncat_avx1 xenoncat_avx2)
 endif()
 if (USE_CUDA_TROMP)
diff --git a/cpu_tromp/osx_barrier.h b/cpu_tromp/osx_barrier.h
new file mode 100644
index 000000000..da05b3552
--- /dev/null
+++ b/cpu_tromp/osx_barrier.h
@@ -0,0 +1,98 @@
+#ifdef __APPLE__
+
+#ifndef PTHREAD_BARRIER_H_
+#define PTHREAD_BARRIER_H_
+
+// pthread_barrier_* shim built on a mutex and a condition variable, compiled
+// only on Apple platforms. Every function is static inline so this header can
+// be included from more than one translation unit without duplicate symbols.
+
+#include <errno.h>
+#include <pthread.h>
+
+typedef int pthread_barrierattr_t;
+#define PTHREAD_BARRIER_SERIAL_THREAD 1
+
+typedef struct
+{
+    pthread_mutex_t mutex;
+    pthread_cond_t cond;
+    int count;               // threads that have arrived in the current cycle
+    int tripCount;           // arrivals needed to release the barrier
+    unsigned int generation; // incremented on every release; wait predicate
+} pthread_barrier_t;
+
+// Prepares barrier for count threads; attr is ignored.
+// Returns 0 on success, or -1 with errno set on failure.
+static inline int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count)
+{
+    int rc;
+
+    (void)attr;
+    if(count == 0)
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    // pthread_*_init report failure as a positive error number, never < 0.
+    rc = pthread_mutex_init(&barrier->mutex, 0);
+    if(rc != 0)
+    {
+        errno = rc;
+        return -1;
+    }
+    rc = pthread_cond_init(&barrier->cond, 0);
+    if(rc != 0)
+    {
+        pthread_mutex_destroy(&barrier->mutex);
+        errno = rc;
+        return -1;
+    }
+    barrier->tripCount = (int)count;
+    barrier->count = 0;
+    barrier->generation = 0;
+
+    return 0;
+}
+
+// Destroys the condition variable and mutex owned by barrier. Always returns 0.
+static inline int pthread_barrier_destroy(pthread_barrier_t *barrier)
+{
+    pthread_cond_destroy(&barrier->cond);
+    pthread_mutex_destroy(&barrier->mutex);
+    return 0;
+}
+
+// Blocks until tripCount threads have called this function. Returns
+// PTHREAD_BARRIER_SERIAL_THREAD in the thread that completes the cycle and 0
+// in every other thread.
+static inline int pthread_barrier_wait(pthread_barrier_t *barrier)
+{
+    int result = 0;
+
+    pthread_mutex_lock(&barrier->mutex);
+    ++(barrier->count);
+    if(barrier->count >= barrier->tripCount)
+    {
+        // Last arrival: start a new generation and release the waiters.
+        barrier->count = 0;
+        ++(barrier->generation);
+        pthread_cond_broadcast(&barrier->cond);
+        result = PTHREAD_BARRIER_SERIAL_THREAD;
+    }
+    else
+    {
+        // pthread_cond_wait may return spuriously, so wait until the
+        // generation changes instead of trusting a single wake-up.
+        const unsigned int arrival = barrier->generation;
+        while(arrival == barrier->generation)
+        {
+            pthread_cond_wait(&barrier->cond, &barrier->mutex);
+        }
+    }
+    pthread_mutex_unlock(&barrier->mutex);
+    return result;
+}
+
+#endif // PTHREAD_BARRIER_H_
+#endif // __APPLE__
diff --git a/cpu_xenoncat/asm_mac/assemble.sh b/cpu_xenoncat/asm_mac/assemble.sh
new file mode 100644
index 000000000..122e7df68
--- /dev/null
+++ b/cpu_xenoncat/asm_mac/assemble.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# Stop at the first failing step so objconv never runs after a failed fasm.
+set -e
+./fasm -m 1280000 ../asm_linux/equihash_avx1.asm equihash_avx1.elf.o
+./fasm -m 1280000 ../asm_linux/equihash_avx2.asm equihash_avx2.elf.o
+./objconv -fmacho64 -nu equihash_avx1.elf.o equihash_avx1.o
+./objconv -fmacho64 -nu equihash_avx2.elf.o equihash_avx2.o
diff --git a/cpu_xenoncat/asm_mac/fasm b/cpu_xenoncat/asm_mac/fasm
new file mode 100755
index 000000000..ef3f91936
Binary files /dev/null and b/cpu_xenoncat/asm_mac/fasm differ
diff --git a/cpu_xenoncat/asm_mac/objconv b/cpu_xenoncat/asm_mac/objconv
new file mode 100755
index 000000000..912d0811c
Binary files /dev/null and b/cpu_xenoncat/asm_mac/objconv differ
diff --git a/cuda_djezo/cuda_djezo.cpp b/cuda_djezo/cuda_djezo.cpp
index 30d672e89..b2b236370 100644
--- a/cuda_djezo/cuda_djezo.cpp
+++ b/cuda_djezo/cuda_djezo.cpp
@@ -24,9 +24,9 @@ cuda_djezo::cuda_djezo(int platf_id, int dev_id)
     major = atoi(m_version.substr(0, n).c_str());
     minor = atoi(m_version.substr(n + 1, m_version.length() - n - 1).c_str());
 
-    if (major < 5)
+    if (major < 3)
     {
-        throw std::runtime_error("Only CUDA devices with SM 5.0 and higher are supported.");
+        throw std::runtime_error("Only CUDA devices with SM 3.0 and higher are supported.");
     }
     else if (major == 5 && minor == 0)
     {
diff --git a/cuda_djezo/eqcuda.hpp b/cuda_djezo/eqcuda.hpp
index 48d663a45..c8b2dbade 100644
--- a/cuda_djezo/eqcuda.hpp
+++ b/cuda_djezo/eqcuda.hpp
@@ -3,7 +3,6 @@
 #include "cuda.h"
 #include "cuda_runtime.h"
 #include "device_launch_parameters.h"
-#include "device_functions_decls.h"
 #include "../cpu_tromp/blake2/blake2.h"
 #include "cuda_djezo.hpp"
 
diff --git a/cuda_djezo/equi_miner.cu b/cuda_djezo/equi_miner.cu
index 6ef9f45f2..cebaf5233 100644
--- a/cuda_djezo/equi_miner.cu
+++ b/cuda_djezo/equi_miner.cu
@@ -200,10 +200,25 @@ __device__ __forceinline__ uint4 operator^ (uint4 a, uint4 b)
 __device__ __forceinline__ uint2 ROR2(const uint2 a, const int offset)
 {
     uint2 result;
+#if __CUDA_ARCH__ > 300
     {
         asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.x) : "r"(a.y), "r"(a.x), "r"(offset));
         asm("shf.r.wrap.b32 %0, %1, %2, %3;" : "=r"(result.y) : "r"(a.x), "r"(a.y), "r"(offset));
     }
+#else
+    if (!offset)
+        result = a;
+    else if (offset < 32) {
+        result.y = ((a.y >> offset) | (a.x << (32 - offset)));
+        result.x = ((a.x >> offset) | (a.y << (32 - offset)));
+    } else if (offset == 32) {
+        result.y = a.x;
+        result.x = a.y;
+    } else {
+        result.y = ((a.x >> (offset - 32)) | (a.y << (64 - offset)));
+        result.x = ((a.y >> (offset - 32)) | (a.x << (64 - offset)));
+    }
+#endif
     return result;
 }
 
@@ -308,7 +323,11 @@ __global__ void digit_first(equi* eq, u32 nonce)
     u32* hash_h32 = (u32*)hash_h;
 
     if (threadIdx.x < 16)
+#if __CUDA_ARCH__ > 300
         hash_h32[threadIdx.x] = __ldca(&eq->blake_h32[threadIdx.x]);
+#else
+        hash_h32[threadIdx.x] = eq->blake_h32[threadIdx.x];
+#endif
 
     __syncthreads();
 
diff --git a/nheqminer/crypto/common.h b/nheqminer/crypto/common.h
index 4b2b16ac0..70dfdbad7 100644
--- a/nheqminer/crypto/common.h
+++ b/nheqminer/crypto/common.h
@@ -13,7 +13,7 @@
 #include
 //#include "sodium.h"
 
-#ifdef WIN32
+#if (defined(WIN32) || defined(__APPLE__))
 #include "compat/endian.h"
 #else
 #include
diff --git a/nheqminer/libstratum/ZcashStratum.cpp b/nheqminer/libstratum/ZcashStratum.cpp
index 7eac71999..5bef4dada 100644
--- a/nheqminer/libstratum/ZcashStratum.cpp
+++ b/nheqminer/libstratum/ZcashStratum.cpp
@@ -490,7 +490,7 @@ ZcashJob* ZcashMiner::parseJob(const Array& params)
     // TODO: On a LE host shouldn't this be le32toh?
     ret->header.nVersion = be32toh(version);
 
-    if (ret->header.nVersion == 4) {
+    if (ret->header.nVersion == 4 || ret->header.nVersion == 0x20000000) {
         if (params.size() < 8) {
             throw std::logic_error("Invalid job params");
         }
diff --git a/nheqminer/main.cpp b/nheqminer/main.cpp
index 6f7a230c5..7d10ff44a 100644
--- a/nheqminer/main.cpp
+++ b/nheqminer/main.cpp
@@ -37,7 +37,7 @@ namespace src = boost::log::sources;
 namespace attrs = boost::log::attributes;
 namespace keywords = boost::log::keywords;
 
-#ifdef __linux__
+#if (defined(__linux__) || defined(__APPLE__))
 #define __cpuid(out, infoType)\
     asm("cpuid": "=a" (out[0]), "=b" (out[1]), "=c" (out[2]), "=d" (out[3]): "a" (infoType));
 #define __cpuidex(out, infoType, ecx)\
diff --git a/nheqminer/serialize.h b/nheqminer/serialize.h
index ecb6cf13c..843a58615 100644
--- a/nheqminer/serialize.h
+++ b/nheqminer/serialize.h
@@ -6,7 +6,7 @@
 #ifndef BITCOIN_SERIALIZE_H
 #define BITCOIN_SERIALIZE_H
 
-#ifdef WIN32
+#if (defined(WIN32) || defined(__APPLE__))
 #include "compat/endian.h"
 #else
 #include
@@ -654,7 +654,11 @@ void Serialize_impl(Stream& os, const std::vector& v, int nType, int nVers
 {
     WriteCompactSize(os, v.size());
     for (typename std::vector::const_iterator vi = v.begin(); vi != v.end(); ++vi)
+#if defined(__APPLE__)
+        ::Serialize(os, static_cast(*vi), nType, nVersion);
+#else
         ::Serialize(os, (*vi), nType, nVersion);
+#endif
 }
 
 template