Skip to content

Commit

Permalink
Merge branch 'avx2'
Browse files Browse the repository at this point in the history
  • Loading branch information
kimwalisch committed Mar 4, 2017
2 parents 59fb050 + 8f082a3 commit b926763
Show file tree
Hide file tree
Showing 15 changed files with 372 additions and 211 deletions.
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@ env:
- OMP_NUM_THREADS=4

before_install:
- sudo add-apt-repository ppa:george-edison55/precise-backports -y
- sudo apt-get update -qq

# Install cmake, OpenMPI, cppcheck
install:
- sudo apt-get install -y cmake openmpi-bin libopenmpi-dev cppcheck
- sudo apt-get install -y cmake cmake-data openmpi-bin libopenmpi-dev cppcheck

# Test build.sh script
before_script:
Expand Down
51 changes: 41 additions & 10 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,20 +1,21 @@
cmake_minimum_required(VERSION 2.8.4)
cmake_minimum_required(VERSION 3.0)
project(primecount)
set(PRIMECOUNT_VERSION_MAJOR 3)
set(PRIMECOUNT_VERSION_MINOR 5)
set(PRIMECOUNT_VERSION_MINOR 6)
set(PRIMECOUNT_VERSION "${PRIMECOUNT_VERSION_MAJOR}.${PRIMECOUNT_VERSION_MINOR}")
include_directories(include src/primesieve/include)
set(CMAKE_BUILD_TYPE Release)

# Build options ######################################################

option(ENABLE_POPCNT "Enable POPCNT instruction" ON)
option(ENABLE_MPI "Enable MPI (Message Passing Interface)" OFF)
option(ENABLE_MPI "Enable MPI (Message Passing Interface)" OFF)

# Include Check* #####################################################

include(CheckCXXCompilerFlag)
include(CheckCXXSourceCompiles)
include(CheckCXXSourceRuns)
include(CheckCXXCompilerFlag)
include(CheckTypeSize)

# primecount binary source files #####################################
Expand All @@ -26,6 +27,7 @@ set(BIN_SRC src/app/cmdoptions.cpp
# primecount library source files ####################################

set(LIB_SRC src/BitSieve.cpp
src/BitSieve-popcnt.cpp
src/FactorTable.cpp
src/Li.cpp
src/P2.cpp
Expand Down Expand Up @@ -103,19 +105,48 @@ endif()
# Check for OpenMP ###################################################

find_package(OpenMP QUIET)
if (OPENMP_FOUND)
if(OPENMP_FOUND)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()

# Check for -mpopcnt compiler option #################################
# Check -mpopcnt compiler flag #######################################

if(ENABLE_POPCNT)
CHECK_CXX_COMPILER_FLAG(-mpopcnt MPOPCNT_FLAG)
if(MPOPCNT_FLAG)
set(COPY_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS -Werror)
check_cxx_compiler_flag(-mpopcnt mpopcnt)
set(CMAKE_CXX_FLAGS "${COPY_CXX_FLAGS}")
if(mpopcnt)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mpopcnt")
endif()
endif()

# Check -mpopcntd compiler flag ######################################

if(ENABLE_POPCNT)
set(COPY_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS -Werror)
check_cxx_compiler_flag(-mpopcntd mpopcntd)
set(CMAKE_CXX_FLAGS "${COPY_CXX_FLAGS}")
if(mpopcntd)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mpopcntd")
endif()
endif()

# Check AVX2 (x86_64) compiler support ##############################

check_cxx_source_compiles("
#include <immintrin.h>
__attribute__ ((target (\"avx2\")))
void func() { __m256i v = _mm256_setzero_si256(); }
int x = __builtin_cpu_supports(\"avx2\");
int main() { return 0; }"
AVX2)

if(AVX2)
add_definitions(-DHAVE_AVX2)
endif()

# Check if int128_t type exists ######################################

check_type_size("int128_t" INT128_T)
Expand Down Expand Up @@ -164,11 +195,11 @@ check_cxx_source_runs("
return 0;
}"
HAVE_LIBDIVIDE)
use_libdivide)

# Include S2_easy* source files ######################################

if(HAVE_LIBDIVIDE)
if(use_libdivide)
set(LIB_SRC ${LIB_SRC} src/deleglise-rivat/S2_easy_libdivide.cpp)
if(ENABLE_MPI)
set(LIB_SRC ${LIB_SRC} src/mpi/deleglise-rivat/S2_easy_mpi_libdivide.cpp)
Expand Down
2 changes: 1 addition & 1 deletion COPYING
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
BSD 2-Clause License

Copyright (c) 2013 - 2016, Kim Walisch.
Copyright (c) 2013 - 2017, Kim Walisch.
All rights reserved.

Redistribution and use in source and binary forms, with or without
Expand Down
13 changes: 13 additions & 0 deletions ChangeLog
Original file line number Diff line number Diff line change
@@ -1,3 +1,16 @@
2017-03-04 Kim Walisch <[email protected]>

Version 3.6 released.

This version features a new AVX2 popcount algorithm which
computes the hard special leaves up to 15% faster on x86 CPUs
with AVX2 support (2013 or later).

* BitSieve-popcnt.cpp: New AVX2 popcount algorithm.
* popcnt.hpp: Fix clang performance bug.
* primecount.cpp: Fix clang time measuring.
* CMakeLists.txt: Add AVX2 check.

2016-12-16 Kim Walisch <[email protected]>

Version 3.5 released.
Expand Down
39 changes: 20 additions & 19 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,14 @@ used to compute several world records e.g.

Binaries
--------
Below are the latest precompiled binaries for Windows 64-bit, Linux and OS X.
Below are the latest primecount binaries for Windows 64-bit, Linux and macOS.
These binaries are statically linked and require a CPU which supports the POPCNT
instruction (2010 or later).
instruction (2010 or later). primecount also uses the AVX2 instruction set (if
available) to speed up the computation of the hard special leaves.

* [primecount-3.5-win64.zip](https://dl.bintray.com/kimwalisch/primecount/primecount-3.5-win64.zip), 404K
* [primecount-3.5-linux-x64.tar.gz](https://dl.bintray.com/kimwalisch/primecount/primecount-3.5-linux-x64.tar.gz), 915K
* [primecount-3.5-macOS-x64.tar.gz](https://dl.bintray.com/kimwalisch/primecount/primecount-3.5-macOS-x64.tar.gz), 688K
* [primecount-3.6-win64.zip](https://dl.bintray.com/kimwalisch/primecount/primecount-3.6-win64.zip), 400 KB
* [primecount-3.6-linux-x64.tar.gz](https://dl.bintray.com/kimwalisch/primecount/primecount-3.6-linux-x64.tar.gz), 1 MB
* [primecount-3.6-macOS-x64.zip](https://dl.bintray.com/kimwalisch/primecount/primecount-3.6-macOS-x64.zip), 899 KB
* Binaries with backup functionality are available [here](https://github.com/kimwalisch/primecount/tree/backup#primecount-backup)

Usage examples
Expand Down Expand Up @@ -207,64 +208,64 @@ Benchmarks
<td>3,204,941,750,802</td>
<td>4.01s</td>
<td>1.96s</td>
<td>0.43s</td>
<td>0.38s</td>
<td>0.22s</td>
</tr>
<tr align="right">
<td>10<sup>15</sup></td>
<td>29,844,570,422,669</td>
<td>27.75s</td>
<td>12.08s</td>
<td>1.65s</td>
<td>0.77s</td>
<td>1.52s</td>
<td>0.76s</td>
</tr>
<tr align="right">
<td>10<sup>16</sup></td>
<td>279,238,341,033,925</td>
<td>232.30s</td>
<td>92.09s</td>
<td>7.31s</td>
<td>2.83s</td>
<td>6.87s</td>
<td>2.67s</td>
</tr>
<tr align="right">
<td>10<sup>17</sup></td>
<td>2,623,557,157,654,233</td>
<td>1,836.73s</td>
<td>731.35s</td>
<td>33.66s</td>
<td>11.20s</td>
<td>31.63s</td>
<td>10.66s</td>
</tr>
<tr align="right">
<td>10<sup>18</sup></td>
<td>24,739,954,287,740,860</td>
<td>14,949.16s</td>
<td>6,631.73s</td>
<td>157.15s</td>
<td>47.07s</td>
<td>146.55s</td>
<td>44.54s</td>
</tr>
<tr align="right">
<td>10<sup>19</sup></td>
<td>234,057,667,276,344,607</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>225.08s</td>
<td>209.57s</td>
</tr>
<tr align="right">
<td>10<sup>20</sup></td>
<td>2,220,819,602,560,918,840</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>1,029.41s</td>
<td>939.88s</td>
</tr>
<tr align="right">
<td>10<sup>21</sup></td>
<td>21,127,269,486,018,731,928</td>
<td>NaN</td>
<td>NaN</td>
<td>NaN</td>
<td>4,867.45s</td>
<td>4,536.14s</td>
</tr>
<tr align="right">
<td>10<sup>22</sup></td>
Expand All @@ -278,14 +279,14 @@ Benchmarks

The benchmarks above were run on an Intel Core i7-6700 CPU (4 x 3.4 GHz) from
2015 using a Linux x64 operating system and primecount was compiled using
GCC 5.2.
GCC 5.4.

Build instructions
------------------
You need a C++ compiler, cmake and make installed to build primecount.

Download
[primecount-3.5.zip](https://github.com/kimwalisch/primecount/archive/v3.5.zip)
[primecount-3.6.zip](https://github.com/kimwalisch/primecount/archive/v3.6.zip)
and build it using:

```sh
Expand Down
4 changes: 0 additions & 4 deletions appveyor.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,6 @@

version: 1.0.{build}

branches:
only:
- master

platform:
- x86
- x64
Expand Down
2 changes: 1 addition & 1 deletion doc/primecount-MPI.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ sudo apt-get install g++ make cmake libopenmpi-dev openmpi-bin
```

Then download
[primecount-3.5.zip](https://github.com/kimwalisch/primecount/archive/v3.5.zip)
[primecount-3.6.zip](https://github.com/kimwalisch/primecount/archive/v3.6.zip)
and build it using:
```sh
cmake -DENABLE_MPI=ON .
Expand Down
6 changes: 3 additions & 3 deletions include/PhiTiny.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
///
/// @file PhiTiny.hpp
///
/// Copyright (C) 2016 Kim Walisch, <[email protected]>
/// Copyright (C) 2017 Kim Walisch, <[email protected]>
///
/// This file is distributed under the BSD License. See the COPYING
/// file in the top level directory.
Expand Down Expand Up @@ -56,6 +56,8 @@ class PhiTiny {
static const int totients[7];
};

extern const PhiTiny phiTiny;

inline bool is_phi_tiny(int64_t a)
{
return PhiTiny::is_tiny(a);
Expand All @@ -66,7 +68,6 @@ inline bool is_phi_tiny(int64_t a)
template <typename X, typename A>
typename prt::make_signed<X>::type phi_tiny(X x, A a)
{
extern const PhiTiny phiTiny;
return phiTiny.phi(x, a);
}

Expand All @@ -75,7 +76,6 @@ typename prt::make_signed<X>::type phi_tiny(X x, A a)
template <typename X, typename A>
X phi_tiny(X x, A a)
{
extern const PhiTiny phiTiny;
return phiTiny.phi(x, a);
}

Expand Down
Loading

0 comments on commit b926763

Please sign in to comment.