From e2a9777c955208d617417b109b20cfc4e4df7978 Mon Sep 17 00:00:00 2001 From: Michael Basaman Date: Wed, 30 May 2018 05:19:23 +0700 Subject: [PATCH 1/5] intial checkin --- .../backend/cpu/crypto/cryptonight_aesni.h | 419 +++++++++++++++++- xmrstak/backend/cpu/minethd.cpp | 288 +++++++++++- xmrstak/backend/cpu/minethd.hpp | 1 + 3 files changed, 700 insertions(+), 8 deletions(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index f06ae4e24..6c3d80923 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -724,12 +724,18 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto extra_hashes[ctx[1]->hash_state[0] & 3](ctx[1]->hash_state, 200, (char*)output + 32); } -#define CN_STEP1(a, b, c, l, ptr, idx) \ +#define CN_STEP1_A(a, b, c, l, ptr, idx) \ ptr = (__m128i *)&l[idx & MASK]; \ if(PREFETCH) \ - _mm_prefetch((const char*)ptr, _MM_HINT_T0); \ + _mm_prefetch((const char*)ptr, _MM_HINT_T0); + +#define CN_STEP1_B(a, b, c, l, ptr, idx) \ c = _mm_load_si128(ptr); +#define CN_STEP1(a, b, c, l, ptr, idx) \ + CN_STEP1_A(a, b, c, l, ptr, idx) \ + CN_STEP1_B(a, b, c, l, ptr, idx) + #define CN_STEP2(a, b, c, l, ptr, idx) \ if(SOFT_AES) \ c = soft_aesenc(c, a); \ @@ -741,13 +747,19 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto else \ _mm_store_si128(ptr, b);\ -#define CN_STEP3(a, b, c, l, ptr, idx) \ +#define CN_STEP3_A(a, b, c, l, ptr, idx) \ idx = _mm_cvtsi128_si64(c); \ ptr = (__m128i *)&l[idx & MASK]; \ if(PREFETCH) \ - _mm_prefetch((const char*)ptr, _MM_HINT_T0); \ + _mm_prefetch((const char*)ptr, _MM_HINT_T0); + +#define CN_STEP3_B(a, b, c, l, ptr, idx) \ b = _mm_load_si128(ptr); +#define CN_STEP3(a, b, c, l, ptr, idx) \ + CN_STEP3_A(a, b, c, l, ptr, idx) \ + CN_STEP3_B(a, b, c, l, ptr, idx) + #define CN_STEP4(a, b, c, l, mc, ptr, idx) \ lo = _umul128(idx, _mm_cvtsi128_si64(b), &hi); \ a 
= _mm_add_epi64(a, _mm_set_epi64x(lo, hi)); \ @@ -770,6 +782,20 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto idx = d ^ q; \ } +#define CN2_STEP1(a, b, c, l, mc, ptr, idx) \ + CN_STEP3_B(a, c, b, l, ptr, idx) \ + CN_STEP4(a, c, b, l, mc, ptr, idx) \ + CN_STEP1_A(a, b, c, l, ptr, idx) + +#define CN2_STEP2(a, b, c, l, mc, ptr, idx) \ + CN_STEP1_B(a, b, c, l, ptr, idx) \ + CN_STEP2(a, b, c, l, ptr, idx) \ + CN_STEP3_A(a, b, c, l, ptr, idx) + +#define CN2_STEP3(a, b, c, l, mc, ptr, idx) \ + CN_STEP3_B(a, b, c, l, ptr, idx) \ + CN_STEP4(a, b, c, l, mc, ptr, idx) + #define CONST_INIT(ctx, n) \ __m128i mc##n = _mm_set_epi64x(*reinterpret_cast(reinterpret_cast(input) + n * len + 35) ^ \ *(reinterpret_cast((ctx)->hash_state) + 24), 0); @@ -1100,3 +1126,388 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i); } } + +// this seems to improve PREFETCH performance on a few CPU's that I tried with monero7 +// +// #! 
/bin/sh
+// INDEX=0
+// while [ "$INDEX" -lt 20 ]
+// do
+// echo "CONST_INIT(ctx[$INDEX], $INDEX);"
+// INDEX=`expr $INDEX + 1`
+// done
+//
+// feel free to clean up the macros and comments if you want it
+//
+// 452YzXHGTKVf6a9zbqaSBLPHeNsZVTvkhLPUv2hn6oRgXNP95ikasL64nC8oeqXmMSbKTeMfPbVHNfF8otAuCqHXEEWVxxw
+//
+// cryptonight_twenty_hash: computes 20 hashes in a single call.
+//
+//   input  - 20 blobs of `len` bytes each, back to back (blob i starts at input + len * i)
+//   len    - size in bytes of ONE blob
+//   output - receives 20 results of 32 bytes each (result i is written at output + 32 * i)
+//   ctx    - array of 20 contexts; ctx[i]->hash_state and ctx[i]->long_state are the
+//            working memory of lane i
+//
+// The 20 lanes are interleaved: every macro group below is issued for lanes 0..19
+// before the next group starts. A lane's address computation + prefetch
+// (CN_STEP1_A / CN_STEP3_A) is therefore separated from the matching load
+// (CN_STEP1_B / CN_STEP3_B) by the work of the other 19 lanes.
+//
+// Round structure, per lane (CN2_STEP1 = STEP3_B, STEP4, STEP1_A;
+// CN2_STEP2 = STEP1_B, STEP2, STEP3_A; CN2_STEP3 = STEP3_B, STEP4):
+//   prologue : STEP1_A, CN2_STEP2, CN2_STEP1, CN2_STEP2   -> rounds 1 and 2 started
+//   loop     : (ITERATIONS/2 - 1) x { CN2_STEP1, CN2_STEP2, CN2_STEP1, CN2_STEP2 }
+//   epilogue : CN2_STEP3                                  -> finishes the last round
+// The bx/cx arguments are passed in swapped order on every other round.
+//
+// NOTE(review): the parameter list after `template` and the template arguments on
+// the cn_select_*() / cn_*_scratchpad() calls appear to have been stripped from this
+// copy of the patch - restore them from the upstream file before applying.
+template
+void cryptonight_twenty_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx)
+{
+    constexpr size_t MASK = cn_select_mask();
+    constexpr size_t ITERATIONS = cn_select_iter();
+    constexpr size_t MEM = cn_select_memory();
+
+    // CONST_INIT reads each blob starting at byte offset 35, so shorter inputs are
+    // rejected for these algorithms. Zero ALL 20 result slots (20 * 32 bytes); the
+    // previous 32 * 5 only cleared the first five and left the rest uninitialised.
+    if((ALGO == cryptonight_monero || ALGO == cryptonight_aeon) && len < 43)
+    {
+        memset(output, 0, 32 * 20);
+        return;
+    }
+
+    // Per lane: keccak the blob into hash_state, then run cn_explode_scratchpad
+    // from hash_state into long_state.
+    for (size_t i = 0; i < 20; i++)
+    {
+        keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200);
+        cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state);
+    }
+
+    // Declares mc0..mc19, one constant per lane (see CONST_INIT).
+    CONST_INIT(ctx[0], 0);
+    CONST_INIT(ctx[1], 1);
+    CONST_INIT(ctx[2], 2);
+    CONST_INIT(ctx[3], 3);
+    CONST_INIT(ctx[4], 4);
+    CONST_INIT(ctx[5], 5);
+    CONST_INIT(ctx[6], 6);
+    CONST_INIT(ctx[7], 7);
+    CONST_INIT(ctx[8], 8);
+    CONST_INIT(ctx[9], 9);
+    CONST_INIT(ctx[10], 10);
+    CONST_INIT(ctx[11], 11);
+    CONST_INIT(ctx[12], 12);
+    CONST_INIT(ctx[13], 13);
+    CONST_INIT(ctx[14], 14);
+    CONST_INIT(ctx[15], 15);
+    CONST_INIT(ctx[16], 16);
+    CONST_INIT(ctx[17], 17);
+    CONST_INIT(ctx[18], 18);
+    CONST_INIT(ctx[19], 19);
+
+    // Scratchpad base pointer of each lane.
+    uint8_t* l0 = ctx[0]->long_state;
+    uint8_t* l1 = ctx[1]->long_state;
+    uint8_t* l2 = ctx[2]->long_state;
+    uint8_t* l3 = ctx[3]->long_state;
+    uint8_t* l4 = ctx[4]->long_state;
+    uint8_t* l5 = ctx[5]->long_state;
+    uint8_t* l6 = ctx[6]->long_state;
+    uint8_t* l7 = ctx[7]->long_state;
+    uint8_t* l8 = ctx[8]->long_state;
+    uint8_t* l9 = ctx[9]->long_state;
+    uint8_t* l10 = ctx[10]->long_state;
+    uint8_t* l11 = ctx[11]->long_state;
+    uint8_t* l12 = ctx[12]->long_state;
+    uint8_t* l13 = ctx[13]->long_state;
+    uint8_t* l14 = ctx[14]->long_state;
+    uint8_t* l15 = ctx[15]->long_state;
+    uint8_t* l16 = ctx[16]->long_state;
+    uint8_t* l17 = ctx[17]->long_state;
+    uint8_t* l18 = ctx[18]->long_state;
+    uint8_t* l19 = ctx[19]->long_state;
+
+    // hash_state of each lane viewed as 64-bit words.
+    uint64_t* h0 = (uint64_t*)ctx[0]->hash_state;
+    uint64_t* h1 = (uint64_t*)ctx[1]->hash_state;
+    uint64_t* h2 = (uint64_t*)ctx[2]->hash_state;
+    uint64_t* h3 = (uint64_t*)ctx[3]->hash_state;
+    uint64_t* h4 = (uint64_t*)ctx[4]->hash_state;
+    uint64_t* h5 = (uint64_t*)ctx[5]->hash_state;
+    uint64_t* h6 = (uint64_t*)ctx[6]->hash_state;
+    uint64_t* h7 = (uint64_t*)ctx[7]->hash_state;
+    uint64_t* h8 = (uint64_t*)ctx[8]->hash_state;
+    uint64_t* h9 = (uint64_t*)ctx[9]->hash_state;
+    uint64_t* h10 = (uint64_t*)ctx[10]->hash_state;
+    uint64_t* h11 = (uint64_t*)ctx[11]->hash_state;
+    uint64_t* h12 = (uint64_t*)ctx[12]->hash_state;
+    uint64_t* h13 = (uint64_t*)ctx[13]->hash_state;
+    uint64_t* h14 = (uint64_t*)ctx[14]->hash_state;
+    uint64_t* h15 = (uint64_t*)ctx[15]->hash_state;
+    uint64_t* h16 = (uint64_t*)ctx[16]->hash_state;
+    uint64_t* h17 = (uint64_t*)ctx[17]->hash_state;
+    uint64_t* h18 = (uint64_t*)ctx[18]->hash_state;
+    uint64_t* h19 = (uint64_t*)ctx[19]->hash_state;
+
+    // a = (h[1] ^ h[5], h[0] ^ h[4]) per lane.
+    __m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]);
+    __m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]);
+    __m128i ax2 = _mm_set_epi64x(h2[1] ^ h2[5], h2[0] ^ h2[4]);
+    __m128i ax3 = _mm_set_epi64x(h3[1] ^ h3[5], h3[0] ^ h3[4]);
+    __m128i ax4 = _mm_set_epi64x(h4[1] ^ h4[5], h4[0] ^ h4[4]);
+    __m128i ax5 = _mm_set_epi64x(h5[1] ^ h5[5], h5[0] ^ h5[4]);
+    __m128i ax6 = _mm_set_epi64x(h6[1] ^ h6[5], h6[0] ^ h6[4]);
+    __m128i ax7 = _mm_set_epi64x(h7[1] ^ h7[5], h7[0] ^ h7[4]);
+    __m128i ax8 = _mm_set_epi64x(h8[1] ^ h8[5], h8[0] ^ h8[4]);
+    __m128i ax9 = _mm_set_epi64x(h9[1] ^ h9[5], h9[0] ^ h9[4]);
+    __m128i ax10 = _mm_set_epi64x(h10[1] ^ h10[5], h10[0] ^ h10[4]);
+    __m128i ax11 = _mm_set_epi64x(h11[1] ^ h11[5], h11[0] ^ h11[4]);
+    __m128i ax12 = _mm_set_epi64x(h12[1] ^ h12[5], h12[0] ^ h12[4]);
+    __m128i ax13 = _mm_set_epi64x(h13[1] ^ h13[5], h13[0] ^ h13[4]);
+    __m128i ax14 = _mm_set_epi64x(h14[1] ^ h14[5], h14[0] ^ h14[4]);
+    __m128i ax15 = _mm_set_epi64x(h15[1] ^ h15[5], h15[0] ^ h15[4]);
+    __m128i ax16 = _mm_set_epi64x(h16[1] ^ h16[5], h16[0] ^ h16[4]);
+    __m128i ax17 = _mm_set_epi64x(h17[1] ^ h17[5], h17[0] ^ h17[4]);
+    __m128i ax18 = _mm_set_epi64x(h18[1] ^ h18[5], h18[0] ^ h18[4]);
+    __m128i ax19 = _mm_set_epi64x(h19[1] ^ h19[5], h19[0] ^ h19[4]);
+
+    // b = (h[3] ^ h[7], h[2] ^ h[6]) per lane.
+    __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
+    __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
+    __m128i bx2 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
+    __m128i bx3 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
+    __m128i bx4 = _mm_set_epi64x(h4[3] ^ h4[7], h4[2] ^ h4[6]);
+    __m128i bx5 = _mm_set_epi64x(h5[3] ^ h5[7], h5[2] ^ h5[6]);
+    __m128i bx6 = _mm_set_epi64x(h6[3] ^ h6[7], h6[2] ^ h6[6]);
+    __m128i bx7 = _mm_set_epi64x(h7[3] ^ h7[7], h7[2] ^ h7[6]);
+    __m128i bx8 = _mm_set_epi64x(h8[3] ^ h8[7], h8[2] ^ h8[6]);
+    __m128i bx9 = _mm_set_epi64x(h9[3] ^ h9[7], h9[2] ^ h9[6]);
+    __m128i bx10 = _mm_set_epi64x(h10[3] ^ h10[7], h10[2] ^ h10[6]);
+    __m128i bx11 = _mm_set_epi64x(h11[3] ^ h11[7], h11[2] ^ h11[6]);
+    __m128i bx12 = _mm_set_epi64x(h12[3] ^ h12[7], h12[2] ^ h12[6]);
+    __m128i bx13 = _mm_set_epi64x(h13[3] ^ h13[7], h13[2] ^ h13[6]);
+    __m128i bx14 = _mm_set_epi64x(h14[3] ^ h14[7], h14[2] ^ h14[6]);
+    __m128i bx15 = _mm_set_epi64x(h15[3] ^ h15[7], h15[2] ^ h15[6]);
+    __m128i bx16 = _mm_set_epi64x(h16[3] ^ h16[7], h16[2] ^ h16[6]);
+    __m128i bx17 = _mm_set_epi64x(h17[3] ^ h17[7], h17[2] ^ h17[6]);
+    __m128i bx18 = _mm_set_epi64x(h18[3] ^ h18[7], h18[2] ^ h18[6]);
+    __m128i bx19 = _mm_set_epi64x(h19[3] ^ h19[7], h19[2] ^ h19[6]);
+
+    // c starts as zero; it is overwritten by the first CN_STEP1_B load.
+    __m128i cx0 = _mm_set_epi64x(0, 0);
+    __m128i cx1 = _mm_set_epi64x(0, 0);
+    __m128i cx2 = _mm_set_epi64x(0, 0);
+    __m128i cx3 = _mm_set_epi64x(0, 0);
+    __m128i cx4 = _mm_set_epi64x(0, 0);
+    __m128i cx5 = _mm_set_epi64x(0, 0);
+    __m128i cx6 = _mm_set_epi64x(0, 0);
+    __m128i cx7 = _mm_set_epi64x(0, 0);
+    __m128i cx8 = _mm_set_epi64x(0, 0);
+    __m128i cx9 = _mm_set_epi64x(0, 0);
+    __m128i cx10 = _mm_set_epi64x(0, 0);
+    __m128i cx11 = _mm_set_epi64x(0, 0);
+    __m128i cx12 = _mm_set_epi64x(0, 0);
+    __m128i cx13 = _mm_set_epi64x(0, 0);
+    __m128i cx14 = _mm_set_epi64x(0, 0);
+    __m128i cx15 = _mm_set_epi64x(0, 0);
+    __m128i cx16 = _mm_set_epi64x(0, 0);
+    __m128i cx17 = _mm_set_epi64x(0, 0);
+    __m128i cx18 = _mm_set_epi64x(0, 0);
+    __m128i cx19 = _mm_set_epi64x(0, 0);
+
+    // Initial scratchpad index of each lane: the low 64 bits of a.
+    uint64_t idx0 = _mm_cvtsi128_si64(ax0);
+    uint64_t idx1 = _mm_cvtsi128_si64(ax1);
+    uint64_t idx2 = _mm_cvtsi128_si64(ax2);
+    uint64_t idx3 = _mm_cvtsi128_si64(ax3);
+    uint64_t idx4 = _mm_cvtsi128_si64(ax4);
+    uint64_t idx5 = _mm_cvtsi128_si64(ax5);
+    uint64_t idx6 = _mm_cvtsi128_si64(ax6);
+    uint64_t idx7 = _mm_cvtsi128_si64(ax7);
+    uint64_t idx8 = _mm_cvtsi128_si64(ax8);
+    uint64_t idx9 = _mm_cvtsi128_si64(ax9);
+    uint64_t idx10 = _mm_cvtsi128_si64(ax10);
+    uint64_t idx11 = _mm_cvtsi128_si64(ax11);
+    uint64_t idx12 = _mm_cvtsi128_si64(ax12);
+    uint64_t idx13 = _mm_cvtsi128_si64(ax13);
+    uint64_t idx14 = _mm_cvtsi128_si64(ax14);
+    uint64_t idx15 = _mm_cvtsi128_si64(ax15);
+    uint64_t idx16 = _mm_cvtsi128_si64(ax16);
+    uint64_t idx17 = _mm_cvtsi128_si64(ax17);
+    uint64_t idx18 = _mm_cvtsi128_si64(ax18);
+    uint64_t idx19 = _mm_cvtsi128_si64(ax19);
+
+    // Current scratchpad slot of each lane; assigned by CN_STEP1_A / CN_STEP3_A.
+    __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4, *ptr5, *ptr6, *ptr7, *ptr8, *ptr9;
+    __m128i *ptr10, *ptr11, *ptr12, *ptr13, *ptr14, *ptr15, *ptr16, *ptr17, *ptr18, *ptr19;
+
+    // Scratch for the _umul128 in CN_STEP4; shared by all lanes.
+    uint64_t hi, lo;
+
+    // Prologue: compute the first slot address of every lane (and prefetch it when PREFETCH).
+    CN_STEP1_A(ax0, bx0, cx0, l0, ptr0, idx0);
+    CN_STEP1_A(ax1, bx1, cx1, l1, ptr1, idx1);
+    CN_STEP1_A(ax2, bx2, cx2, l2, ptr2, idx2);
+    CN_STEP1_A(ax3, bx3, cx3, l3, ptr3, idx3);
+    CN_STEP1_A(ax4, bx4, cx4, l4, ptr4, idx4);
+    CN_STEP1_A(ax5, bx5, cx5, l5, ptr5, idx5);
+    CN_STEP1_A(ax6, bx6, cx6, l6, ptr6, idx6);
+    CN_STEP1_A(ax7, bx7, cx7, l7, ptr7, idx7);
+    CN_STEP1_A(ax8, bx8, cx8, l8, ptr8, idx8);
+    CN_STEP1_A(ax9, bx9, cx9, l9, ptr9, idx9);
+    CN_STEP1_A(ax10, bx10, cx10, l10, ptr10, idx10);
+    CN_STEP1_A(ax11, bx11, cx11, l11, ptr11, idx11);
+    CN_STEP1_A(ax12, bx12, cx12, l12, ptr12, idx12);
+    CN_STEP1_A(ax13, bx13, cx13, l13, ptr13, idx13);
+    CN_STEP1_A(ax14, bx14, cx14, l14, ptr14, idx14);
+    CN_STEP1_A(ax15, bx15, cx15, l15, ptr15, idx15);
+    CN_STEP1_A(ax16, bx16, cx16, l16, ptr16, idx16);
+    CN_STEP1_A(ax17, bx17, cx17, l17, ptr17, idx17);
+    CN_STEP1_A(ax18, bx18, cx18, l18, ptr18, idx18);
+    CN_STEP1_A(ax19, bx19, cx19, l19, ptr19, idx19);
+
+    // Prologue, round 1 first half (b, c order).
+    CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+    CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+    CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+    CN2_STEP2(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
+    CN2_STEP2(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
+    CN2_STEP2(ax5, bx5, cx5, l5, mc5, ptr5, idx5);
+    CN2_STEP2(ax6, bx6, cx6, l6, mc6, ptr6, idx6);
+    CN2_STEP2(ax7, bx7, cx7, l7, mc7, ptr7, idx7);
+    CN2_STEP2(ax8, bx8, cx8, l8, mc8, ptr8, idx8);
+    CN2_STEP2(ax9, bx9, cx9, l9, mc9, ptr9, idx9);
+    CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10);
+    CN2_STEP2(ax11, bx11, cx11, l11, mc11, ptr11, idx11);
+    CN2_STEP2(ax12, bx12, cx12, l12, mc12, ptr12, idx12);
+    CN2_STEP2(ax13, bx13, cx13, l13, mc13, ptr13, idx13);
+    CN2_STEP2(ax14, bx14, cx14, l14, mc14, ptr14, idx14);
+    CN2_STEP2(ax15, bx15, cx15, l15, mc15, ptr15, idx15);
+    CN2_STEP2(ax16, bx16, cx16, l16, mc16, ptr16, idx16);
+    CN2_STEP2(ax17, bx17, cx17, l17, mc17, ptr17, idx17);
+    CN2_STEP2(ax18, bx18, cx18, l18, mc18, ptr18, idx18);
+    CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19);
+
+    // Prologue, finish round 1 and start round 2 (c, b order).
+    CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+    CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+    CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+    CN2_STEP1(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+    CN2_STEP1(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+    CN2_STEP1(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+    CN2_STEP1(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+    CN2_STEP1(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+    CN2_STEP1(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+    CN2_STEP1(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+    CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+    CN2_STEP1(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+    CN2_STEP1(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+    CN2_STEP1(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+    CN2_STEP1(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+    CN2_STEP1(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+    CN2_STEP1(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+    CN2_STEP1(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+    CN2_STEP1(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+    CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+
+    // Prologue, round 2 first half (c, b order).
+    CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+    CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+    CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+    CN2_STEP2(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+    CN2_STEP2(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+    CN2_STEP2(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+    CN2_STEP2(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+    CN2_STEP2(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+    CN2_STEP2(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+    CN2_STEP2(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+    CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+    CN2_STEP2(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+    CN2_STEP2(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+    CN2_STEP2(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+    CN2_STEP2(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+    CN2_STEP2(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+    CN2_STEP2(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+    CN2_STEP2(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+    CN2_STEP2(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+    CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+
+    // Steady state: two rounds per pass; starts at 1 because the prologue above
+    // already issued the first pair of rounds.
+    for (size_t i = 1; i < ITERATIONS/2; i++)
+    {
+        CN2_STEP1(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+        CN2_STEP1(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+        CN2_STEP1(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+        CN2_STEP1(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
+        CN2_STEP1(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
+        CN2_STEP1(ax5, bx5, cx5, l5, mc5, ptr5, idx5);
+        CN2_STEP1(ax6, bx6, cx6, l6, mc6, ptr6, idx6);
+        CN2_STEP1(ax7, bx7, cx7, l7, mc7, ptr7, idx7);
+        CN2_STEP1(ax8, bx8, cx8, l8, mc8, ptr8, idx8);
+        CN2_STEP1(ax9, bx9, cx9, l9, mc9, ptr9, idx9);
+        CN2_STEP1(ax10, bx10, cx10, l10, mc10, ptr10, idx10);
+        CN2_STEP1(ax11, bx11, cx11, l11, mc11, ptr11, idx11);
+        CN2_STEP1(ax12, bx12, cx12, l12, mc12, ptr12, idx12);
+        CN2_STEP1(ax13, bx13, cx13, l13, mc13, ptr13, idx13);
+        CN2_STEP1(ax14, bx14, cx14, l14, mc14, ptr14, idx14);
+        CN2_STEP1(ax15, bx15, cx15, l15, mc15, ptr15, idx15);
+        CN2_STEP1(ax16, bx16, cx16, l16, mc16, ptr16, idx16);
+        CN2_STEP1(ax17, bx17, cx17, l17, mc17, ptr17, idx17);
+        CN2_STEP1(ax18, bx18, cx18, l18, mc18, ptr18, idx18);
+        CN2_STEP1(ax19, bx19, cx19, l19, mc19, ptr19, idx19);
+
+        CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0);
+        CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1);
+        CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, idx2);
+        CN2_STEP2(ax3, bx3, cx3, l3, mc3, ptr3, idx3);
+        CN2_STEP2(ax4, bx4, cx4, l4, mc4, ptr4, idx4);
+        CN2_STEP2(ax5, bx5, cx5, l5, mc5, ptr5, idx5);
+        CN2_STEP2(ax6, bx6, cx6, l6, mc6, ptr6, idx6);
+        CN2_STEP2(ax7, bx7, cx7, l7, mc7, ptr7, idx7);
+        CN2_STEP2(ax8, bx8, cx8, l8, mc8, ptr8, idx8);
+        CN2_STEP2(ax9, bx9, cx9, l9, mc9, ptr9, idx9);
+        CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10);
+        CN2_STEP2(ax11, bx11, cx11, l11, mc11, ptr11, idx11);
+        CN2_STEP2(ax12, bx12, cx12, l12, mc12, ptr12, idx12);
+        CN2_STEP2(ax13, bx13, cx13, l13, mc13, ptr13, idx13);
+        CN2_STEP2(ax14, bx14, cx14, l14, mc14, ptr14, idx14);
+        CN2_STEP2(ax15, bx15, cx15, l15, mc15, ptr15, idx15);
+        CN2_STEP2(ax16, bx16, cx16, l16, mc16, ptr16, idx16);
+        CN2_STEP2(ax17, bx17, cx17, l17, mc17, ptr17, idx17);
+        CN2_STEP2(ax18, bx18, cx18, l18, mc18, ptr18, idx18);
+        CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19);
+
+        CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+        CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+        CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+        CN2_STEP1(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+        CN2_STEP1(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+        CN2_STEP1(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+        CN2_STEP1(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+        CN2_STEP1(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+        CN2_STEP1(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+        CN2_STEP1(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+        CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+        CN2_STEP1(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+        CN2_STEP1(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+        CN2_STEP1(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+        CN2_STEP1(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+        CN2_STEP1(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+        CN2_STEP1(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+        CN2_STEP1(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+        CN2_STEP1(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+        CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+
+        CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+        CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+        CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+        CN2_STEP2(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+        CN2_STEP2(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+        CN2_STEP2(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+        CN2_STEP2(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+        CN2_STEP2(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+        CN2_STEP2(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+        CN2_STEP2(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+        CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+        CN2_STEP2(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+        CN2_STEP2(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+        CN2_STEP2(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+        CN2_STEP2(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+        CN2_STEP2(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+        CN2_STEP2(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+        CN2_STEP2(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+        CN2_STEP2(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+        CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+    }
+
+    // Epilogue: finish the last round (load + CN_STEP4) without starting another.
+    CN2_STEP3(ax0, cx0, bx0, l0, mc0, ptr0, idx0);
+    CN2_STEP3(ax1, cx1, bx1, l1, mc1, ptr1, idx1);
+    CN2_STEP3(ax2, cx2, bx2, l2, mc2, ptr2, idx2);
+    CN2_STEP3(ax3, cx3, bx3, l3, mc3, ptr3, idx3);
+    CN2_STEP3(ax4, cx4, bx4, l4, mc4, ptr4, idx4);
+    CN2_STEP3(ax5, cx5, bx5, l5, mc5, ptr5, idx5);
+    CN2_STEP3(ax6, cx6, bx6, l6, mc6, ptr6, idx6);
+    CN2_STEP3(ax7, cx7, bx7, l7, mc7, ptr7, idx7);
+    CN2_STEP3(ax8, cx8, bx8, l8, mc8, ptr8, idx8);
+    CN2_STEP3(ax9, cx9, bx9, l9, mc9, ptr9, idx9);
+    CN2_STEP3(ax10, cx10, bx10, l10, mc10, ptr10, idx10);
+    CN2_STEP3(ax11, cx11, bx11, l11, mc11, ptr11, idx11);
+    CN2_STEP3(ax12, cx12, bx12, l12, mc12, ptr12, idx12);
+    CN2_STEP3(ax13, cx13, bx13, l13, mc13, ptr13, idx13);
+    CN2_STEP3(ax14, cx14, bx14, l14, mc14, ptr14, idx14);
+    CN2_STEP3(ax15, cx15, bx15, l15, mc15, ptr15, idx15);
+    CN2_STEP3(ax16, cx16, bx16, l16, mc16, ptr16, idx16);
+    CN2_STEP3(ax17, cx17, bx17, l17, mc17, ptr17, idx17);
+    CN2_STEP3(ax18, cx18, bx18, l18, mc18, ptr18, idx18);
+    CN2_STEP3(ax19, cx19, bx19, l19, mc19, ptr19, idx19);
+
+    // Per lane: cn_implode_scratchpad from long_state back into hash_state, keccakf,
+    // then the finalizer selected by the low two bits of hash_state[0] writes the
+    // lane's 32-byte slot of output.
+    for (size_t i = 0; i < 20; i++)
+    {
+        cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state);
+        keccakf((uint64_t*)ctx[i]->hash_state, 24);
+        extra_hashes[ctx[i]->hash_state[0] & 3](ctx[i]->hash_state, 200, (char*)output + 32 * i);
+    }
+}
diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 8f796678e..65e856d33 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -119,6 +119,9 @@ minethd::minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, switch (iMultiway) { + case 6: + oWorkThd = std::thread(&minethd::twenty_work_main, this); + break; case 5: oWorkThd = std::thread(&minethd::penta_work_main, this); break; @@ -181,7 +184,7 @@ cryptonight_ctx* minethd::minethd_alloc_ctx() return nullptr; //Should never happen } -static constexpr size_t MAX_N = 5; +static constexpr size_t MAX_N = 20; bool minethd::self_test() { alloc_msg msg = { 0 }; @@ 
-276,12 +279,252 @@ bool minethd::self_test() "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 160) == 0; + + hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); + hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx); + bResult &= memcmp(out, + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + 
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" + , 640) == 0; } else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_lite) { } else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_monero) { + unsigned char out[32 * MAX_N]; + cn_hash_fun hashf; + cn_hash_fun_multi hashf_multi; + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), false, 
xmrstak_algo::cryptonight_monero); + hashf( + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx[0]); + bResult = memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 32) == 0; + + hashf = func_selector(::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf( + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx[0]); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 32) == 0; + + hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 64) == 0; + + hashf_multi = func_multi_selector(2, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 64) == 0; + + hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox 
jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 96) == 0; + + hashf_multi = func_multi_selector(3, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 96) == 0; + + hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + 
"\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 128) == 0; + + hashf_multi = func_multi_selector(4, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 128) == 0; + + hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + 
"\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 160) == 0; + + hashf_multi = func_multi_selector(5, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 160) == 0; + + hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The 
quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + 
"\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 640) == 0; + + hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi( + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps 
over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + "The quick brown fox jumps over the lazy dog" + , 43, out, ctx); + bResult &= memcmp(out, + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + 
"\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" + , 640) == 0; } else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_aeon) { @@ -528,6 +771,11 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, // therefore we will build a binary digit and select the // function as a two digit binary + size_t NN(N); + if(NN > 5) { + NN = 6; + } + uint8_t algv; switch(algo) { @@ -574,6 +822,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -591,6 +843,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + 
cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -608,6 +864,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -625,6 +885,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -642,7 +906,11 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, - + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_double_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -659,6 +927,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -676,13 +948,17 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash, + cryptonight_twenty_hash }; std::bitset<2> digit; digit.set(0, !bHaveAes); digit.set(1, !bNoPrefetch); - - return func_table[algv << 4 | (N-2) << 2 | digit.to_ulong()]; + + return func_table[algv * 20 | (NN-2) << 2 | digit.to_ulong()]; } void 
minethd::double_work_main() @@ -705,6 +981,10 @@ void minethd::penta_work_main() multiway_work_main<5u>(); } +void minethd::twenty_work_main() { + multiway_work_main<20u>(); +} + template void minethd::prep_multiway_work(uint8_t *bWorkBlob, uint32_t **piNonce) { diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index 2d40ce314..8aee370ae 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -46,6 +46,7 @@ class minethd : public iBackend void triple_work_main(); void quad_work_main(); void penta_work_main(); + void twenty_work_main(); uint64_t iJobNo; From fea0ae332dc1c62dec08a5ec204c475075694a61 Mon Sep 17 00:00:00 2001 From: Michael Basaman Date: Fri, 1 Jun 2018 08:12:55 +0700 Subject: [PATCH 2/5] 10 hashes at a time --- .../backend/cpu/crypto/cryptonight_aesni.h | 70 +++++++++---------- xmrstak/backend/cpu/minethd.cpp | 60 ++-------------- 2 files changed, 40 insertions(+), 90 deletions(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 6c3d80923..1952cef23 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -1154,7 +1154,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto return; } - for (size_t i = 0; i < 20; i++) + for (size_t i = 0; i < 10; i++) { keccak((const uint8_t *)input + len * i, len, ctx[i]->hash_state, 200); cn_explode_scratchpad((__m128i*)ctx[i]->hash_state, (__m128i*)ctx[i]->long_state); @@ -1170,7 +1170,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CONST_INIT(ctx[7], 7); CONST_INIT(ctx[8], 8); CONST_INIT(ctx[9], 9); - CONST_INIT(ctx[10], 10); + /* CONST_INIT(ctx[10], 10); CONST_INIT(ctx[11], 11); CONST_INIT(ctx[12], 12); CONST_INIT(ctx[13], 13); @@ -1179,7 +1179,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CONST_INIT(ctx[16], 16); 
CONST_INIT(ctx[17], 17); CONST_INIT(ctx[18], 18); - CONST_INIT(ctx[19], 19); + CONST_INIT(ctx[19], 19); */ uint8_t* l0 = ctx[0]->long_state; uint8_t* l1 = ctx[1]->long_state; @@ -1191,7 +1191,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto uint8_t* l7 = ctx[7]->long_state; uint8_t* l8 = ctx[8]->long_state; uint8_t* l9 = ctx[9]->long_state; - uint8_t* l10 = ctx[10]->long_state; + /* uint8_t* l10 = ctx[10]->long_state; uint8_t* l11 = ctx[11]->long_state; uint8_t* l12 = ctx[12]->long_state; uint8_t* l13 = ctx[13]->long_state; @@ -1200,7 +1200,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto uint8_t* l16 = ctx[16]->long_state; uint8_t* l17 = ctx[17]->long_state; uint8_t* l18 = ctx[18]->long_state; - uint8_t* l19 = ctx[19]->long_state; + uint8_t* l19 = ctx[19]->long_state; */ uint64_t* h0 = (uint64_t*)ctx[0]->hash_state; uint64_t* h1 = (uint64_t*)ctx[1]->hash_state; @@ -1212,7 +1212,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto uint64_t* h7 = (uint64_t*)ctx[7]->hash_state; uint64_t* h8 = (uint64_t*)ctx[8]->hash_state; uint64_t* h9 = (uint64_t*)ctx[9]->hash_state; - uint64_t* h10 = (uint64_t*)ctx[10]->hash_state; + /* uint64_t* h10 = (uint64_t*)ctx[10]->hash_state; uint64_t* h11 = (uint64_t*)ctx[11]->hash_state; uint64_t* h12 = (uint64_t*)ctx[12]->hash_state; uint64_t* h13 = (uint64_t*)ctx[13]->hash_state; @@ -1221,7 +1221,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto uint64_t* h16 = (uint64_t*)ctx[16]->hash_state; uint64_t* h17 = (uint64_t*)ctx[17]->hash_state; uint64_t* h18 = (uint64_t*)ctx[18]->hash_state; - uint64_t* h19 = (uint64_t*)ctx[19]->hash_state; + uint64_t* h19 = (uint64_t*)ctx[19]->hash_state; */ __m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]); __m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]); @@ -1233,7 +1233,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* 
output, crypto __m128i ax7 = _mm_set_epi64x(h7[1] ^ h7[5], h7[0] ^ h7[4]); __m128i ax8 = _mm_set_epi64x(h8[1] ^ h8[5], h8[0] ^ h8[4]); __m128i ax9 = _mm_set_epi64x(h9[1] ^ h9[5], h9[0] ^ h9[4]); - __m128i ax10 = _mm_set_epi64x(h10[1] ^ h10[5], h10[0] ^ h10[4]); + /* __m128i ax10 = _mm_set_epi64x(h10[1] ^ h10[5], h10[0] ^ h10[4]); __m128i ax11 = _mm_set_epi64x(h11[1] ^ h11[5], h11[0] ^ h11[4]); __m128i ax12 = _mm_set_epi64x(h12[1] ^ h12[5], h12[0] ^ h12[4]); __m128i ax13 = _mm_set_epi64x(h13[1] ^ h13[5], h13[0] ^ h13[4]); @@ -1242,7 +1242,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto __m128i ax16 = _mm_set_epi64x(h16[1] ^ h16[5], h16[0] ^ h16[4]); __m128i ax17 = _mm_set_epi64x(h17[1] ^ h17[5], h17[0] ^ h17[4]); __m128i ax18 = _mm_set_epi64x(h18[1] ^ h18[5], h18[0] ^ h18[4]); - __m128i ax19 = _mm_set_epi64x(h19[1] ^ h19[5], h19[0] ^ h19[4]); + __m128i ax19 = _mm_set_epi64x(h19[1] ^ h19[5], h19[0] ^ h19[4]); */ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); @@ -1254,7 +1254,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto __m128i bx7 = _mm_set_epi64x(h7[3] ^ h7[7], h7[2] ^ h7[6]); __m128i bx8 = _mm_set_epi64x(h8[3] ^ h8[7], h8[2] ^ h8[6]); __m128i bx9 = _mm_set_epi64x(h9[3] ^ h9[7], h9[2] ^ h9[6]); - __m128i bx10 = _mm_set_epi64x(h10[3] ^ h10[7], h10[2] ^ h10[6]); + /* __m128i bx10 = _mm_set_epi64x(h10[3] ^ h10[7], h10[2] ^ h10[6]); __m128i bx11 = _mm_set_epi64x(h11[3] ^ h11[7], h11[2] ^ h11[6]); __m128i bx12 = _mm_set_epi64x(h12[3] ^ h12[7], h12[2] ^ h12[6]); __m128i bx13 = _mm_set_epi64x(h13[3] ^ h13[7], h13[2] ^ h13[6]); @@ -1263,7 +1263,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto __m128i bx16 = _mm_set_epi64x(h16[3] ^ h16[7], h16[2] ^ h16[6]); __m128i bx17 = _mm_set_epi64x(h17[3] ^ h17[7], h17[2] ^ h17[6]); __m128i bx18 = _mm_set_epi64x(h18[3] ^ h18[7], h18[2] ^ h18[6]); - 
__m128i bx19 = _mm_set_epi64x(h19[3] ^ h19[7], h19[2] ^ h19[6]); + __m128i bx19 = _mm_set_epi64x(h19[3] ^ h19[7], h19[2] ^ h19[6]); */ __m128i cx0 = _mm_set_epi64x(0, 0); __m128i cx1 = _mm_set_epi64x(0, 0); @@ -1275,7 +1275,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto __m128i cx7 = _mm_set_epi64x(0, 0); __m128i cx8 = _mm_set_epi64x(0, 0); __m128i cx9 = _mm_set_epi64x(0, 0); - __m128i cx10 = _mm_set_epi64x(0, 0); + /* __m128i cx10 = _mm_set_epi64x(0, 0); __m128i cx11 = _mm_set_epi64x(0, 0); __m128i cx12 = _mm_set_epi64x(0, 0); __m128i cx13 = _mm_set_epi64x(0, 0); @@ -1284,7 +1284,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto __m128i cx16 = _mm_set_epi64x(0, 0); __m128i cx17 = _mm_set_epi64x(0, 0); __m128i cx18 = _mm_set_epi64x(0, 0); - __m128i cx19 = _mm_set_epi64x(0, 0); + __m128i cx19 = _mm_set_epi64x(0, 0); */ uint64_t idx0 = _mm_cvtsi128_si64(ax0); uint64_t idx1 = _mm_cvtsi128_si64(ax1); @@ -1296,7 +1296,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto uint64_t idx7 = _mm_cvtsi128_si64(ax7); uint64_t idx8 = _mm_cvtsi128_si64(ax8); uint64_t idx9 = _mm_cvtsi128_si64(ax9); - uint64_t idx10 = _mm_cvtsi128_si64(ax10); + /* uint64_t idx10 = _mm_cvtsi128_si64(ax10); uint64_t idx11 = _mm_cvtsi128_si64(ax11); uint64_t idx12 = _mm_cvtsi128_si64(ax12); uint64_t idx13 = _mm_cvtsi128_si64(ax13); @@ -1305,10 +1305,10 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto uint64_t idx16 = _mm_cvtsi128_si64(ax16); uint64_t idx17 = _mm_cvtsi128_si64(ax17); uint64_t idx18 = _mm_cvtsi128_si64(ax18); - uint64_t idx19 = _mm_cvtsi128_si64(ax19); + uint64_t idx19 = _mm_cvtsi128_si64(ax19); */ __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4, *ptr5, *ptr6, *ptr7, *ptr8, *ptr9; - __m128i *ptr10, *ptr11, *ptr12, *ptr13, *ptr14, *ptr15, *ptr16, *ptr17, *ptr18, *ptr19; + //__m128i *ptr10, *ptr11, *ptr12, *ptr13, *ptr14, *ptr15, *ptr16, *ptr17, *ptr18, 
*ptr19; uint64_t hi, lo; @@ -1322,7 +1322,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN_STEP1_A(ax7, bx7, cx7, l7, ptr7, idx7); CN_STEP1_A(ax8, bx8, cx8, l8, ptr8, idx8); CN_STEP1_A(ax9, bx9, cx9, l9, ptr9, idx9); - CN_STEP1_A(ax10, bx10, cx10, l10, ptr10, idx10); + /* CN_STEP1_A(ax10, bx10, cx10, l10, ptr10, idx10); CN_STEP1_A(ax11, bx11, cx11, l11, ptr11, idx11); CN_STEP1_A(ax12, bx12, cx12, l12, ptr12, idx12); CN_STEP1_A(ax13, bx13, cx13, l13, ptr13, idx13); @@ -1331,7 +1331,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN_STEP1_A(ax16, bx16, cx16, l16, ptr16, idx16); CN_STEP1_A(ax17, bx17, cx17, l17, ptr17, idx17); CN_STEP1_A(ax18, bx18, cx18, l18, ptr18, idx18); - CN_STEP1_A(ax19, bx19, cx19, l19, ptr19, idx19); + CN_STEP1_A(ax19, bx19, cx19, l19, ptr19, idx19); */ CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); @@ -1343,7 +1343,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax7, bx7, cx7, l7, mc7, ptr7, idx7); CN2_STEP2(ax8, bx8, cx8, l8, mc8, ptr8, idx8); CN2_STEP2(ax9, bx9, cx9, l9, mc9, ptr9, idx9); - CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10); + /* CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10); CN2_STEP2(ax11, bx11, cx11, l11, mc11, ptr11, idx11); CN2_STEP2(ax12, bx12, cx12, l12, mc12, ptr12, idx12); CN2_STEP2(ax13, bx13, cx13, l13, mc13, ptr13, idx13); @@ -1352,7 +1352,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax16, bx16, cx16, l16, mc16, ptr16, idx16); CN2_STEP2(ax17, bx17, cx17, l17, mc17, ptr17, idx17); CN2_STEP2(ax18, bx18, cx18, l18, mc18, ptr18, idx18); - CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19); + CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19); */ CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); @@ -1364,7 +1364,7 @@ void 
cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP1(ax7, cx7, bx7, l7, mc7, ptr7, idx7); CN2_STEP1(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP1(ax9, cx9, bx9, l9, mc9, ptr9, idx9); - CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10); + /* CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10); CN2_STEP1(ax11, cx11, bx11, l11, mc11, ptr11, idx11); CN2_STEP1(ax12, cx12, bx12, l12, mc12, ptr12, idx12); CN2_STEP1(ax13, cx13, bx13, l13, mc13, ptr13, idx13); @@ -1373,7 +1373,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP1(ax16, cx16, bx16, l16, mc16, ptr16, idx16); CN2_STEP1(ax17, cx17, bx17, l17, mc17, ptr17, idx17); CN2_STEP1(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19); + CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); @@ -1385,7 +1385,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax7, cx7, bx7, l7, mc7, ptr7, idx7); CN2_STEP2(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP2(ax9, cx9, bx9, l9, mc9, ptr9, idx9); - CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10); + /* CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10); CN2_STEP2(ax11, cx11, bx11, l11, mc11, ptr11, idx11); CN2_STEP2(ax12, cx12, bx12, l12, mc12, ptr12, idx12); CN2_STEP2(ax13, cx13, bx13, l13, mc13, ptr13, idx13); @@ -1394,7 +1394,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax16, cx16, bx16, l16, mc16, ptr16, idx16); CN2_STEP2(ax17, cx17, bx17, l17, mc17, ptr17, idx17); CN2_STEP2(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19); + CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ for (size_t i = 1; i < ITERATIONS/2; i++) { @@ -1408,7 +1408,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, 
void* output, crypto CN2_STEP1(ax7, bx7, cx7, l7, mc7, ptr7, idx7); CN2_STEP1(ax8, bx8, cx8, l8, mc8, ptr8, idx8); CN2_STEP1(ax9, bx9, cx9, l9, mc9, ptr9, idx9); - CN2_STEP1(ax10, bx10, cx10, l10, mc10, ptr10, idx10); + /* CN2_STEP1(ax10, bx10, cx10, l10, mc10, ptr10, idx10); CN2_STEP1(ax11, bx11, cx11, l11, mc11, ptr11, idx11); CN2_STEP1(ax12, bx12, cx12, l12, mc12, ptr12, idx12); CN2_STEP1(ax13, bx13, cx13, l13, mc13, ptr13, idx13); @@ -1417,7 +1417,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP1(ax16, bx16, cx16, l16, mc16, ptr16, idx16); CN2_STEP1(ax17, bx17, cx17, l17, mc17, ptr17, idx17); CN2_STEP1(ax18, bx18, cx18, l18, mc18, ptr18, idx18); - CN2_STEP1(ax19, bx19, cx19, l19, mc19, ptr19, idx19); + CN2_STEP1(ax19, bx19, cx19, l19, mc19, ptr19, idx19); */ CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); @@ -1429,7 +1429,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax7, bx7, cx7, l7, mc7, ptr7, idx7); CN2_STEP2(ax8, bx8, cx8, l8, mc8, ptr8, idx8); CN2_STEP2(ax9, bx9, cx9, l9, mc9, ptr9, idx9); - CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10); + /* CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10); CN2_STEP2(ax11, bx11, cx11, l11, mc11, ptr11, idx11); CN2_STEP2(ax12, bx12, cx12, l12, mc12, ptr12, idx12); CN2_STEP2(ax13, bx13, cx13, l13, mc13, ptr13, idx13); @@ -1438,7 +1438,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax16, bx16, cx16, l16, mc16, ptr16, idx16); CN2_STEP2(ax17, bx17, cx17, l17, mc17, ptr17, idx17); CN2_STEP2(ax18, bx18, cx18, l18, mc18, ptr18, idx18); - CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19); + CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19); */ CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); @@ -1450,7 +1450,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* 
output, crypto CN2_STEP1(ax7, cx7, bx7, l7, mc7, ptr7, idx7); CN2_STEP1(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP1(ax9, cx9, bx9, l9, mc9, ptr9, idx9); - CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10); + /* CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10); CN2_STEP1(ax11, cx11, bx11, l11, mc11, ptr11, idx11); CN2_STEP1(ax12, cx12, bx12, l12, mc12, ptr12, idx12); CN2_STEP1(ax13, cx13, bx13, l13, mc13, ptr13, idx13); @@ -1459,7 +1459,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP1(ax16, cx16, bx16, l16, mc16, ptr16, idx16); CN2_STEP1(ax17, cx17, bx17, l17, mc17, ptr17, idx17); CN2_STEP1(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19); + CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); @@ -1471,7 +1471,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax7, cx7, bx7, l7, mc7, ptr7, idx7); CN2_STEP2(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP2(ax9, cx9, bx9, l9, mc9, ptr9, idx9); - CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10); + /* CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10); CN2_STEP2(ax11, cx11, bx11, l11, mc11, ptr11, idx11); CN2_STEP2(ax12, cx12, bx12, l12, mc12, ptr12, idx12); CN2_STEP2(ax13, cx13, bx13, l13, mc13, ptr13, idx13); @@ -1480,7 +1480,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax16, cx16, bx16, l16, mc16, ptr16, idx16); CN2_STEP2(ax17, cx17, bx17, l17, mc17, ptr17, idx17); CN2_STEP2(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19); + CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ } CN2_STEP3(ax0, cx0, bx0, l0, mc0, ptr0, idx0); @@ -1493,7 +1493,7 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP3(ax7, cx7, bx7, l7, mc7, ptr7, 
idx7); CN2_STEP3(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP3(ax9, cx9, bx9, l9, mc9, ptr9, idx9); - CN2_STEP3(ax10, cx10, bx10, l10, mc10, ptr10, idx10); +/* CN2_STEP3(ax10, cx10, bx10, l10, mc10, ptr10, idx10); CN2_STEP3(ax11, cx11, bx11, l11, mc11, ptr11, idx11); CN2_STEP3(ax12, cx12, bx12, l12, mc12, ptr12, idx12); CN2_STEP3(ax13, cx13, bx13, l13, mc13, ptr13, idx13); @@ -1502,9 +1502,9 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP3(ax16, cx16, bx16, l16, mc16, ptr16, idx16); CN2_STEP3(ax17, cx17, bx17, l17, mc17, ptr17, idx17); CN2_STEP3(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP3(ax19, cx19, bx19, l19, mc19, ptr19, idx19); + CN2_STEP3(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ - for (size_t i = 0; i < 20; i++) + for (size_t i = 0; i < 10; i++) { cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); keccakf((uint64_t*)ctx[i]->hash_state, 24); diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index 65e856d33..bce619c89 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -281,7 +281,7 @@ bool minethd::self_test() "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 160) == 0; hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); - hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx); + hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx); bResult &= memcmp(out, 
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" @@ -293,17 +293,7 @@ bool minethd::self_test() "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - 
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" - , 640) == 0; + , 320) == 0; } else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_lite) { @@ -436,16 +426,6 @@ bool minethd::self_test() hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); hashf_multi( - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" "The quick brown fox jumps over the lazy dog" "The quick brown fox jumps over the lazy dog" "The quick brown fox jumps over the lazy dog" @@ -468,30 +448,10 @@ bool minethd::self_test() "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - 
"\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - , 640) == 0; + , 320) == 0; hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); hashf_multi( - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" - "The quick brown fox jumps over the lazy dog" "The quick brown fox jumps over the lazy dog" "The quick brown fox jumps over the lazy dog" "The quick brown fox jumps over the lazy dog" @@ -514,17 +474,7 @@ bool minethd::self_test() "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" 
"\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" - , 640) == 0; + , 320) == 0; } else if(::jconf::inst()->GetCurrentCoinSelection().GetDescription(1).GetMiningAlgo() == cryptonight_aeon) { @@ -982,7 +932,7 @@ void minethd::penta_work_main() } void minethd::twenty_work_main() { - multiway_work_main<20u>(); + multiway_work_main<10u>(); } template From fcb9e188bb6842c017fc6c4891a63b33b1901ceb Mon Sep 17 00:00:00 2001 From: Michael Basaman Date: Fri, 1 Jun 
2018 13:01:07 +0700 Subject: [PATCH 3/5] use the updated macros in power level 5 as well --- .../backend/cpu/crypto/cryptonight_aesni.h | 98 ++++++++++--------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 1952cef23..3a2fcc453 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -1063,62 +1063,68 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton idx3 = _mm_cvtsi128_si64(ax3); idx4 = _mm_cvtsi128_si64(ax4); - for (size_t i = 0; i < ITERATIONS/2; i++) - { - uint64_t hi, lo; - __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4; + uint64_t hi, lo; + __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4; - // EVEN ROUND - CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2); - CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3); - CN_STEP1(ax4, bx4, cx4, l4, ptr4, idx4); - CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2); - CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3); - CN_STEP2(ax4, bx4, cx4, l4, ptr4, idx4); + CN_STEP1_A(ax0, bx0, cx0, l0, ptr0, idx0); + CN_STEP1_A(ax1, bx1, cx1, l1, ptr1, idx1); + CN_STEP1_A(ax2, bx2, cx2, l2, ptr2, idx2); + CN_STEP1_A(ax3, bx3, cx3, l3, ptr3, idx3); + CN_STEP1_A(ax4, bx4, cx4, l4, ptr4, idx4); - CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2); - CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3); - CN_STEP3(ax4, bx4, cx4, l4, ptr4, idx4); + CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + CN2_STEP2(ax3, bx3, cx3, l3, mc3, ptr3, idx3); + CN2_STEP2(ax4, bx4, cx4, l4, mc4, ptr4, idx4); - CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0); - CN_STEP4(ax1, 
bx1, cx1, l1, mc1, ptr1, idx1); - CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2); - CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3); - CN_STEP4(ax4, bx4, cx4, l4, mc4, ptr4, idx4); + CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP1(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + CN2_STEP1(ax4, cx4, bx4, l4, mc4, ptr4, idx4); - // ODD ROUND - CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2); - CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3); - CN_STEP1(ax4, cx4, bx4, l4, ptr4, idx4); + CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP2(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + CN2_STEP2(ax4, cx4, bx4, l4, mc4, ptr4, idx4); - CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2); - CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3); - CN_STEP2(ax4, cx4, bx4, l4, ptr4, idx4); + for (size_t i = 1; i < ITERATIONS/2; i++) + { - CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2); - CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3); - CN_STEP3(ax4, cx4, bx4, l4, ptr4, idx4); + CN2_STEP1(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN2_STEP1(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN2_STEP1(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + CN2_STEP1(ax3, bx3, cx3, l3, mc3, ptr3, idx3); + CN2_STEP1(ax4, bx4, cx4, l4, mc4, ptr4, idx4); - CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0); - CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1); - CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2); - CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3); - CN_STEP4(ax4, cx4, bx4, l4, mc4, ptr4, idx4); + CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, 
idx2); + CN2_STEP2(ax3, bx3, cx3, l3, mc3, ptr3, idx3); + CN2_STEP2(ax4, bx4, cx4, l4, mc4, ptr4, idx4); + + CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP1(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + CN2_STEP1(ax4, cx4, bx4, l4, mc4, ptr4, idx4); + + CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP2(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + CN2_STEP2(ax4, cx4, bx4, l4, mc4, ptr4, idx4); } + CN2_STEP3(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP3(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP3(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP3(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + CN2_STEP3(ax4, cx4, bx4, l4, mc4, ptr4, idx4); + for (size_t i = 0; i < 5; i++) { cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); From 47150de3cd1a5c3095f766f2a3082d00a20b4820 Mon Sep 17 00:00:00 2001 From: Michael Basaman Date: Fri, 1 Jun 2018 14:07:01 +0700 Subject: [PATCH 4/5] updated power levels 3, 4 and 5. 
Set power level 10 --- .../backend/cpu/crypto/cryptonight_aesni.h | 353 +++++------------- xmrstak/backend/cpu/minethd.cpp | 72 ++-- xmrstak/backend/cpu/minethd.hpp | 2 +- 3 files changed, 131 insertions(+), 296 deletions(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 3a2fcc453..91b509885 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -724,6 +724,10 @@ void cryptonight_double_hash(const void* input, size_t len, void* output, crypto extra_hashes[ctx[1]->hash_state[0] & 3](ctx[1]->hash_state, 200, (char*)output + 32); } +// feel free to clean up the macros and comments if you want it +// +// 452YzXHGTKVf6a9zbqaSBLPHeNsZVTvkhLPUv2hn6oRgXNP95ikasL64nC8oeqXmMSbKTeMfPbVHNfF8otAuCqHXEEWVxxw + #define CN_STEP1_A(a, b, c, l, ptr, idx) \ ptr = (__m128i *)&l[idx & MASK]; \ if(PREFETCH) \ @@ -846,46 +850,49 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto idx1 = _mm_cvtsi128_si64(ax1); idx2 = _mm_cvtsi128_si64(ax2); - for (size_t i = 0; i < ITERATIONS/2; i++) + uint64_t hi, lo; + __m128i *ptr0, *ptr1, *ptr2; + + CN_STEP1_A(ax0, bx0, cx0, l0, ptr0, idx0); + CN_STEP1_A(ax1, bx1, cx1, l1, ptr1, idx1); + CN_STEP1_A(ax2, bx2, cx2, l2, ptr2, idx2); + + CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + + CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + + CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + + for (size_t i = 1; i < ITERATIONS/2; i++) { - uint64_t hi, lo; - __m128i *ptr0, *ptr1, *ptr2; - - // EVEN ROUND - CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP1(ax2, bx2, cx2, 
l2, ptr2, idx2); - - CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2); - - CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2); - - CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0); - CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1); - CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2); - - // ODD ROUND - CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2); - - CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP2(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2); - - CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2); - - CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0); - CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1); - CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + + CN2_STEP1(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN2_STEP1(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN2_STEP1(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + + CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + + CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + + CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2); } + CN2_STEP3(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP3(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP3(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + for (size_t i = 0; i < 3; i++) { cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); @@ -947,54 +954,58 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni idx2 = _mm_cvtsi128_si64(ax2); idx3 = 
_mm_cvtsi128_si64(ax3); - for (size_t i = 0; i < ITERATIONS/2; i++) + uint64_t hi, lo; + __m128i *ptr0, *ptr1, *ptr2, *ptr3; + + CN_STEP1_A(ax0, bx0, cx0, l0, ptr0, idx0); + CN_STEP1_A(ax1, bx1, cx1, l1, ptr1, idx1); + CN_STEP1_A(ax2, bx2, cx2, l2, ptr2, idx2); + CN_STEP1_A(ax3, bx3, cx3, l3, ptr3, idx3); + + CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + CN2_STEP2(ax3, bx3, cx3, l3, mc3, ptr3, idx3); + + CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP1(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + + CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP2(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + + for (size_t i = 1; i < ITERATIONS/2; i++) { - uint64_t hi, lo; - __m128i *ptr0, *ptr1, *ptr2, *ptr3; - - // EVEN ROUND - CN_STEP1(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP1(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP1(ax2, bx2, cx2, l2, ptr2, idx2); - CN_STEP1(ax3, bx3, cx3, l3, ptr3, idx3); - - CN_STEP2(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP2(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP2(ax2, bx2, cx2, l2, ptr2, idx2); - CN_STEP2(ax3, bx3, cx3, l3, ptr3, idx3); - - CN_STEP3(ax0, bx0, cx0, l0, ptr0, idx0); - CN_STEP3(ax1, bx1, cx1, l1, ptr1, idx1); - CN_STEP3(ax2, bx2, cx2, l2, ptr2, idx2); - CN_STEP3(ax3, bx3, cx3, l3, ptr3, idx3); - - CN_STEP4(ax0, bx0, cx0, l0, mc0, ptr0, idx0); - CN_STEP4(ax1, bx1, cx1, l1, mc1, ptr1, idx1); - CN_STEP4(ax2, bx2, cx2, l2, mc2, ptr2, idx2); - CN_STEP4(ax3, bx3, cx3, l3, mc3, ptr3, idx3); - - // ODD ROUND - CN_STEP1(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP1(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP1(ax2, cx2, bx2, l2, ptr2, idx2); - CN_STEP1(ax3, cx3, bx3, l3, ptr3, idx3); - - CN_STEP2(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP2(ax1, cx1, bx1, 
l1, ptr1, idx1); - CN_STEP2(ax2, cx2, bx2, l2, ptr2, idx2); - CN_STEP2(ax3, cx3, bx3, l3, ptr3, idx3); - - CN_STEP3(ax0, cx0, bx0, l0, ptr0, idx0); - CN_STEP3(ax1, cx1, bx1, l1, ptr1, idx1); - CN_STEP3(ax2, cx2, bx2, l2, ptr2, idx2); - CN_STEP3(ax3, cx3, bx3, l3, ptr3, idx3); - - CN_STEP4(ax0, cx0, bx0, l0, mc0, ptr0, idx0); - CN_STEP4(ax1, cx1, bx1, l1, mc1, ptr1, idx1); - CN_STEP4(ax2, cx2, bx2, l2, mc2, ptr2, idx2); - CN_STEP4(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + + CN2_STEP1(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN2_STEP1(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN2_STEP1(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + CN2_STEP1(ax3, bx3, cx3, l3, mc3, ptr3, idx3); + + CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, bx2, cx2, l2, mc2, ptr2, idx2); + CN2_STEP2(ax3, bx3, cx3, l3, mc3, ptr3, idx3); + + CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP1(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP1(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + + CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP2(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP2(ax3, cx3, bx3, l3, mc3, ptr3, idx3); } + CN2_STEP3(ax0, cx0, bx0, l0, mc0, ptr0, idx0); + CN2_STEP3(ax1, cx1, bx1, l1, mc1, ptr1, idx1); + CN2_STEP3(ax2, cx2, bx2, l2, mc2, ptr2, idx2); + CN2_STEP3(ax3, cx3, bx3, l3, mc3, ptr3, idx3); + for (size_t i = 0; i < 4; i++) { cn_implode_scratchpad((__m128i*)ctx[i]->long_state, (__m128i*)ctx[i]->hash_state); @@ -1066,7 +1077,6 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton uint64_t hi, lo; __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4; - CN_STEP1_A(ax0, bx0, cx0, l0, ptr0, idx0); CN_STEP1_A(ax1, bx1, cx1, l1, ptr1, idx1); CN_STEP1_A(ax2, bx2, cx2, l2, ptr2, idx2); @@ -1133,22 +1143,8 @@ void cryptonight_penta_hash(const void* input, size_t len, void* output, crypton } } -// this seems to 
improve PREFETCH performance on a few CPU's that I tried with monero7 -// -// #! /bin/sh -// INDEX=0 -// while [ "$INDEX" -lt 20 ] -// do -// echo "CONST_INIT(ctx[$INDEX], $INDEX);" -// INDEX=`expr $INDEX + 1` -// done -// -// feel free to clean up the macros and comments if you want it -// -// 452YzXHGTKVf6a9zbqaSBLPHeNsZVTvkhLPUv2hn6oRgXNP95ikasL64nC8oeqXmMSbKTeMfPbVHNfF8otAuCqHXEEWVxxw -// template -void cryptonight_twenty_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) +void cryptonight_deca_hash(const void* input, size_t len, void* output, cryptonight_ctx** ctx) { constexpr size_t MASK = cn_select_mask(); constexpr size_t ITERATIONS = cn_select_iter(); @@ -1176,16 +1172,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CONST_INIT(ctx[7], 7); CONST_INIT(ctx[8], 8); CONST_INIT(ctx[9], 9); - /* CONST_INIT(ctx[10], 10); - CONST_INIT(ctx[11], 11); - CONST_INIT(ctx[12], 12); - CONST_INIT(ctx[13], 13); - CONST_INIT(ctx[14], 14); - CONST_INIT(ctx[15], 15); - CONST_INIT(ctx[16], 16); - CONST_INIT(ctx[17], 17); - CONST_INIT(ctx[18], 18); - CONST_INIT(ctx[19], 19); */ uint8_t* l0 = ctx[0]->long_state; uint8_t* l1 = ctx[1]->long_state; @@ -1197,16 +1183,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto uint8_t* l7 = ctx[7]->long_state; uint8_t* l8 = ctx[8]->long_state; uint8_t* l9 = ctx[9]->long_state; - /* uint8_t* l10 = ctx[10]->long_state; - uint8_t* l11 = ctx[11]->long_state; - uint8_t* l12 = ctx[12]->long_state; - uint8_t* l13 = ctx[13]->long_state; - uint8_t* l14 = ctx[14]->long_state; - uint8_t* l15 = ctx[15]->long_state; - uint8_t* l16 = ctx[16]->long_state; - uint8_t* l17 = ctx[17]->long_state; - uint8_t* l18 = ctx[18]->long_state; - uint8_t* l19 = ctx[19]->long_state; */ uint64_t* h0 = (uint64_t*)ctx[0]->hash_state; uint64_t* h1 = (uint64_t*)ctx[1]->hash_state; @@ -1218,16 +1194,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto 
uint64_t* h7 = (uint64_t*)ctx[7]->hash_state; uint64_t* h8 = (uint64_t*)ctx[8]->hash_state; uint64_t* h9 = (uint64_t*)ctx[9]->hash_state; - /* uint64_t* h10 = (uint64_t*)ctx[10]->hash_state; - uint64_t* h11 = (uint64_t*)ctx[11]->hash_state; - uint64_t* h12 = (uint64_t*)ctx[12]->hash_state; - uint64_t* h13 = (uint64_t*)ctx[13]->hash_state; - uint64_t* h14 = (uint64_t*)ctx[14]->hash_state; - uint64_t* h15 = (uint64_t*)ctx[15]->hash_state; - uint64_t* h16 = (uint64_t*)ctx[16]->hash_state; - uint64_t* h17 = (uint64_t*)ctx[17]->hash_state; - uint64_t* h18 = (uint64_t*)ctx[18]->hash_state; - uint64_t* h19 = (uint64_t*)ctx[19]->hash_state; */ __m128i ax0 = _mm_set_epi64x(h0[1] ^ h0[5], h0[0] ^ h0[4]); __m128i ax1 = _mm_set_epi64x(h1[1] ^ h1[5], h1[0] ^ h1[4]); @@ -1239,16 +1205,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto __m128i ax7 = _mm_set_epi64x(h7[1] ^ h7[5], h7[0] ^ h7[4]); __m128i ax8 = _mm_set_epi64x(h8[1] ^ h8[5], h8[0] ^ h8[4]); __m128i ax9 = _mm_set_epi64x(h9[1] ^ h9[5], h9[0] ^ h9[4]); - /* __m128i ax10 = _mm_set_epi64x(h10[1] ^ h10[5], h10[0] ^ h10[4]); - __m128i ax11 = _mm_set_epi64x(h11[1] ^ h11[5], h11[0] ^ h11[4]); - __m128i ax12 = _mm_set_epi64x(h12[1] ^ h12[5], h12[0] ^ h12[4]); - __m128i ax13 = _mm_set_epi64x(h13[1] ^ h13[5], h13[0] ^ h13[4]); - __m128i ax14 = _mm_set_epi64x(h14[1] ^ h14[5], h14[0] ^ h14[4]); - __m128i ax15 = _mm_set_epi64x(h15[1] ^ h15[5], h15[0] ^ h15[4]); - __m128i ax16 = _mm_set_epi64x(h16[1] ^ h16[5], h16[0] ^ h16[4]); - __m128i ax17 = _mm_set_epi64x(h17[1] ^ h17[5], h17[0] ^ h17[4]); - __m128i ax18 = _mm_set_epi64x(h18[1] ^ h18[5], h18[0] ^ h18[4]); - __m128i ax19 = _mm_set_epi64x(h19[1] ^ h19[5], h19[0] ^ h19[4]); */ __m128i bx0 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]); __m128i bx1 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]); @@ -1260,16 +1216,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto __m128i bx7 = _mm_set_epi64x(h7[3] ^ h7[7], h7[2] ^ 
h7[6]); __m128i bx8 = _mm_set_epi64x(h8[3] ^ h8[7], h8[2] ^ h8[6]); __m128i bx9 = _mm_set_epi64x(h9[3] ^ h9[7], h9[2] ^ h9[6]); - /* __m128i bx10 = _mm_set_epi64x(h10[3] ^ h10[7], h10[2] ^ h10[6]); - __m128i bx11 = _mm_set_epi64x(h11[3] ^ h11[7], h11[2] ^ h11[6]); - __m128i bx12 = _mm_set_epi64x(h12[3] ^ h12[7], h12[2] ^ h12[6]); - __m128i bx13 = _mm_set_epi64x(h13[3] ^ h13[7], h13[2] ^ h13[6]); - __m128i bx14 = _mm_set_epi64x(h14[3] ^ h14[7], h14[2] ^ h14[6]); - __m128i bx15 = _mm_set_epi64x(h15[3] ^ h15[7], h15[2] ^ h15[6]); - __m128i bx16 = _mm_set_epi64x(h16[3] ^ h16[7], h16[2] ^ h16[6]); - __m128i bx17 = _mm_set_epi64x(h17[3] ^ h17[7], h17[2] ^ h17[6]); - __m128i bx18 = _mm_set_epi64x(h18[3] ^ h18[7], h18[2] ^ h18[6]); - __m128i bx19 = _mm_set_epi64x(h19[3] ^ h19[7], h19[2] ^ h19[6]); */ __m128i cx0 = _mm_set_epi64x(0, 0); __m128i cx1 = _mm_set_epi64x(0, 0); @@ -1281,16 +1227,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto __m128i cx7 = _mm_set_epi64x(0, 0); __m128i cx8 = _mm_set_epi64x(0, 0); __m128i cx9 = _mm_set_epi64x(0, 0); - /* __m128i cx10 = _mm_set_epi64x(0, 0); - __m128i cx11 = _mm_set_epi64x(0, 0); - __m128i cx12 = _mm_set_epi64x(0, 0); - __m128i cx13 = _mm_set_epi64x(0, 0); - __m128i cx14 = _mm_set_epi64x(0, 0); - __m128i cx15 = _mm_set_epi64x(0, 0); - __m128i cx16 = _mm_set_epi64x(0, 0); - __m128i cx17 = _mm_set_epi64x(0, 0); - __m128i cx18 = _mm_set_epi64x(0, 0); - __m128i cx19 = _mm_set_epi64x(0, 0); */ uint64_t idx0 = _mm_cvtsi128_si64(ax0); uint64_t idx1 = _mm_cvtsi128_si64(ax1); @@ -1302,19 +1238,8 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto uint64_t idx7 = _mm_cvtsi128_si64(ax7); uint64_t idx8 = _mm_cvtsi128_si64(ax8); uint64_t idx9 = _mm_cvtsi128_si64(ax9); - /* uint64_t idx10 = _mm_cvtsi128_si64(ax10); - uint64_t idx11 = _mm_cvtsi128_si64(ax11); - uint64_t idx12 = _mm_cvtsi128_si64(ax12); - uint64_t idx13 = _mm_cvtsi128_si64(ax13); - uint64_t idx14 = 
_mm_cvtsi128_si64(ax14); - uint64_t idx15 = _mm_cvtsi128_si64(ax15); - uint64_t idx16 = _mm_cvtsi128_si64(ax16); - uint64_t idx17 = _mm_cvtsi128_si64(ax17); - uint64_t idx18 = _mm_cvtsi128_si64(ax18); - uint64_t idx19 = _mm_cvtsi128_si64(ax19); */ __m128i *ptr0, *ptr1, *ptr2, *ptr3, *ptr4, *ptr5, *ptr6, *ptr7, *ptr8, *ptr9; - //__m128i *ptr10, *ptr11, *ptr12, *ptr13, *ptr14, *ptr15, *ptr16, *ptr17, *ptr18, *ptr19; uint64_t hi, lo; @@ -1328,16 +1253,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN_STEP1_A(ax7, bx7, cx7, l7, ptr7, idx7); CN_STEP1_A(ax8, bx8, cx8, l8, ptr8, idx8); CN_STEP1_A(ax9, bx9, cx9, l9, ptr9, idx9); - /* CN_STEP1_A(ax10, bx10, cx10, l10, ptr10, idx10); - CN_STEP1_A(ax11, bx11, cx11, l11, ptr11, idx11); - CN_STEP1_A(ax12, bx12, cx12, l12, ptr12, idx12); - CN_STEP1_A(ax13, bx13, cx13, l13, ptr13, idx13); - CN_STEP1_A(ax14, bx14, cx14, l14, ptr14, idx14); - CN_STEP1_A(ax15, bx15, cx15, l15, ptr15, idx15); - CN_STEP1_A(ax16, bx16, cx16, l16, ptr16, idx16); - CN_STEP1_A(ax17, bx17, cx17, l17, ptr17, idx17); - CN_STEP1_A(ax18, bx18, cx18, l18, ptr18, idx18); - CN_STEP1_A(ax19, bx19, cx19, l19, ptr19, idx19); */ CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); @@ -1349,16 +1264,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax7, bx7, cx7, l7, mc7, ptr7, idx7); CN2_STEP2(ax8, bx8, cx8, l8, mc8, ptr8, idx8); CN2_STEP2(ax9, bx9, cx9, l9, mc9, ptr9, idx9); - /* CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10); - CN2_STEP2(ax11, bx11, cx11, l11, mc11, ptr11, idx11); - CN2_STEP2(ax12, bx12, cx12, l12, mc12, ptr12, idx12); - CN2_STEP2(ax13, bx13, cx13, l13, mc13, ptr13, idx13); - CN2_STEP2(ax14, bx14, cx14, l14, mc14, ptr14, idx14); - CN2_STEP2(ax15, bx15, cx15, l15, mc15, ptr15, idx15); - CN2_STEP2(ax16, bx16, cx16, l16, mc16, ptr16, idx16); - CN2_STEP2(ax17, bx17, cx17, l17, mc17, ptr17, idx17); - CN2_STEP2(ax18, bx18, 
cx18, l18, mc18, ptr18, idx18); - CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19); */ CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); @@ -1370,16 +1275,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP1(ax7, cx7, bx7, l7, mc7, ptr7, idx7); CN2_STEP1(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP1(ax9, cx9, bx9, l9, mc9, ptr9, idx9); - /* CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10); - CN2_STEP1(ax11, cx11, bx11, l11, mc11, ptr11, idx11); - CN2_STEP1(ax12, cx12, bx12, l12, mc12, ptr12, idx12); - CN2_STEP1(ax13, cx13, bx13, l13, mc13, ptr13, idx13); - CN2_STEP1(ax14, cx14, bx14, l14, mc14, ptr14, idx14); - CN2_STEP1(ax15, cx15, bx15, l15, mc15, ptr15, idx15); - CN2_STEP1(ax16, cx16, bx16, l16, mc16, ptr16, idx16); - CN2_STEP1(ax17, cx17, bx17, l17, mc17, ptr17, idx17); - CN2_STEP1(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); @@ -1391,16 +1286,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax7, cx7, bx7, l7, mc7, ptr7, idx7); CN2_STEP2(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP2(ax9, cx9, bx9, l9, mc9, ptr9, idx9); - /* CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10); - CN2_STEP2(ax11, cx11, bx11, l11, mc11, ptr11, idx11); - CN2_STEP2(ax12, cx12, bx12, l12, mc12, ptr12, idx12); - CN2_STEP2(ax13, cx13, bx13, l13, mc13, ptr13, idx13); - CN2_STEP2(ax14, cx14, bx14, l14, mc14, ptr14, idx14); - CN2_STEP2(ax15, cx15, bx15, l15, mc15, ptr15, idx15); - CN2_STEP2(ax16, cx16, bx16, l16, mc16, ptr16, idx16); - CN2_STEP2(ax17, cx17, bx17, l17, mc17, ptr17, idx17); - CN2_STEP2(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ for (size_t i = 1; i < ITERATIONS/2; i++) { @@ -1414,16 +1299,6 @@ void 
cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP1(ax7, bx7, cx7, l7, mc7, ptr7, idx7); CN2_STEP1(ax8, bx8, cx8, l8, mc8, ptr8, idx8); CN2_STEP1(ax9, bx9, cx9, l9, mc9, ptr9, idx9); - /* CN2_STEP1(ax10, bx10, cx10, l10, mc10, ptr10, idx10); - CN2_STEP1(ax11, bx11, cx11, l11, mc11, ptr11, idx11); - CN2_STEP1(ax12, bx12, cx12, l12, mc12, ptr12, idx12); - CN2_STEP1(ax13, bx13, cx13, l13, mc13, ptr13, idx13); - CN2_STEP1(ax14, bx14, cx14, l14, mc14, ptr14, idx14); - CN2_STEP1(ax15, bx15, cx15, l15, mc15, ptr15, idx15); - CN2_STEP1(ax16, bx16, cx16, l16, mc16, ptr16, idx16); - CN2_STEP1(ax17, bx17, cx17, l17, mc17, ptr17, idx17); - CN2_STEP1(ax18, bx18, cx18, l18, mc18, ptr18, idx18); - CN2_STEP1(ax19, bx19, cx19, l19, mc19, ptr19, idx19); */ CN2_STEP2(ax0, bx0, cx0, l0, mc0, ptr0, idx0); CN2_STEP2(ax1, bx1, cx1, l1, mc1, ptr1, idx1); @@ -1435,16 +1310,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax7, bx7, cx7, l7, mc7, ptr7, idx7); CN2_STEP2(ax8, bx8, cx8, l8, mc8, ptr8, idx8); CN2_STEP2(ax9, bx9, cx9, l9, mc9, ptr9, idx9); - /* CN2_STEP2(ax10, bx10, cx10, l10, mc10, ptr10, idx10); - CN2_STEP2(ax11, bx11, cx11, l11, mc11, ptr11, idx11); - CN2_STEP2(ax12, bx12, cx12, l12, mc12, ptr12, idx12); - CN2_STEP2(ax13, bx13, cx13, l13, mc13, ptr13, idx13); - CN2_STEP2(ax14, bx14, cx14, l14, mc14, ptr14, idx14); - CN2_STEP2(ax15, bx15, cx15, l15, mc15, ptr15, idx15); - CN2_STEP2(ax16, bx16, cx16, l16, mc16, ptr16, idx16); - CN2_STEP2(ax17, bx17, cx17, l17, mc17, ptr17, idx17); - CN2_STEP2(ax18, bx18, cx18, l18, mc18, ptr18, idx18); - CN2_STEP2(ax19, bx19, cx19, l19, mc19, ptr19, idx19); */ CN2_STEP1(ax0, cx0, bx0, l0, mc0, ptr0, idx0); CN2_STEP1(ax1, cx1, bx1, l1, mc1, ptr1, idx1); @@ -1456,16 +1321,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP1(ax7, cx7, bx7, l7, mc7, ptr7, idx7); CN2_STEP1(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP1(ax9, cx9, 
bx9, l9, mc9, ptr9, idx9); - /* CN2_STEP1(ax10, cx10, bx10, l10, mc10, ptr10, idx10); - CN2_STEP1(ax11, cx11, bx11, l11, mc11, ptr11, idx11); - CN2_STEP1(ax12, cx12, bx12, l12, mc12, ptr12, idx12); - CN2_STEP1(ax13, cx13, bx13, l13, mc13, ptr13, idx13); - CN2_STEP1(ax14, cx14, bx14, l14, mc14, ptr14, idx14); - CN2_STEP1(ax15, cx15, bx15, l15, mc15, ptr15, idx15); - CN2_STEP1(ax16, cx16, bx16, l16, mc16, ptr16, idx16); - CN2_STEP1(ax17, cx17, bx17, l17, mc17, ptr17, idx17); - CN2_STEP1(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP1(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ CN2_STEP2(ax0, cx0, bx0, l0, mc0, ptr0, idx0); CN2_STEP2(ax1, cx1, bx1, l1, mc1, ptr1, idx1); @@ -1477,16 +1332,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP2(ax7, cx7, bx7, l7, mc7, ptr7, idx7); CN2_STEP2(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP2(ax9, cx9, bx9, l9, mc9, ptr9, idx9); - /* CN2_STEP2(ax10, cx10, bx10, l10, mc10, ptr10, idx10); - CN2_STEP2(ax11, cx11, bx11, l11, mc11, ptr11, idx11); - CN2_STEP2(ax12, cx12, bx12, l12, mc12, ptr12, idx12); - CN2_STEP2(ax13, cx13, bx13, l13, mc13, ptr13, idx13); - CN2_STEP2(ax14, cx14, bx14, l14, mc14, ptr14, idx14); - CN2_STEP2(ax15, cx15, bx15, l15, mc15, ptr15, idx15); - CN2_STEP2(ax16, cx16, bx16, l16, mc16, ptr16, idx16); - CN2_STEP2(ax17, cx17, bx17, l17, mc17, ptr17, idx17); - CN2_STEP2(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP2(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ } CN2_STEP3(ax0, cx0, bx0, l0, mc0, ptr0, idx0); @@ -1499,16 +1344,6 @@ void cryptonight_twenty_hash(const void* input, size_t len, void* output, crypto CN2_STEP3(ax7, cx7, bx7, l7, mc7, ptr7, idx7); CN2_STEP3(ax8, cx8, bx8, l8, mc8, ptr8, idx8); CN2_STEP3(ax9, cx9, bx9, l9, mc9, ptr9, idx9); -/* CN2_STEP3(ax10, cx10, bx10, l10, mc10, ptr10, idx10); - CN2_STEP3(ax11, cx11, bx11, l11, mc11, ptr11, idx11); - CN2_STEP3(ax12, cx12, bx12, l12, mc12, ptr12, idx12); - CN2_STEP3(ax13, cx13, bx13, l13, 
mc13, ptr13, idx13); - CN2_STEP3(ax14, cx14, bx14, l14, mc14, ptr14, idx14); - CN2_STEP3(ax15, cx15, bx15, l15, mc15, ptr15, idx15); - CN2_STEP3(ax16, cx16, bx16, l16, mc16, ptr16, idx16); - CN2_STEP3(ax17, cx17, bx17, l17, mc17, ptr17, idx17); - CN2_STEP3(ax18, cx18, bx18, l18, mc18, ptr18, idx18); - CN2_STEP3(ax19, cx19, bx19, l19, mc19, ptr19, idx19); */ for (size_t i = 0; i < 10; i++) { diff --git a/xmrstak/backend/cpu/minethd.cpp b/xmrstak/backend/cpu/minethd.cpp index bce619c89..f213d4b4f 100644 --- a/xmrstak/backend/cpu/minethd.cpp +++ b/xmrstak/backend/cpu/minethd.cpp @@ -119,8 +119,8 @@ minethd::minethd(miner_work& pWork, size_t iNo, int iMultiway, bool no_prefetch, switch (iMultiway) { - case 6: - oWorkThd = std::thread(&minethd::twenty_work_main, this); + case 10: + oWorkThd = std::thread(&minethd::deca_work_main, this); break; case 5: oWorkThd = std::thread(&minethd::penta_work_main, this); @@ -184,7 +184,7 @@ cryptonight_ctx* minethd::minethd_alloc_ctx() return nullptr; //Should never happen } -static constexpr size_t MAX_N = 20; +static constexpr size_t MAX_N = 10; bool minethd::self_test() { alloc_msg msg = { 0 }; @@ -280,7 +280,7 @@ bool minethd::self_test() "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" "\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05", 160) == 0; - hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); + hashf_multi = func_multi_selector(10, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight); hashf_multi("This is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a testThis is a test", 14, out, ctx); bResult &= memcmp(out, 
"\xa0\x84\xf0\x1d\x14\x37\xa0\x9c\x69\x85\x40\x1b\x60\xd4\x35\x54\xae\x10\x58\x02\xc5\xf5\xd8\xa9\xb3\x25\x36\x49\xc0\xbe\x66\x05" @@ -424,7 +424,7 @@ bool minethd::self_test() "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" , 160) == 0; - hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); + hashf_multi = func_multi_selector(10, ::jconf::inst()->HaveHardwareAes(), false, xmrstak_algo::cryptonight_monero); hashf_multi( "The quick brown fox jumps over the lazy dog" "The quick brown fox jumps over the lazy dog" @@ -450,7 +450,7 @@ bool minethd::self_test() "\x94\xf5\xde\xc5\x24\xfa\xd6\xd3\x20\x04\xc5\x5c\x03\x5e\x5e\xa2\x23\xe7\x31\x5b\xe2\x0e\x2d\xc5\xb8\xa0\xac\x74\x64\xff\xeb\x1f" , 320) == 0; - hashf_multi = func_multi_selector(6, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); + hashf_multi = func_multi_selector(10, ::jconf::inst()->HaveHardwareAes(), true, xmrstak_algo::cryptonight_monero); hashf_multi( "The quick brown fox jumps over the lazy dog" "The quick brown fox jumps over the lazy dog" @@ -722,7 +722,7 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, // function as a two digit binary size_t NN(N); - if(NN > 5) { + if(NN == 10) { NN = 6; } @@ -772,10 +772,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -793,10 +793,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, - 
cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -814,10 +814,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -835,10 +835,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -856,10 +856,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -877,10 +877,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, cryptonight_double_hash, cryptonight_double_hash, @@ -898,10 
+898,10 @@ minethd::cn_hash_fun_multi minethd::func_multi_selector(size_t N, bool bHaveAes, cryptonight_penta_hash, cryptonight_penta_hash, cryptonight_penta_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash, - cryptonight_twenty_hash + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash, + cryptonight_deca_hash }; std::bitset<2> digit; @@ -931,7 +931,7 @@ void minethd::penta_work_main() multiway_work_main<5u>(); } -void minethd::twenty_work_main() { +void minethd::deca_work_main() { multiway_work_main<10u>(); } diff --git a/xmrstak/backend/cpu/minethd.hpp b/xmrstak/backend/cpu/minethd.hpp index 8aee370ae..87b56bbfc 100644 --- a/xmrstak/backend/cpu/minethd.hpp +++ b/xmrstak/backend/cpu/minethd.hpp @@ -46,7 +46,7 @@ class minethd : public iBackend void triple_work_main(); void quad_work_main(); void penta_work_main(); - void twenty_work_main(); + void deca_work_main(); uint64_t iJobNo; From 44116b528814a08a3bbbe6142b0c1f6220166e60 Mon Sep 17 00:00:00 2001 From: Gharlane Date: Fri, 1 Jun 2018 14:34:04 +0700 Subject: [PATCH 5/5] remove spaces --- xmrstak/backend/cpu/crypto/cryptonight_aesni.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h index 91b509885..ca47dc631 100644 --- a/xmrstak/backend/cpu/crypto/cryptonight_aesni.h +++ b/xmrstak/backend/cpu/crypto/cryptonight_aesni.h @@ -871,7 +871,6 @@ void cryptonight_triple_hash(const void* input, size_t len, void* output, crypto for (size_t i = 1; i < ITERATIONS/2; i++) { - CN2_STEP1(ax0, bx0, cx0, l0, mc0, ptr0, idx0); CN2_STEP1(ax1, bx1, cx1, l1, mc1, ptr1, idx1); CN2_STEP1(ax2, bx2, cx2, l2, mc2, ptr2, idx2); @@ -979,7 +978,6 @@ void cryptonight_quad_hash(const void* input, size_t len, void* output, cryptoni for (size_t i = 1; i < ITERATIONS/2; i++) { - CN2_STEP1(ax0, bx0, cx0, l0, mc0, ptr0, idx0); CN2_STEP1(ax1, bx1, cx1, l1, mc1, ptr1, idx1); CN2_STEP1(ax2, bx2, 
cx2, l2, mc2, ptr2, idx2);