Skip to content

Commit

Permalink
Some cleaning.
Browse files Browse the repository at this point in the history
  • Loading branch information
lemire committed Jul 9, 2020
1 parent f002db1 commit cd57c31
Show file tree
Hide file tree
Showing 16 changed files with 437 additions and 108 deletions.
14 changes: 7 additions & 7 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,19 @@ ifeq ($(INTEL), 1)
CC ?= /opt/intel/bin/icpc
ifeq ($(DEBUG),1)
CXXFLAGS = -fpic -std=c++11 -O3 -Wall -ansi -xAVX -DDEBUG=1 -D_GLIBCXX_DEBUG -ggdb
CCFLAGS = -fpic -std=c99 -O3 -Wall -ansi -xAVX -DDEBUG=1 -D_GLIBCXX_DEBUG -ggdb
CCFLAGS = -fpic -std=c99 -O3 -Wall -ansi -xAVX -DDEBUG=1 -D_GLIBCXX_DEBUG -ggdb
else
CXXFLAGS = -fpic -std=c++11 -O2 -Wall -ansi -xAVX -DNDEBUG=1 -ggdb
CCFLAGS = -fpic -std=c99 -O2 -Wall -ansi -xAVX -DNDEBUG=1 -ggdb
CXXFLAGS = -fpic -std=c++11 -O2 -Wall -ansi -xAVX -DNDEBUG=1 -ggdb
CCFLAGS = -fpic -std=c99 -O2 -Wall -ansi -xAVX -DNDEBUG=1 -ggdb
endif # debug
else #intel
CXX ?= g++-4.7
ifeq ($(DEBUG),1)
CXXFLAGS = -fpic -mavx -std=c++11 -Weffc++ -pedantic -ggdb -DDEBUG=1 -D_GLIBCXX_DEBUG -Wall -Wextra
CCFLAGS = -fpic -mavx -std=c99 -pedantic -ggdb -DDEBUG=1 -D_GLIBCXX_DEBUG -Wall -Wextra
CXXFLAGS = -fpic -mavx -std=c++11 -Weffc++ -pedantic -ggdb -DDEBUG=1 -D_GLIBCXX_DEBUG -Wall -Wextra -Wextra -Wsign-compare -Wwrite-strings -Wpointer-arith -Winit-self -Wno-sign-conversion
CCFLAGS = -fpic -mavx -std=c99 -pedantic -ggdb -DDEBUG=1 -D_GLIBCXX_DEBUG -Wall -Wextra -Wsign-compare -Wwrite-strings -Wpointer-arith -Winit-self -Wno-sign-conversion
else
CXXFLAGS = -fpic -mavx -std=c++11 -Weffc++ -pedantic -O3 -Wall -Wextra
CCFLAGS = -fpic -mavx -std=c99 -pedantic -O3 -Wall -Wextra
CXXFLAGS = -fpic -mavx -std=c++11 -Weffc++ -pedantic -O3 -Wall -Wextra -Wsign-compare -Wwrite-strings -Wpointer-arith -Winit-self -Wno-sign-conversion
CCFLAGS = -fpic -mavx -std=c99 -pedantic -O3 -Wall -Wextra -Wsign-compare -Wwrite-strings -Wpointer-arith -Winit-self -Wno-sign-conversion
endif #debug
endif #intel

Expand Down
22 changes: 11 additions & 11 deletions advancedbenchmarking/src/budgetedtest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,10 +190,10 @@ class TestHelper {
if (recbuffer != uncompressed) {
cout << "size = " << uncompressed.size() << endl;
int display = 10;
for (size_t i = 0; i < uncompressed.size(); ++i) {
if (uncompressed[i] != recbuffer[i]) {
cout << " i = " << i << " expected " << uncompressed[i]
<< " but got " << recbuffer[i] << endl;
for (size_t ii = 0; ii < uncompressed.size(); ++ii) {
if (uncompressed[ii] != recbuffer[ii]) {
cout << " i = " << ii << " expected " << uncompressed[ii]
<< " but got " << recbuffer[ii] << endl;
display--;
if (display == 0)
break;
Expand Down Expand Up @@ -581,7 +581,7 @@ class TestHelper {
vector<uint32_t> &onePost = uncompPosts.getOnePost(id);
z.reset();
compPostings.emplace(id, shared_ptr<Skipping>(new Skipping(
SkipLog, onePost.data(), onePost.size())));
SkipLog, onePost.data(), (uint32_t) onePost.size())));
packTime += static_cast<double>(z.split());
size_t qty = onePost.size();
if (MaxPostingSize < qty) {
Expand Down Expand Up @@ -622,7 +622,7 @@ class TestHelper {
for (size_t k = 2; (intersize > 0) && (k < sizeids.size()); ++k) {
unpackVolume += compPostings[sizeids[k].second]->Length;
intersize = compPostings[sizeids[k].second]->intersect(
intersection_result.data(), intersize,
intersection_result.data(), (uint32_t) intersize,
intersection_result.data());
}
}
Expand Down Expand Up @@ -705,7 +705,7 @@ class TestHelper {

z.reset(); // actually this should be very quick
CompressedSizeDuringPacking += hybridPart[part]->load(
id, emptyPost.data(), emptyPost.size());
id, emptyPost.data(), (uint32_t) emptyPost.size());
packTime += static_cast<double>(z.split());

packVolume += 0;
Expand Down Expand Up @@ -755,7 +755,7 @@ class TestHelper {
}

CompressedSizeDuringPacking +=
hybridPart[part]->load(id, dirtyCopy.data(), dirtyCopy.size());
hybridPart[part]->load(id, dirtyCopy.data(), (uint32_t) dirtyCopy.size());
packTime += static_cast<double>(z.split());

packVolume += thissize;
Expand Down Expand Up @@ -861,7 +861,7 @@ class TestHelper {
vector<uint32_t> &onePost = uncompPosts.getOnePost(id);
z.reset();
CompressedSizeDuringPacking +=
hybrid.load(id, onePost.data(), onePost.size());
hybrid.load(id, onePost.data(), (uint32_t) onePost.size());
packTime += static_cast<double>(z.split());
size_t qty = onePost.size();
packVolume += qty;
Expand Down Expand Up @@ -920,7 +920,7 @@ class TestHelper {
vector<uint32_t> &onePost = uncompPosts.getOnePost(id);
z.reset();
CompressedSizeDuringPacking +=
hybrid.load(id, onePost.data(), onePost.size());
hybrid.load(id, onePost.data(), (uint32_t) onePost.size());
packTime += static_cast<double>(z.split());
size_t qty = onePost.size();
packVolume += qty;
Expand Down Expand Up @@ -977,7 +977,7 @@ class TestHelper {
vector<uint32_t> &onePost = uncompPosts.getOnePost(id);
z.reset();
CompressedSizeDuringPacking +=
hybrid.load(id, onePost.data(), onePost.size());
hybrid.load(id, onePost.data(), (uint32_t) onePost.size());
packTime += static_cast<double>(z.split());
size_t qty = onePost.size();
packVolume += qty;
Expand Down
8 changes: 4 additions & 4 deletions advancedbenchmarking/src/ramtocache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ void blockedcompress(shared_ptr<IntegerCODEC> c, vector<uint32_t> &buffer,
l = blocksize;
size_t lo = obuffer.size() - outpos;
c->encodeArray(buffer.data() + inpos, l, obuffer.data() + outpos, lo);
obuffer[outcounter++] = lo; // saving compressed length
obuffer[outcounter++] = (uint32_t)lo; // saving compressed length
outpos += lo;
inpos += l;
}
Expand Down Expand Up @@ -155,9 +155,9 @@ void processArray(map<pair<uint32_t, shared_ptr<IntegerCODEC>>, stats> &mystats,
throw runtime_error("failed to recover right length");
for (size_t k = 0; k < l; ++k)
if (l1buf[k] != buffer[k + inpos]) {
for (size_t K = 0; K < l; ++K)
cout << K << ": expected is " << buffer[K + inpos]
<< ", actual is " << l1buf[K] << endl;
for (size_t KK = 0; KK < l; ++KK)
cout << KK << ": expected is " << buffer[KK + inpos]
<< ", actual is " << l1buf[KK] << endl;
throw runtime_error("bug decoded values do not match");
}
inpos += l;
Expand Down
20 changes: 17 additions & 3 deletions example.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,19 @@

using namespace SIMDCompressionLib;


int main() {
// We pick a CODEC
IntegerCODEC &codec = *CODECFactory::getFromName("s4-bp128-dm");
// could use others, e.g., "varint", "s-fastpfor-1"
IntegerCODEC &codec = *CODECFactory::getFromName("s4-fastpfor-d1");
// could use others, e.g., frameofreference, ibp32, maskedvbyte, s4-bp128-d1, s4-bp128-d2, s4-bp128-d4, s4-bp128-dm, simdframeofreference, streamvbyte
//
// Note that some codecs compute the differential coding in-place, thus modifying part of the input, replacing it with a differentially coded version:
// bp32, fastpfor, s4-bp128-d1-ni, s4-bp128-d2-ni, s4-bp128-d4-ni, s4-bp128-dm-ni, s4-fastpfor-d1, s4-fastpfor-d2, s4-fastpfor-d4, s4-fastpfor-dm
// Other codecs do the differential coding "in passing", such as
// for, frameofreference, ibp32, maskedvbyte, s4-bp128-d1, s4-bp128-d2, s4-bp128-d4, s4-bp128-dm, simdframeofreference, streamvbyte, varint, varintg8iu, varintgb, vbyte
//


////////////
//
// create a container with some integers in it
Expand All @@ -30,6 +39,11 @@ int main() {
vector<uint32_t> mydata(N);
for (uint32_t i = 0; i < N; ++i)
mydata[i] = 3 * i;



// we make a copy
std::vector<uint32_t> original_data(mydata);
///////////
//
// You need some "output" container. You are responsible
Expand Down Expand Up @@ -67,7 +81,7 @@ int main() {
//
// That's it for compression!
//
if (mydataback != mydata)
if (mydataback != original_data)
throw runtime_error("bug!");

//
Expand Down
22 changes: 11 additions & 11 deletions include/hybm2.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
* This is an implementation of the hyb+m2 method proposed in:
*
* J. S. Culpepper and A. Moffat. Efficient set intersection for
* inverted indexing. ACM Trans. Inf. Syst., 29(1):1:1Ð1:25, Dec. 2010.
* inverted indexing. ACM Trans. Inf. Syst., 29(1):1:1–1:25, Dec. 2010.
*
* Implemented by Daniel Lemire
*/
Expand Down Expand Up @@ -139,11 +139,11 @@ class HybM2 {
unpackVolume += bitmaps[i].first;
shared_ptr<BoolArray> &ba = bitmaps[i].second;
pos = 0;
for (uint32_t i = 0; i < sizeout; ++i) {
if (!ba->get(out[i]))
for (uint32_t ii = 0; ii < sizeout; ++ii) {
if (!ba->get(out[ii]))
continue;
else
out[pos++] = out[i];
out[pos++] = out[ii];
}
sizeout = pos;
}
Expand Down Expand Up @@ -373,11 +373,11 @@ class UncompressedHybM2 {
unpackVolume += bitmaps[i].first;
shared_ptr<BoolArray> &ba = bitmaps[i].second;
pos = 0;
for (uint32_t i = 0; i < sizeout; ++i) {
if (!ba->get(out[i]))
for (uint32_t ii = 0; ii < sizeout; ++ii) {
if (!ba->get(out[ii]))
continue;
else
out[pos++] = out[i];
out[pos++] = out[ii];
}
sizeout = pos;
}
Expand Down Expand Up @@ -557,19 +557,19 @@ class SkippingHybM2 {
sizeout = shortlists[0].second->intersect(*shortlists[1].second, out);
for (uint32_t i = 2; (sizeout > 0) && (i < shortlists.size()); ++i) {
unpackVolume += shortlists[i].first;
sizeout = shortlists[i].second->intersect(out, sizeout, out);
sizeout = shortlists[i].second->intersect(out, (uint32_t)sizeout, out);
}
}
size_t pos = 0;
for (uint32_t i = 0; (sizeout > 0) && (i < bitmaps.size()); ++i) {
unpackVolume += bitmaps[i].first;
shared_ptr<BoolArray> &ba = bitmaps[i].second;
pos = 0;
for (uint32_t i = 0; i < sizeout; ++i) {
if (!ba->get(out[i]))
for (uint32_t ii = 0; ii < sizeout; ++ii) {
if (!ba->get(out[ii]))
continue;
else
out[pos++] = out[i];
out[pos++] = out[ii];
}
sizeout = pos;
}
Expand Down
2 changes: 1 addition & 1 deletion include/streamvariablebyte.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ class StreamVByteD1 : public IntegerCODEC {
static_cast<uint32_t>(std::min<size_t>(
count, std::numeric_limits<uint32_t>::max())),
1, 1));
*out = 4 + bytesWritten;
*out = uint8_t(4 + bytesWritten);
nvalue = 4 + bytesWritten;
}

Expand Down
6 changes: 3 additions & 3 deletions include/synthetic.h
Original file line number Diff line number Diff line change
Expand Up @@ -216,10 +216,10 @@ class ZipfianGenerator {
init(_items, _zipfianconstant);
}

double zeta(int n, double theta) {
double zeta(int nn, double ttheta) {
double sum = 0;
for (long i = 0; i < n; i++) {
sum += 1 / (pow(i + 1, theta));
for (long i = 0; i < nn; i++) {
sum += 1 / (pow(i + 1, ttheta));
}
return sum;
}
Expand Down
6 changes: 3 additions & 3 deletions include/varintgb.h
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ template <bool delta = false> class VarIntGB : public IntegerCODEC {
} else {
memcpy(inbyte, block1->data, block1->length);
inbyte += block1->length;
inbyte[0] = newsel;
inbyte[0] = (uint8_t)newsel;
inbyte++;
memcpy(inbyte, &nextval, newsel + 1);
inbyte += newsel + 1;
Expand Down Expand Up @@ -904,7 +904,7 @@ template <bool delta = false> class VarIntGB : public IntegerCODEC {
newnextval &= mask[newnextsel];
// uint32_t newnextval = *(reinterpret_cast<const uint32_t*>(b->data +
// offsettolastval)) & mask[newnextsel];
b->data[0] = (b->data[0] << 2) | *newsel;
b->data[0] = uint8_t((b->data[0] << 2) | *newsel);
std::memmove(b->data + 2 + *newsel, b->data + 1,
b->length - 1 - 1 - newnextsel);
b->length = offsettolastval + 1 + *newsel;
Expand All @@ -915,7 +915,7 @@ template <bool delta = false> class VarIntGB : public IntegerCODEC {

void finalshiftin(Block *b, uint32_t nextval, uint32_t newsel,
size_t howmany) {
b->data[0] = (b->data[0] << 2) | newsel;
b->data[0] = uint8_t((b->data[0] << 2) | newsel);
std::memmove(b->data + 2 + newsel, b->data + 1, b->length - 1);
b->length = 1;
for (size_t k = 0; k < howmany; ++k)
Expand Down
26 changes: 13 additions & 13 deletions src/benchintersection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@ size_t intersect_partitionedV1(const uint16_t *A, const size_t s_a,
size_t partition_size = _intersectV1_vector16(
&A[i_a + 2], &B[i_b + 2], static_cast<size_t>(A[i_a + 1]) + 1,
static_cast<size_t>(B[i_b + 1]) + 1, &C[counter + 1]);
C[counter++] = partition_size; // write partition size
C[counter++] = (uint16_t) partition_size; // write partition size
counter += partition_size;
i_a += static_cast<size_t>(A[i_a + 1]) + 2 + 1;
i_b += static_cast<size_t>(B[i_b + 1]) + 2 + 1;
Expand Down Expand Up @@ -650,7 +650,7 @@ size_t intersect_partitionedscalar(const uint16_t *A, const size_t s_a,
size_t partition_size = _intersectscalar_vector16(
&A[i_a + 2], &B[i_b + 2], static_cast<size_t>(A[i_a + 1]) + 1,
static_cast<size_t>(B[i_b + 1]) + 1, &C[counter + 1]);
C[counter++] = partition_size; // write partition size
C[counter++] = (uint16_t) partition_size; // write partition size
counter += partition_size;
i_a += static_cast<size_t>(A[i_a + 1]) + 2 + 1;
i_b += static_cast<size_t>(B[i_b + 1]) + 2 + 1;
Expand Down Expand Up @@ -685,7 +685,7 @@ size_t intersect_partitioned(const uint16_t *A, const size_t s_a,
size_t partition_size = _intersect_vector16(
&A[i_a + 2], &B[i_b + 2], static_cast<size_t>(A[i_a + 1]) + 1,
static_cast<size_t>(B[i_b + 1]) + 1, &C[counter + 1]);
C[counter++] = partition_size; // write partition size
C[counter++] = (uint16_t) partition_size; // write partition size
counter += partition_size;
i_a += static_cast<size_t>(A[i_a + 1]) + 2 + 1;
i_b += static_cast<size_t>(B[i_b + 1]) + 2 + 1;
Expand Down Expand Up @@ -717,7 +717,7 @@ size_t original_intersect_partitioned(const uint16_t *A, const size_t s_a,
size_t partition_size = _original_intersect_vector16(
&A[i_a + 2], &B[i_b + 2], static_cast<size_t>(A[i_a + 1]) + 1,
static_cast<size_t>(B[i_b + 1]) + 1, &C[counter + 1]);
C[counter++] = partition_size; // write partition size
C[counter++] = (uint16_t) partition_size; // write partition size
counter += partition_size;
i_a += static_cast<size_t>(A[i_a + 1]) + 2 + 1;
i_b += static_cast<size_t>(B[i_b + 1]) + 2 + 1;
Expand Down Expand Up @@ -754,7 +754,7 @@ int main(int argc, char **argv) {
Big = atoi(optarg);
break;
case 'R':
intersectionratio = atof(optarg);
intersectionratio = (float)atof(optarg);
break;
case 'M':
MaxBit = atoi(optarg);
Expand Down Expand Up @@ -810,16 +810,16 @@ int main(int argc, char **argv) {
"16-bitscalar ";
cout << "relative-intersection-size " << endl;

for (float ir = 1.001; ir <= 10000; ir = ir * sqrt(1.9)) {
for (double ir = 1.001; ir <= 10000; ir = ir * sqrt(1.9)) {
vector<pair<vector<uint32_t>, vector<uint32_t>>> data(howmany);
uint32_t smallsize =
static_cast<uint32_t>(round(static_cast<float>(1 << Big) / ir));
cout << "#generating data...";
cout.flush();
for (size_t k = 0; k < howmany; ++k) {
data[k] = uniform ? getNaivePair(udg, smallsize, 1U << MaxBit, ir,
data[k] = uniform ? getNaivePair(udg, smallsize, 1U << MaxBit, (float)ir,
intersectionratio)
: getNaivePair(cdg, smallsize, 1U << MaxBit, ir,
: getNaivePair(cdg, smallsize, 1U << MaxBit, (float)ir,
intersectionratio);
}
cout << "ok." << endl;
Expand Down Expand Up @@ -858,7 +858,7 @@ int main(int argc, char **argv) {
aratio = interfnc(data[k].first.data(), (data[k].first).size(),
data[k].second.data(), (data[k].second).size(),
buffer.data());
bogus += aratio;
bogus += size_t(aratio);
}
}
cout << setw(10) << setprecision(5)
Expand All @@ -876,7 +876,7 @@ int main(int argc, char **argv) {
datapart[k].first.data(), (datapart[k].first).size(),
datapart[k].second.data(), (datapart[k].second).size(),
(uint16_t *)buffer.data());
bogus += aratio;
bogus += size_t(aratio);
}
}
cout << setw(10) << setprecision(5)
Expand All @@ -890,7 +890,7 @@ int main(int argc, char **argv) {
datapart[k].first.data(), (datapart[k].first).size(),
datapart[k].second.data(), (datapart[k].second).size(),
(uint16_t *)buffer.data());
bogus += aratio;
bogus += size_t(aratio);
}
}
cout << setw(10) << setprecision(5)
Expand All @@ -904,7 +904,7 @@ int main(int argc, char **argv) {
datapart[k].first.data(), (datapart[k].first).size(),
datapart[k].second.data(), (datapart[k].second).size(),
(uint16_t *)buffer.data());
bogus += aratio;
bogus += size_t(aratio);
}
}
cout << setw(10) << setprecision(5)
Expand All @@ -918,7 +918,7 @@ int main(int argc, char **argv) {
datapart[k].first.data(), (datapart[k].first).size(),
datapart[k].second.data(), (datapart[k].second).size(),
(uint16_t *)buffer.data());
bogus += aratio;
bogus += size_t(aratio);
}
}
cout << setw(10) << setprecision(5)
Expand Down
Loading

0 comments on commit cd57c31

Please sign in to comment.