Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
ayzk committed Jun 1, 2021
1 parent c9a536a commit 8003a7c
Show file tree
Hide file tree
Showing 3 changed files with 315 additions and 1 deletion.
2 changes: 1 addition & 1 deletion include/utils/Verification.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ namespace SZ {
double Min = ori_data[0];
double diffMax = fabs(data[0] - ori_data[0]);
double diff_sum = 0;
double maxpw_relerr = fabs((data[0] - ori_data[0]) / ori_data[0]);
double maxpw_relerr = 0;
double sum1 = 0, sum2 = 0;
for (i = 0; i < num_elements; i++) {
sum1 += ori_data[i];
Expand Down
1 change: 1 addition & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
set(exes sz_demo
sz_fast
sz_truncate
sz_pw
test_huffman_coding
test_arithmetic_coding)

Expand Down
313 changes: 313 additions & 0 deletions test/sz_pw.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@
#include "frontend/SZ3Frontend.hpp"
#include "predictor/Predictor.hpp"
#include "predictor/LorenzoPredictor.hpp"
#include "predictor/RegressionPredictor.hpp"
#include "predictor/ComposedPredictor.hpp"
#include "quantizer/IntegerQuantizer.hpp"
#include "utils/FileUtil.h"
#include "utils/Config.hpp"
#include "def.hpp"
#include "compressor/SZGeneralCompressor.hpp"
#include "encoder/HuffmanEncoder.hpp"
#include "encoder/ArithmeticEncoder.hpp"
#include "encoder/BypassEncoder.hpp"
#include "lossless/Lossless_zstd.hpp"
#include "lossless/Lossless_bypass.hpp"
#include "utils/Verification.hpp"
#include "utils/Timer.hpp"
#include <sstream>
#include <random>
#include <cstdio>
#include <iostream>
#include <memory>

std::string src_file_name;
float relative_error_bound = 0;
float compression_time = 0;


template<typename T, class Frontend, class Encoder, class Lossless, uint N>
float SZ_compress(std::unique_ptr<T[]> const &data,
SZ::Config<T, N> &conf,
Frontend frontend, Encoder encoder, Lossless lossless) {

std::cout << "****************** Options ********************" << std::endl;
std::cout << "dimension = " << N
<< ", error bound = " << conf.eb
<< ", block_size = " << conf.block_size
<< ", stride = " << conf.stride
<< ", quan_state_num = " << conf.quant_state_num
<< std::endl
<< "lorenzo = " << conf.enable_lorenzo
<< ", 2ndlorenzo = " << conf.enable_2ndlorenzo
<< ", regression = " << conf.enable_regression
<< ", encoder = " << conf.encoder_op
<< ", lossless = " << conf.lossless_op
<< std::endl;


std::vector<T> data_ = std::vector<T>(data.get(), data.get() + conf.num);
// preprocess
std::vector<unsigned char> signs(conf.num, 0);
float max_abs_log_data = 0;
float min_log_data = std::numeric_limits<float>::max();
bool positive = true;
for (size_t i = 0; i < conf.num; i++) {
if (data[i] == 0) {
signs[i] = 2;
}
if (data[i] < 0) {
signs[i] = 1;
data[i] = -data[i];
positive = false;
}
if (data[i] > 0) {
data[i] = log2(data[i]);
if (data[i] > max_abs_log_data) max_abs_log_data = data[i];
if (data[i] < min_log_data) min_log_data = data[i];
}

}

if (fabs(min_log_data) > max_abs_log_data) max_abs_log_data = fabs(min_log_data);
double realPrecision = log2(1.0 + conf.eb) - max_abs_log_data * 1.2e-7;
conf.eb = realPrecision;
for (size_t i = 0; i < conf.num; i++) {
if (signs[i] == 2) {
data[i] = min_log_data - 2.0001 * realPrecision;
signs[i] = 0;
}
}

double minLogValue = min_log_data - 1.0001 * realPrecision;
std::cout << "log eb=" << conf.eb
<< ", minlogvalue=" << minLogValue << std::endl;
if (!positive) {
// compress signs
// unsigned long signSize = sz_lossless_compress(ZSTD_COMPRESSOR, 3, signs, dataLength, &comp_signs);
}

// std::vector<T> data1 = std::vector<T>(data.get(), data.get() + conf.num);
auto sz = make_sz_general_compressor(conf, frontend, encoder, lossless);

SZ::Timer timer;
timer.start();
std::cout << "****************** Compression ******************" << std::endl;


size_t compressed_size = 0;
std::unique_ptr<SZ::uchar[]> compressed;
compressed.reset(sz.compress(data.get(), compressed_size));

compression_time = timer.stop("Compression");

auto ratio = conf.num * sizeof(T) * 1.0 / compressed_size;
std::cout << "Compression Ratio = " << ratio << std::endl;
std::cout << "Compressed size = " << compressed_size << std::endl;

std::random_device rd; //Will be used to obtain a seed for the random number engine
std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd()
std::uniform_int_distribution<> dis(0, 10000);
std::stringstream ss;
ss << src_file_name.substr(src_file_name.rfind('/') + 1)
<< "." << relative_error_bound << "." << dis(gen) << ".sz3";
auto compressed_file_name = ss.str();
SZ::writefile(compressed_file_name.c_str(), compressed.get(), compressed_size);
std::cout << "Compressed file = " << compressed_file_name << std::endl;

std::cout << "****************** Decompression ****************" << std::endl;
compressed = SZ::readfile<SZ::uchar>(compressed_file_name.c_str(), compressed_size);

timer.start();
T *dec_data = sz.decompress(compressed.get(), compressed_size);
timer.stop("Decompression");
// SZ::verify<T>(data1.data(), dec_data, conf.num);

// float threshold = minLogValue;
if (!positive) {
// sz_lossless_decompress(ZSTD_COMPRESSOR, tdps->pwrErrBoundBytes, tdps->pwrErrBoundBytes_size, &signs, dataSeriesLength);
for (size_t i = 0; i < conf.num; i++) {
if (dec_data[i] < minLogValue) dec_data[i] = 0;
else dec_data[i] = exp2(dec_data[i]);
if (signs[i]) dec_data[i] = -dec_data[i];
}
} else {
for (size_t i = 0; i < conf.num; i++) {
if (dec_data[i] < minLogValue) dec_data[i] = 0;
else dec_data[i] = exp2(dec_data[i]);
}
}

SZ::verify<T>(data_.data(), dec_data, conf.num);

remove(compressed_file_name.c_str());
auto decompressed_file_name = compressed_file_name + ".out";
SZ::writefile(decompressed_file_name.c_str(), dec_data, conf.num);
std::cout << "Decompressed file = " << decompressed_file_name << std::endl;

delete[] dec_data;
return ratio;
}

template<typename T, class Frontend, uint N>
float SZ_compress_bulid_backend(std::unique_ptr<T[]> const &data,
SZ::Config<T, N> &conf,
Frontend frontend) {
if (conf.lossless_op == 1) {
if (conf.encoder_op == 1) {
return SZ_compress<T>(data, conf, frontend, SZ::HuffmanEncoder<int>(), SZ::Lossless_zstd());
} else if (conf.encoder_op == 2) {
return SZ_compress<T>(data, conf, frontend, SZ::ArithmeticEncoder<int>(true), SZ::Lossless_zstd());
} else {
return SZ_compress<T>(data, conf, frontend, SZ::BypassEncoder<int>(), SZ::Lossless_zstd());
}
} else {
if (conf.encoder_op == 1) {
return SZ_compress<T>(data, conf, frontend, SZ::HuffmanEncoder<int>(), SZ::Lossless_bypass());
} else if (conf.encoder_op == 2) {
return SZ_compress<T>(data, conf, frontend, SZ::ArithmeticEncoder<int>(true), SZ::Lossless_bypass());
} else {
return SZ_compress<T>(data, conf, frontend, SZ::BypassEncoder<int>(), SZ::Lossless_bypass());
}
}
}

template<typename T, uint N>
float SZ_compress_build_frontend(std::unique_ptr<T[]> const &data, SZ::Config<T, N> &conf) {
auto quantizer = SZ::LinearQuantizer<T>(conf.eb, conf.quant_state_num / 2);
std::vector<std::shared_ptr<SZ::concepts::PredictorInterface<T, N>>> predictors;

int use_single_predictor =
(conf.enable_lorenzo + conf.enable_2ndlorenzo + conf.enable_regression) == 1;
if (conf.enable_lorenzo) {
if (use_single_predictor) {
return SZ_compress_bulid_backend<T>(data, conf,
make_sz3_frontend(conf, SZ::LorenzoPredictor<T, N, 1>(conf.eb),
quantizer));
} else {
predictors.push_back(std::make_shared<SZ::LorenzoPredictor<T, N, 1>>(conf.eb));
}
}
if (conf.enable_2ndlorenzo) {
if (use_single_predictor) {
return SZ_compress_bulid_backend<T>(data, conf,
make_sz3_frontend(conf, SZ::LorenzoPredictor<T, N, 2>(conf.eb),
quantizer));
} else {
predictors.push_back(std::make_shared<SZ::LorenzoPredictor<T, N, 2>>(conf.eb));
}
}
if (conf.enable_regression) {
if (use_single_predictor) {
return SZ_compress_bulid_backend<T>(data, conf,
make_sz3_frontend(conf, SZ::RegressionPredictor<T, N>(conf.block_size,
conf.eb),
quantizer));
} else {
predictors.push_back(std::make_shared<SZ::RegressionPredictor<T, N>>(conf.block_size, conf.eb));
}
}

return SZ_compress_bulid_backend<T>(data, conf,
make_sz3_frontend(conf, SZ::ComposedPredictor<T, N>(predictors), quantizer));
}


template<class T, uint N>
float SZ_compress_parse_args(int argc, char **argv, int argp, std::unique_ptr<T[]> &data, float eb,
std::array<size_t, N> dims) {
SZ::Config<float, N> conf(eb, dims);
if (argp < argc) {
int block_size = atoi(argv[argp++]);
conf.block_size = block_size;
conf.stride = block_size;
}

int lorenzo_op = 1;
if (argp < argc) {
lorenzo_op = atoi(argv[argp++]);
conf.enable_lorenzo = lorenzo_op == 1 || lorenzo_op == 3;
conf.enable_2ndlorenzo = lorenzo_op == 2 || lorenzo_op == 3;
}

int regression_op = 1;
if (argp < argc) {
regression_op = atoi(argv[argp++]);
conf.enable_regression = regression_op == 1;
}

if (argp < argc) {
conf.encoder_op = atoi(argv[argp++]);
if (conf.encoder_op == 0) {
conf.quant_state_num = 250;
} else if (conf.encoder_op == 2) {
conf.quant_state_num = 1024;
}
}

if (argp < argc) {
conf.lossless_op = atoi(argv[argp++]);
}

if (argp < argc) {
conf.quant_state_num = atoi(argv[argp++]);
}

auto ratio = SZ_compress_build_frontend(data, conf);
return ratio;
}

int main(int argc, char **argv) {
if (argc < 2) {
std::cout << "SZ v" << SZ_versionString() << std::endl;
std::cout << "usage: " << argv[0] <<
" data_file -num_dim dim0 .. dimn relative_eb [blocksize lorenzo_op regression_op encoder_op lossless_op quant_state_num]"
<< std::endl;
std::cout << "example: " << argv[0] <<
" qmcpack.dat -3 33120 69 69 1e-3 [6 1 1 1 1 32768]" << std::endl;
return 0;
}

size_t num = 0;
auto data = SZ::readfile<float>(argv[1], num);
src_file_name = argv[1];
std::cout << "Read " << num << " elements\n";

int dim = atoi(argv[2] + 1);
assert(1 <= dim && dim <= 4);
int argp = 3;
std::vector<size_t> dims(dim);
for (int i = 0; i < dim; i++) {
dims[i] = atoi(argv[argp++]);
}

// char *eb_op = argv[argp++] + 1;
float eb = 0;
// if (*eb_op == 'a') {
// eb = atof(argv[argp++]);
// } else {
eb = atof(argv[argp++]);
// float max = data[0];
// float min = data[0];
// for (int i = 1; i < num; i++) {
// if (max < data[i]) max = data[i];
// if (min > data[i]) min = data[i];
// }
// eb = relative_error_bound * (max - min);
// }

if (dim == 1) {
SZ_compress_parse_args<float, 1>(argc, argv, argp, data, eb, std::array<size_t, 1>{dims[0]});
} else if (dim == 2) {
SZ_compress_parse_args<float, 2>(argc, argv, argp, data, eb, std::array<size_t, 2>{dims[0], dims[1]});
} else if (dim == 3) {
SZ_compress_parse_args<float, 3>(argc, argv, argp, data, eb,
std::array<size_t, 3>{dims[0], dims[1], dims[2]});
} else if (dim == 4) {
SZ_compress_parse_args<float, 4>(argc, argv, argp, data, eb,
std::array<size_t, 4>{dims[0], dims[1], dims[2], dims[3]});
}


return 0;
}

0 comments on commit 8003a7c

Please sign in to comment.