+ColumnProof<P>::ColumnProof(uint64_t challenge,
+ node_t* labels, size_t label_idx, size_t label_inc,
+ node_t** tree_bufs, node_t* root) :
+ challenge_(challenge),
+ layers_(P::GetNumLayers()),
+ labels_(labels),
+ label_idx_(label_idx),
+ label_inc_(label_inc)
+{
+ tree_ = new TreeProof(P::GetNumTreeRCArity(),
+ P::GetNumTreeRCLevels(),
+ tree_bufs, P::GetNumTreeRCFiles());
+ tree_->SetRoot(root);
+ tree_->GenInclusionPath(challenge, nullptr);
+}
+
+template<class P>
+ColumnProof<P>::~ColumnProof() {
+ if (tree_ != nullptr) {
+ delete tree_;
+ }
+}
+
+template<class P>
+size_t ColumnProof<P>::ProofSize() {
+  size_t proof_size = 4;              // challenge (u32)
+  proof_size += 8;                    // number of layers (u64)
+ proof_size += (sizeof(node_t) * P::GetNumLayers());
+ proof_size += TreeProof::ProofSize(P::GetNumTreeRCArity(),
+ P::GetNumTreeRCLevels(),
+ P::GetNumTreeRCConfig());
+ return proof_size;
+}
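+
+// Serialized layout (see WriteProof below): u32 challenge, u64 layer count,
+// one node_t label per layer, then the tree-c inclusion proof. For example,
+// assuming 11 layers and 32-byte nodes, the column portion alone is
+// 4 + 8 + 11 * 32 = 364 bytes.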
+
+template<class P>
+size_t ColumnProof<P>::WriteProof(uint8_t* file_ptr, size_t buf_index,
+ uint32_t proof_type) {
+ std::memcpy(file_ptr + buf_index, (uint32_t*)&challenge_, sizeof(uint32_t));
+ buf_index += sizeof(uint32_t);
+
+ std::memcpy(file_ptr + buf_index, &layers_, sizeof(uint64_t));
+ buf_index += sizeof(uint64_t);
+
+ for (uint64_t l = 0; l < layers_; ++l) {
+ std::memcpy(file_ptr + buf_index, labels_ + label_idx_ + (l * label_inc_),
+ sizeof(node_t));
+ buf_index += sizeof(node_t);
+ }
+
+ buf_index = tree_->WriteProof(file_ptr, buf_index, proof_type);
+
+ return buf_index;
+}
+
+#endif // __COLUMN_PROOF_HPP__
diff --git a/extern/supraseal/c1/label_proof.hpp b/extern/supraseal/c1/label_proof.hpp
new file mode 100644
index 000000000..be6559c50
--- /dev/null
+++ b/extern/supraseal/c1/label_proof.hpp
@@ -0,0 +1,133 @@
+// Copyright Supranational LLC
+
+#ifndef __LABEL_PROOF_HPP__
+#define __LABEL_PROOF_HPP__
+
+#include "../sealing/data_structures.hpp"
+
+class LabelProof {
+ public:
+ LabelProof(uint64_t challenge, uint64_t layers,
+ node_t* labels, size_t label_inc);
+ ~LabelProof() { }
+
+ size_t WriteProof(uint8_t* file_ptr, size_t buf_index, bool enc = false);
+ static size_t ProofSize(size_t layers, bool enc);
+
+ private:
+ uint64_t challenge_;
+ uint64_t layers_;
+ node_t* labels_;
+ size_t label_inc_;
+};
+
+LabelProof::LabelProof(uint64_t challenge, uint64_t layers,
+ node_t* labels, size_t label_inc) :
+ challenge_(challenge),
+ layers_(layers),
+ labels_(labels),
+ label_inc_(label_inc) { }
+
+size_t LabelProof::ProofSize(size_t layers, bool enc) {
+ size_t proof_size = 8;
+
+ if ((enc == false) || (layers == 1)) {
+ if (enc == false)
+ proof_size += (layers * 8);
+ proof_size += sizeof(node_t) * LAYER_ONE_REPEAT_SEQ * PARENT_COUNT_BASE;
+ proof_size += sizeof(node_t) * LAYER_ONE_FINAL_SEQ;
+ proof_size += 4;
+ proof_size += 8;
+ layers--;
+ }
+
+ if ((enc == true) && (layers > 1)) {
+ layers = 1;
+ }
+
+ proof_size += (layers * sizeof(node_t) * LAYER_N_REPEAT_SEQ *
+ PARENT_COUNT_BASE);
+ proof_size += (layers * sizeof(node_t) * LAYER_N_REPEAT_SEQ *
+ PARENT_COUNT_EXP);
+ proof_size += (layers * sizeof(node_t) * LAYER_N_FINAL_SEQ);
+ proof_size += (layers * 4);
+ proof_size += (layers * 8);
+
+ return proof_size;
+}
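+
+// Note: ProofSize(layers, enc) returns exactly the number of bytes emitted by
+// a WriteProof() call with the same enc flag, so callers can size the output
+// buffer up front, e.g.
+//   std::vector<uint8_t> buf(LabelProof::ProofSize(layers, false));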
+
+size_t LabelProof::WriteProof(uint8_t* file_ptr, size_t buf_index,
+ bool enc) {
+ uint32_t l = 1;
+
+ if (enc == true) { // Encoding, only last layer
+ l = layers_;
+ } else {
+ // Write vector length of proofs
+ std::memcpy(file_ptr + buf_index, &layers_, sizeof(uint64_t));
+ buf_index += sizeof(uint64_t);
+ }
+
+ while (l <= layers_) {
+ // Number of parents in label calculation
+ std::memcpy(file_ptr + buf_index, &LABEL_PARENTS, sizeof(uint64_t));
+ buf_index += sizeof(uint64_t);
+
+ if (l == 1) {
+ for (size_t k = 0; k < LAYER_ONE_REPEAT_SEQ; ++k) {
+ for (size_t c = 0; c < PARENT_COUNT_BASE; ++c) {
+ std::memcpy(file_ptr + buf_index,
+ labels_ + c + 1 + ((l - 1) * label_inc_), sizeof(node_t));
+ buf_index += sizeof(node_t);
+ }
+ }
+
+ for (size_t c = 0; c < LAYER_ONE_FINAL_SEQ; ++c) {
+ std::memcpy(file_ptr + buf_index,
+ labels_ + c + 1 + ((l - 1) * label_inc_), sizeof(node_t));
+ buf_index += sizeof(node_t);
+ }
+ } else {
+ for (size_t k = 0; k < LAYER_N_REPEAT_SEQ; ++k) {
+ for (size_t c = 0; c < PARENT_COUNT_BASE; ++c) {
+ std::memcpy(file_ptr + buf_index,
+ labels_ + c + 1 + ((l - 1) * label_inc_), sizeof(node_t));
+ buf_index += sizeof(node_t);
+ }
+
+ for (size_t c = 0; c < PARENT_COUNT_EXP; ++c) {
+ std::memcpy(file_ptr + buf_index,
+ labels_ + c + 1 + PARENT_COUNT_BASE + ((l - 2) * label_inc_),
+ sizeof(node_t));
+ buf_index += sizeof(node_t);
+ }
+ }
+
+ for (size_t c = 0; c < LAYER_N_FINAL_SEQ; ++c) {
+ if (c < PARENT_COUNT_BASE) {
+ std::memcpy(file_ptr + buf_index,
+ labels_ + c + 1 + ((l - 1) * label_inc_), sizeof(node_t));
+ } else {
+ std::memcpy(file_ptr + buf_index,
+ labels_ + c + 1 + ((l - 2) * label_inc_),
+ sizeof(node_t));
+ }
+ buf_index += sizeof(node_t);
+ }
+ }
+
+ // Layer index
+ std::memcpy(file_ptr + buf_index, &l, sizeof(uint32_t));
+ buf_index += sizeof(uint32_t);
+
+ // Node - challenge
+ std::memcpy(file_ptr + buf_index, &challenge_, sizeof(uint64_t));
+ buf_index += sizeof(uint64_t);
+
+ l++;
+ }
+
+ return buf_index;
+}
+
+#endif // __LABEL_PROOF_HPP__
diff --git a/extern/supraseal/c1/path_element.hpp b/extern/supraseal/c1/path_element.hpp
new file mode 100644
index 000000000..23403e020
--- /dev/null
+++ b/extern/supraseal/c1/path_element.hpp
@@ -0,0 +1,44 @@
+// Copyright Supranational LLC
+
+#ifndef __PATH_ELEMENT_HPP__
+#define __PATH_ELEMENT_HPP__
+
+class PathElement {
+ public:
+ PathElement(size_t arity, uint64_t index);
+ ~PathElement();
+ void SetHash(size_t index, node_t* hash) { hashes_[index] = hash; }
+ size_t Write(uint8_t* file_ptr, size_t buf_index);
+
+ private:
+ size_t arity_;
+ uint64_t index_;
+ node_t** hashes_; // arity - 1 hashes
+};
+
+PathElement::PathElement(size_t arity, uint64_t index) :
+ arity_(arity),
+ index_(index) {
+ hashes_ = new node_t*[arity - 1]{ nullptr };
+}
+
+PathElement::~PathElement() {
+  delete[] hashes_;  // array allocated with new[] in the constructor
+}
+
+size_t PathElement::Write(uint8_t* file_ptr, size_t buf_index) {
+ uint64_t len = (uint64_t)arity_ - 1;
+ std::memcpy(file_ptr + buf_index, &len, sizeof(uint64_t));
+ buf_index += sizeof(uint64_t);
+
+ for(uint64_t i = 0; i < len; ++i) {
+ std::memcpy(file_ptr + buf_index, hashes_[i], sizeof(node_t));
+ buf_index += sizeof(node_t);
+ }
+
+ std::memcpy(file_ptr + buf_index, &index_, sizeof(uint64_t));
+ buf_index += sizeof(uint64_t);
+
+ return buf_index;
+}
+#endif // __PATH_ELEMENT_HPP__
diff --git a/extern/supraseal/c1/streaming_node_reader_files.hpp b/extern/supraseal/c1/streaming_node_reader_files.hpp
new file mode 100644
index 000000000..2352dd612
--- /dev/null
+++ b/extern/supraseal/c1/streaming_node_reader_files.hpp
@@ -0,0 +1,140 @@
+// Copyright Supranational LLC
+
+#ifndef __STREAMING_LAYER_READER_FILES_HPP__
+#define __STREAMING_LAYER_READER_FILES_HPP__
+
+#include <vector>
+#include <string>
+#include <utility>
+#include <atomic>
+#include <cassert>
+#include "../util/mmap_t.hpp"
+#include <util/thread_pool_t.hpp>
+
+// Encapsulate reading layers from files (file-based analog of the SPDK reader)
+// C is not used here but is retained to be consistent with
+// multi-sector c1
+template<class C>
+class streaming_node_reader_t {
+  std::vector<mmap_t<node_t>> layer_files;
+ // Packed indicates nodes within a single layer will be contiguous
+ bool packed;
+ size_t num_slots;
+ size_t pages_per_slot;
+
+ node_t* buffer;
+
+ thread_pool_t pool;
+
+public:
+  streaming_node_reader_t(size_t sector_size, std::vector<std::string> layer_filenames)
+ : buffer(nullptr)
+ {
+ layer_files.resize(layer_filenames.size());
+ for (size_t i = 0; i < layer_filenames.size(); i++) {
+ layer_files[i].mmap_read(layer_filenames[i], sector_size);
+ }
+ }
+
+ ~streaming_node_reader_t() {
+ free_slots();
+ }
+
+ bool data_is_big_endian() {
+ return true;
+ }
+
+  // Allocate resources to perform N reads, each of size slot_node_count. These
+ // will be indexed by slot_id
+ // For C1 (load_nodes, get_node), we don't need local storage because it can
+ // just use the mmapped files.
+ // For PC2 create buffers to consolidate the data.
+ void alloc_slots(size_t _num_slots, size_t slot_node_count, bool _packed) {
+ packed = _packed;
+ if (!packed) {
+ // Reading will occur directly from files, so do nothing
+ } else {
+ pages_per_slot = (slot_node_count + C::NODES_PER_PAGE - 1) / C::NODES_PER_PAGE;
+ num_slots = _num_slots;
+ assert (posix_memalign((void **)&buffer, PAGE_SIZE,
+ num_slots * pages_per_slot * PAGE_SIZE) == 0);
+ }
+ }
+
+ node_t* get_full_buffer(size_t &bytes) {
+ bytes = num_slots * pages_per_slot * PAGE_SIZE;
+ return buffer;
+ }
+
+ node_t* get_slot(size_t slot) {
+ return &buffer[slot * pages_per_slot * C::NODES_PER_PAGE];
+ }
+
+ void free_slots() {
+ free(buffer);
+ buffer = nullptr;
+ }
+
+ ////////////////////////////////////////
+ // Used for PC2
+ ////////////////////////////////////////
+ node_t* load_layers(size_t slot, uint32_t layer, uint64_t node,
+ size_t node_count, size_t num_layers,
+                      std::atomic<uint64_t>* valid, size_t* valid_count) {
+ if (num_layers == 1) {
+ // Simply return a pointer to the mmap'd file data
+      // This is used by pc2 when building just tree-r
+ assert (layer == C::GetNumLayers() - 1);
+ assert (C::PARALLEL_SECTORS == 1);
+ assert (layer_files.size() == 1);
+
+ *valid = 1;
+ *valid_count = 1;
+
+ return &layer_files[0][node];
+ } else {
+ // Consolidate the layer data into the buffer
+ assert (C::PARALLEL_SECTORS == 1);
+ assert (layer_files.size() == num_layers);
+ // Nodes in each layer are expected to evenly fit in a page so that
+ // the result is packed
+ assert (node_count % C::NODES_PER_PAGE == 0);
+ node_t* dest = &buffer[slot * pages_per_slot * C::NODES_PER_PAGE];
+
+ pool.par_map(num_layers, 1, [&](size_t i) {
+ layer_files[i].read_data(node, &dest[i * node_count], node_count);
+ });
+
+ *valid = 1;
+ *valid_count = 1;
+
+ return dest;
+ }
+ }
+
+ ////////////////////////////////////////
+ // Used for C1
+ ////////////////////////////////////////
+
+ // Load a vector of node IDs into the local buffer
+ // The nodes are a vector of layer, node_id pairs
+ // Since the nodes may be non-consecutive each node will use
+ // an entire page in the buffer.
+  int load_nodes(size_t slot, std::vector<std::pair<size_t, size_t>>& nodes) {
+ assert (!packed);
+ return 0;
+ }
+
+ // Retrieve a sector and node from the local buffer
+ // nodes - the vector of nodes originally read into the local buffer
+ // idx - the index of the node to retrieve
+  // sector_slot - the slot to retrieve
+  node_t& get_node(size_t slot, std::vector<std::pair<size_t, size_t>>& nodes,
+ size_t idx, size_t sector_slot) {
+ assert (!packed);
+ size_t layer = nodes[idx].first;
+ size_t node = nodes[idx].second;
+ node_t& n = layer_files[layer][node];
+ return n;
+ }
+};
+
+#endif
diff --git a/extern/supraseal/c1/tree_d_cc_nodes.h b/extern/supraseal/c1/tree_d_cc_nodes.h
new file mode 100644
index 000000000..c649e231e
--- /dev/null
+++ b/extern/supraseal/c1/tree_d_cc_nodes.h
@@ -0,0 +1,141 @@
+// Copyright Supranational LLC
+
+#ifndef __TREE_D_CC_NODES_H__
+#define __TREE_D_CC_NODES_H__
+
+// CC Sector Tree D is perfectly symmetrical, all nodes per level are equal
+// These values support CC sectors up to 32GB
+// TODO: Need another layer for 64GB
+const uint8_t CC_TREE_D_NODE_VALUES[][32] = {
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
+ { 0xf5, 0xa5, 0xfd, 0x42, 0xd1, 0x6a, 0x20, 0x30,
+ 0x27, 0x98, 0xef, 0x6e, 0xd3, 0x09, 0x97, 0x9b,
+ 0x43, 0x00, 0x3d, 0x23, 0x20, 0xd9, 0xf0, 0xe8,
+ 0xea, 0x98, 0x31, 0xa9, 0x27, 0x59, 0xfb, 0x0b },
+ { 0x37, 0x31, 0xbb, 0x99, 0xac, 0x68, 0x9f, 0x66,
+ 0xee, 0xf5, 0x97, 0x3e, 0x4a, 0x94, 0xda, 0x18,
+ 0x8f, 0x4d, 0xdc, 0xae, 0x58, 0x07, 0x24, 0xfc,
+ 0x6f, 0x3f, 0xd6, 0x0d, 0xfd, 0x48, 0x83, 0x33 },
+ { 0x64, 0x2a, 0x60, 0x7e, 0xf8, 0x86, 0xb0, 0x04,
+ 0xbf, 0x2c, 0x19, 0x78, 0x46, 0x3a, 0xe1, 0xd4,
+ 0x69, 0x3a, 0xc0, 0xf4, 0x10, 0xeb, 0x2d, 0x1b,
+ 0x7a, 0x47, 0xfe, 0x20, 0x5e, 0x5e, 0x75, 0x0f },
+ { 0x57, 0xa2, 0x38, 0x1a, 0x28, 0x65, 0x2b, 0xf4,
+ 0x7f, 0x6b, 0xef, 0x7a, 0xca, 0x67, 0x9b, 0xe4,
+ 0xae, 0xde, 0x58, 0x71, 0xab, 0x5c, 0xf3, 0xeb,
+ 0x2c, 0x08, 0x11, 0x44, 0x88, 0xcb, 0x85, 0x26 },
+ { 0x1f, 0x7a, 0xc9, 0x59, 0x55, 0x10, 0xe0, 0x9e,
+ 0xa4, 0x1c, 0x46, 0x0b, 0x17, 0x64, 0x30, 0xbb,
+ 0x32, 0x2c, 0xd6, 0xfb, 0x41, 0x2e, 0xc5, 0x7c,
+ 0xb1, 0x7d, 0x98, 0x9a, 0x43, 0x10, 0x37, 0x2f },
+ { 0xfc, 0x7e, 0x92, 0x82, 0x96, 0xe5, 0x16, 0xfa,
+ 0xad, 0xe9, 0x86, 0xb2, 0x8f, 0x92, 0xd4, 0x4a,
+ 0x4f, 0x24, 0xb9, 0x35, 0x48, 0x52, 0x23, 0x37,
+ 0x6a, 0x79, 0x90, 0x27, 0xbc, 0x18, 0xf8, 0x33 },
+ { 0x08, 0xc4, 0x7b, 0x38, 0xee, 0x13, 0xbc, 0x43,
+ 0xf4, 0x1b, 0x91, 0x5c, 0x0e, 0xed, 0x99, 0x11,
+ 0xa2, 0x60, 0x86, 0xb3, 0xed, 0x62, 0x40, 0x1b,
+ 0xf9, 0xd5, 0x8b, 0x8d, 0x19, 0xdf, 0xf6, 0x24 },
+ { 0xb2, 0xe4, 0x7b, 0xfb, 0x11, 0xfa, 0xcd, 0x94,
+ 0x1f, 0x62, 0xaf, 0x5c, 0x75, 0x0f, 0x3e, 0xa5,
+ 0xcc, 0x4d, 0xf5, 0x17, 0xd5, 0xc4, 0xf1, 0x6d,
+ 0xb2, 0xb4, 0xd7, 0x7b, 0xae, 0xc1, 0xa3, 0x2f },
+ { 0xf9, 0x22, 0x61, 0x60, 0xc8, 0xf9, 0x27, 0xbf,
+ 0xdc, 0xc4, 0x18, 0xcd, 0xf2, 0x03, 0x49, 0x31,
+ 0x46, 0x00, 0x8e, 0xae, 0xfb, 0x7d, 0x02, 0x19,
+ 0x4d, 0x5e, 0x54, 0x81, 0x89, 0x00, 0x51, 0x08 },
+ { 0x2c, 0x1a, 0x96, 0x4b, 0xb9, 0x0b, 0x59, 0xeb,
+ 0xfe, 0x0f, 0x6d, 0xa2, 0x9a, 0xd6, 0x5a, 0xe3,
+ 0xe4, 0x17, 0x72, 0x4a, 0x8f, 0x7c, 0x11, 0x74,
+ 0x5a, 0x40, 0xca, 0xc1, 0xe5, 0xe7, 0x40, 0x11 },
+ { 0xfe, 0xe3, 0x78, 0xce, 0xf1, 0x64, 0x04, 0xb1,
+ 0x99, 0xed, 0xe0, 0xb1, 0x3e, 0x11, 0xb6, 0x24,
+ 0xff, 0x9d, 0x78, 0x4f, 0xbb, 0xed, 0x87, 0x8d,
+ 0x83, 0x29, 0x7e, 0x79, 0x5e, 0x02, 0x4f, 0x02 },
+ { 0x8e, 0x9e, 0x24, 0x03, 0xfa, 0x88, 0x4c, 0xf6,
+ 0x23, 0x7f, 0x60, 0xdf, 0x25, 0xf8, 0x3e, 0xe4,
+ 0x0d, 0xca, 0x9e, 0xd8, 0x79, 0xeb, 0x6f, 0x63,
+ 0x52, 0xd1, 0x50, 0x84, 0xf5, 0xad, 0x0d, 0x3f },
+ { 0x75, 0x2d, 0x96, 0x93, 0xfa, 0x16, 0x75, 0x24,
+ 0x39, 0x54, 0x76, 0xe3, 0x17, 0xa9, 0x85, 0x80,
+ 0xf0, 0x09, 0x47, 0xaf, 0xb7, 0xa3, 0x05, 0x40,
+ 0xd6, 0x25, 0xa9, 0x29, 0x1c, 0xc1, 0x2a, 0x07 },
+ { 0x70, 0x22, 0xf6, 0x0f, 0x7e, 0xf6, 0xad, 0xfa,
+ 0x17, 0x11, 0x7a, 0x52, 0x61, 0x9e, 0x30, 0xce,
+ 0xa8, 0x2c, 0x68, 0x07, 0x5a, 0xdf, 0x1c, 0x66,
+ 0x77, 0x86, 0xec, 0x50, 0x6e, 0xef, 0x2d, 0x19 },
+ { 0xd9, 0x98, 0x87, 0xb9, 0x73, 0x57, 0x3a, 0x96,
+ 0xe1, 0x13, 0x93, 0x64, 0x52, 0x36, 0xc1, 0x7b,
+ 0x1f, 0x4c, 0x70, 0x34, 0xd7, 0x23, 0xc7, 0xa9,
+ 0x9f, 0x70, 0x9b, 0xb4, 0xda, 0x61, 0x16, 0x2b },
+ { 0xd0, 0xb5, 0x30, 0xdb, 0xb0, 0xb4, 0xf2, 0x5c,
+ 0x5d, 0x2f, 0x2a, 0x28, 0xdf, 0xee, 0x80, 0x8b,
+ 0x53, 0x41, 0x2a, 0x02, 0x93, 0x1f, 0x18, 0xc4,
+ 0x99, 0xf5, 0xa2, 0x54, 0x08, 0x6b, 0x13, 0x26 },
+ { 0x84, 0xc0, 0x42, 0x1b, 0xa0, 0x68, 0x5a, 0x01,
+ 0xbf, 0x79, 0x5a, 0x23, 0x44, 0x06, 0x4f, 0xe4,
+ 0x24, 0xbd, 0x52, 0xa9, 0xd2, 0x43, 0x77, 0xb3,
+ 0x94, 0xff, 0x4c, 0x4b, 0x45, 0x68, 0xe8, 0x11 },
+ { 0x65, 0xf2, 0x9e, 0x5d, 0x98, 0xd2, 0x46, 0xc3,
+ 0x8b, 0x38, 0x8c, 0xfc, 0x06, 0xdb, 0x1f, 0x6b,
+ 0x02, 0x13, 0x03, 0xc5, 0xa2, 0x89, 0x00, 0x0b,
+ 0xdc, 0xe8, 0x32, 0xa9, 0xc3, 0xec, 0x42, 0x1c },
+ { 0xa2, 0x24, 0x75, 0x08, 0x28, 0x58, 0x50, 0x96,
+ 0x5b, 0x7e, 0x33, 0x4b, 0x31, 0x27, 0xb0, 0xc0,
+ 0x42, 0xb1, 0xd0, 0x46, 0xdc, 0x54, 0x40, 0x21,
+ 0x37, 0x62, 0x7c, 0xd8, 0x79, 0x9c, 0xe1, 0x3a },
+ { 0xda, 0xfd, 0xab, 0x6d, 0xa9, 0x36, 0x44, 0x53,
+ 0xc2, 0x6d, 0x33, 0x72, 0x6b, 0x9f, 0xef, 0xe3,
+ 0x43, 0xbe, 0x8f, 0x81, 0x64, 0x9e, 0xc0, 0x09,
+ 0xaa, 0xd3, 0xfa, 0xff, 0x50, 0x61, 0x75, 0x08 },
+ { 0xd9, 0x41, 0xd5, 0xe0, 0xd6, 0x31, 0x4a, 0x99,
+ 0x5c, 0x33, 0xff, 0xbd, 0x4f, 0xbe, 0x69, 0x11,
+ 0x8d, 0x73, 0xd4, 0xe5, 0xfd, 0x2c, 0xd3, 0x1f,
+ 0x0f, 0x7c, 0x86, 0xeb, 0xdd, 0x14, 0xe7, 0x06 },
+ { 0x51, 0x4c, 0x43, 0x5c, 0x3d, 0x04, 0xd3, 0x49,
+ 0xa5, 0x36, 0x5f, 0xbd, 0x59, 0xff, 0xc7, 0x13,
+ 0x62, 0x91, 0x11, 0x78, 0x59, 0x91, 0xc1, 0xa3,
+ 0xc5, 0x3a, 0xf2, 0x20, 0x79, 0x74, 0x1a, 0x2f },
+ { 0xad, 0x06, 0x85, 0x39, 0x69, 0xd3, 0x7d, 0x34,
+ 0xff, 0x08, 0xe0, 0x9f, 0x56, 0x93, 0x0a, 0x4a,
+ 0xd1, 0x9a, 0x89, 0xde, 0xf6, 0x0c, 0xbf, 0xee,
+ 0x7e, 0x1d, 0x33, 0x81, 0xc1, 0xe7, 0x1c, 0x37 },
+ { 0x39, 0x56, 0x0e, 0x7b, 0x13, 0xa9, 0x3b, 0x07,
+ 0xa2, 0x43, 0xfd, 0x27, 0x20, 0xff, 0xa7, 0xcb,
+ 0x3e, 0x1d, 0x2e, 0x50, 0x5a, 0xb3, 0x62, 0x9e,
+ 0x79, 0xf4, 0x63, 0x13, 0x51, 0x2c, 0xda, 0x06 },
+ { 0xcc, 0xc3, 0xc0, 0x12, 0xf5, 0xb0, 0x5e, 0x81,
+ 0x1a, 0x2b, 0xbf, 0xdd, 0x0f, 0x68, 0x33, 0xb8,
+ 0x42, 0x75, 0xb4, 0x7b, 0xf2, 0x29, 0xc0, 0x05,
+ 0x2a, 0x82, 0x48, 0x4f, 0x3c, 0x1a, 0x5b, 0x3d },
+ { 0x7d, 0xf2, 0x9b, 0x69, 0x77, 0x31, 0x99, 0xe8,
+ 0xf2, 0xb4, 0x0b, 0x77, 0x91, 0x9d, 0x04, 0x85,
+ 0x09, 0xee, 0xd7, 0x68, 0xe2, 0xc7, 0x29, 0x7b,
+ 0x1f, 0x14, 0x37, 0x03, 0x4f, 0xc3, 0xc6, 0x2c },
+ { 0x66, 0xce, 0x05, 0xa3, 0x66, 0x75, 0x52, 0xcf,
+ 0x45, 0xc0, 0x2b, 0xcc, 0x4e, 0x83, 0x92, 0x91,
+ 0x9b, 0xde, 0xac, 0x35, 0xde, 0x2f, 0xf5, 0x62,
+ 0x71, 0x84, 0x8e, 0x9f, 0x7b, 0x67, 0x51, 0x07 },
+ { 0xd8, 0x61, 0x02, 0x18, 0x42, 0x5a, 0xb5, 0xe9,
+ 0x5b, 0x1c, 0xa6, 0x23, 0x9d, 0x29, 0xa2, 0xe4,
+ 0x20, 0xd7, 0x06, 0xa9, 0x6f, 0x37, 0x3e, 0x2f,
+ 0x9c, 0x9a, 0x91, 0xd7, 0x59, 0xd1, 0x9b, 0x01 },
+ { 0x6d, 0x36, 0x4b, 0x1e, 0xf8, 0x46, 0x44, 0x1a,
+ 0x5a, 0x4a, 0x68, 0x86, 0x23, 0x14, 0xac, 0xc0,
+ 0xa4, 0x6f, 0x01, 0x67, 0x17, 0xe5, 0x34, 0x43,
+ 0xe8, 0x39, 0xee, 0xdf, 0x83, 0xc2, 0x85, 0x3c },
+ { 0x07, 0x7e, 0x5f, 0xde, 0x35, 0xc5, 0x0a, 0x93,
+ 0x03, 0xa5, 0x50, 0x09, 0xe3, 0x49, 0x8a, 0x4e,
+ 0xbe, 0xdf, 0xf3, 0x9c, 0x42, 0xb7, 0x10, 0xb7,
+ 0x30, 0xd8, 0xec, 0x7a, 0xc7, 0xaf, 0xa6, 0x3e },
+ // TODO: Placeholder for 64GB CC
+ { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }
+
+};
+#endif // __TREE_D_CC_NODES_H__
diff --git a/extern/supraseal/c1/tree_proof.hpp b/extern/supraseal/c1/tree_proof.hpp
new file mode 100644
index 000000000..1d6b8590a
--- /dev/null
+++ b/extern/supraseal/c1/tree_proof.hpp
@@ -0,0 +1,342 @@
+// Copyright Supranational LLC
+
+#ifndef __TREE_PROOF_HPP__
+#define __TREE_PROOF_HPP__
+
+#include "tree_d_cc_nodes.h"
+
+class TreeProof {
+ public:
+  TreeProof(size_t arity, size_t levels,
+            node_t** tree_bufs, size_t num_tree_bufs = 1,
+            size_t discard_rows = 0);
+ virtual ~TreeProof();
+
+ void SetRoot(node_t* root) { root_ = root; }
+ void SetLeaf(node_t* leaf) { leaf_ = leaf; }
+ size_t WriteProof(uint8_t* file_ptr, size_t buf_index, uint32_t proof_type);
+ static size_t ProofSize(size_t arity, size_t levels, uint32_t proof_type);
+
+ virtual void GenInclusionPath(uint64_t challenge,
+ node_t* first_level = nullptr);
+ protected:
+ bool PerformFirstLevels(uint64_t challenge, node_t* first_level,
+ size_t* indices);
+
+ // The index array will be filled with which side of the input the
+ // challenge node to prove is located on all the way up the tree.
+ void GetTreePaths(size_t* indices, uint64_t challenge);
+
+ size_t arity_;
+ size_t levels_;
+ node_t** tree_bufs_;
+ size_t tree_bufs_len_;
+ size_t discard_rows_;
+ node_t* root_;
+ node_t* leaf_;
+ node_t* path_buf_; // Used for rebuilding trees if needed
+  std::vector<PathElement*> path_; // levels number of PathElements
+};
+
+TreeProof::TreeProof(size_t arity, size_t levels,
+                     node_t** tree_bufs, size_t tree_bufs_len,
+                     size_t discard_rows) :
+ arity_(arity),
+ levels_(levels),
+ tree_bufs_(tree_bufs),
+ tree_bufs_len_(tree_bufs_len),
+ discard_rows_(discard_rows)
+{
+ path_.reserve(levels);
+
+ if (discard_rows > 0) {
+ path_buf_ = new node_t[discard_rows * (arity - 1)];
+ } else {
+ path_buf_ = nullptr;
+ }
+}
+
+TreeProof::~TreeProof() {
+ if (path_buf_ != nullptr) {
+    delete[] path_buf_;  // array allocated with new[]
+ }
+
+ for (size_t l = 0; l < levels_; ++l) {
+ delete path_[l];
+ }
+}
+
+size_t TreeProof::ProofSize(size_t arity, size_t levels, uint32_t proof_type) {
+ size_t proof_size = 4; // proof type u32
+ proof_size += sizeof(node_t); // root
+ proof_size += sizeof(node_t); // leaf
+
+ proof_size += 8; // base size u64
+ proof_size += (((sizeof(node_t) * (arity - 1)) + 8 + 8) * levels); // path
+
+ if (proof_type == 1) {
+ proof_size += 8; // sub size u64
+ }
+
+ return proof_size;
+}
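+
+// Example (hypothetical parameters): for proof_type 0 with arity 8 and
+// 10 levels this is 4 + 32 + 32 + 8 + (7 * 32 + 8 + 8) * 10 = 2476 bytes.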
+
+size_t TreeProof::WriteProof(uint8_t* file_ptr, size_t buf_index,
+ uint32_t proof_type) {
+ std::memcpy(file_ptr + buf_index, &proof_type, sizeof(uint32_t));
+ buf_index += sizeof(uint32_t);
+
+ if (proof_type == 0) {
+ // Root
+ std::memcpy(file_ptr + buf_index, root_, sizeof(node_t));
+ buf_index += sizeof(node_t);
+
+ // Leaf
+ std::memcpy(file_ptr + buf_index, leaf_, sizeof(node_t));
+ buf_index += sizeof(node_t);
+
+ // Proof size
+ std::memcpy(file_ptr + buf_index, &levels_, sizeof(uint64_t));
+ buf_index += sizeof(uint64_t);
+
+ // Proofs
+ for (size_t i = 0; i < levels_; ++i) {
+ buf_index = path_[i]->Write(file_ptr, buf_index);
+ }
+ } else if (proof_type == 1) {
+ // Only supports specific tree of single level sub (e.g. 32G case)
+
+ // Base proof size
+ uint64_t base_proof_vec_len = levels_ - 1;
+ std::memcpy(file_ptr + buf_index, &base_proof_vec_len, sizeof(uint64_t));
+ buf_index += sizeof(uint64_t);
+
+ // Base proofs
+ for (size_t i = 0; i < base_proof_vec_len; ++i) {
+ buf_index = path_[i]->Write(file_ptr, buf_index);
+ }
+
+ // Sub proof size
+ uint64_t sub_proof_vec_len = 1;
+ std::memcpy(file_ptr + buf_index, &sub_proof_vec_len, sizeof(uint64_t));
+ buf_index += sizeof(uint64_t);
+
+ // Sub proof
+ buf_index = path_[base_proof_vec_len]->Write(file_ptr, buf_index);
+
+ // Root
+ std::memcpy(file_ptr + buf_index, root_, sizeof(node_t));
+ buf_index += sizeof(node_t);
+
+ // Leaf
+ std::memcpy(file_ptr + buf_index, leaf_, sizeof(node_t));
+ buf_index += sizeof(node_t);
+ }
+
+ return buf_index;
+}
+
+/*
+ Rebuilding discarded tree r rows
+ Gather enough nodes around the challenge to build subtree
+ First level inclusion path is nodes
+ Second level inclusion path requires hashing the 7 adjacent nodes
+ Third level inclusion path requires hashing two levels to get 7 adjacent
+ Fourth level and above are in the tree r files
+
+ O
+ ____/|\____
+ / ... \
+ O O
+ ____________________/|\__ __/|\_____________________
+ / | | \
+ O O O O
+ / / / / \ \ \ \ / / / / \ \ \ \ ... / / / / \ \ \ \ / / / / \ \ \ \
+ O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O
+ 0 1 2 3 4 5 6 7 8 9 A B C D E F ... 1F0 1F7 1F8 1FF
+*/
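+// For example, with arity 8 and two discarded rows, `labels` below is
+// 8^3 = 512: those 512 first-level labels around the challenge are enough to
+// recompute the two discarded tree-r rows.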
+bool TreeProof::PerformFirstLevels(uint64_t challenge,
+ node_t* first_level,
+ size_t* indices) {
+ const size_t arity_mask = ~(arity_ - 1);
+ const size_t labels = pow(arity_, discard_rows_ + 1);
+ const size_t index_mask = labels - 1;
+ const size_t sec_mask = ~((arity_ * arity_) - 1);
+
+ size_t leaf_start = (challenge & arity_mask) & index_mask;
+ size_t leaf_idx = indices[0];
+ size_t hash_idx = 0;
+
+ // Set leaf from first level
+ SetLeaf((node_t*)(first_level + leaf_start + leaf_idx));
+
+ // First level labels are separate from tree buffer files
+ path_.push_back(new PathElement(arity_, (uint64_t) indices[0]));
+ for (size_t a = 0; a < arity_; ++a) {
+ if (a != leaf_idx) {
+ path_[0]->SetHash(hash_idx++, (node_t*)(first_level + leaf_start + a));
+ }
+ }
+
+ // Second level needs to hash adjacent labels
+ leaf_idx = indices[1];
+ path_.push_back(new PathElement(arity_, (uint64_t) indices[1]));
+
+ Poseidon p(arity_);
+
+ hash_idx = 0;
+ leaf_start &= sec_mask;
+ for (size_t a = 0; a < arity_; ++a) {
+ if (a != leaf_idx) {
+ p.Hash((uint8_t*)&(path_buf_[hash_idx]),
+ (uint8_t*)&(first_level[leaf_start + (a * arity_)]));
+ path_[1]->SetHash(hash_idx, &(path_buf_[hash_idx]));
+ hash_idx++;
+ }
+ }
+
+ if (levels_ == 2) { // 2K case
+ return true;
+ }
+
+ // Third level needs to hash adjacent labels for two levels
+ uint8_t p_hash_buf[arity_][sizeof(node_t)];
+ path_.push_back(new PathElement(arity_, (uint64_t) indices[2]));
+ hash_idx = 0;
+ leaf_start >>= (size_t) log2(arity_ * arity_);
+ for (size_t a_o = 0; a_o < arity_; ++a_o) {
+ // leaf_start is the node to skip
+ if (a_o != leaf_start) {
+ for (size_t a_i = 0; a_i < arity_; ++a_i) {
+ p.Hash(p_hash_buf[a_i], (uint8_t*)&(first_level[(a_o * arity_ * arity_)+
+ (a_i * arity_)]));
+ }
+ p.Hash((uint8_t*)&(path_buf_[hash_idx + arity_ - 1]), p_hash_buf[0]);
+ path_[2]->SetHash(hash_idx, &(path_buf_[hash_idx + arity_ - 1]));
+ hash_idx++;
+ }
+ }
+
+ if (levels_ == 3) {
+ return true;
+ }
+
+ return false;
+}
+
+void TreeProof::GenInclusionPath(uint64_t challenge,
+ node_t* first_level) {
+ // Get the challenge index for each level of the tree
+ size_t indices[levels_];
+ GetTreePaths(indices, challenge);
+
+ size_t starting_level = 0;
+
+ if (first_level != nullptr) {
+ bool done = PerformFirstLevels(challenge, first_level, indices);
+ if (done) return;
+ starting_level = 3;
+ }
+
+ size_t finish_level = levels_ - 1;
+ if (tree_bufs_len_ == 1) {
+ finish_level = levels_;
+ }
+
+ const size_t arity_mask = ~(arity_ - 1);
+ const size_t arity_lg = (size_t) log2(arity_);
+ const size_t leaves = pow(2, levels_ * arity_lg);
+ const size_t file_leaves = (size_t) (leaves / tree_bufs_len_);
+ const size_t file_shift = (size_t) log2(file_leaves);
+ const size_t tree_idx_mask = file_leaves - 1;
+ size_t start_level_size = file_leaves;
+
+ if (first_level != nullptr) {
+ size_t act_file_leaves = pow(2, (levels_ - (discard_rows_ + 1)) * arity_lg);
+ start_level_size = (size_t) (act_file_leaves / tree_bufs_len_);
+ }
+
+ const size_t buf_idx = challenge >> file_shift;
+ size_t cur_level_size = start_level_size;
+ size_t add_level_size = 0;
+ size_t leaf_idx;
+ size_t hash_idx;
+ size_t leaf_start;
+
+ for (size_t l = starting_level; l < finish_level; ++l) {
+ leaf_idx = indices[l];
+ leaf_start = challenge & tree_idx_mask;
+ leaf_start >>= (l * arity_lg);
+ leaf_start &= arity_mask;
+ leaf_start += add_level_size;
+ add_level_size += cur_level_size;
+ cur_level_size >>= arity_lg;
+
+ if (l == 0) {
+ SetLeaf((node_t*)(tree_bufs_[buf_idx] + leaf_start + leaf_idx));
+ }
+
+ path_.push_back(new PathElement(arity_, (uint64_t)leaf_idx));
+ hash_idx = 0;
+ for (size_t a = 0; a < arity_; ++a) {
+ if (a != leaf_idx) {
+ path_[l]->SetHash(hash_idx++,
+ (node_t*)(tree_bufs_[buf_idx] + leaf_start + a));
+ }
+ }
+ }
+
+ if (tree_bufs_len_ == 1) {
+ return;
+ }
+
+ leaf_idx = indices[levels_ - 1];
+ path_.push_back(new PathElement(arity_, (uint64_t)leaf_idx));
+ hash_idx = 0;
+ for (size_t a = 0; a < arity_; ++a) {
+ if (a != leaf_idx) {
+ path_[levels_ - 1]->SetHash(hash_idx++,
+ (node_t*)(tree_bufs_[a] + add_level_size));
+ }
+ }
+}
+
+void TreeProof::GetTreePaths(size_t* indices, uint64_t challenge) {
+ size_t arity_lg = log2(arity_);
+ size_t arity_mask = arity_ - 1;
+
+ for (size_t i = 0; i < levels_; ++i) {
+ indices[i] = challenge & arity_mask;
+ challenge >>= arity_lg;
+ }
+}
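+
+// Example: with arity 8 each level consumes 3 challenge bits, so for
+// challenge 0b110'101'011 GetTreePaths yields {3, 5, 6} from leaf to root.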
+
+class TreeDCCProof : public TreeProof {
+ public:
+ TreeDCCProof(size_t arity, size_t levels,
+ node_t** tree_bufs, size_t num_tree_bufs,
+ size_t discard_rows) :
+ TreeProof(arity, levels, tree_bufs, num_tree_bufs, discard_rows) {
+ // TODO: for 64GB would need to access the next layer. CC_TREE_D_NODE_VALUES
+ // would need to be filled in.
+ assert (levels <= 31);
+
+ SetRoot((node_t*)(CC_TREE_D_NODE_VALUES[levels]));
+ SetLeaf((node_t*)(CC_TREE_D_NODE_VALUES[0]));
+ }
+
+  void GenInclusionPath(uint64_t challenge, node_t* first_level) override;
+};
+
+void TreeDCCProof::GenInclusionPath(uint64_t challenge,
+ node_t* first_level) {
+ size_t comm_d_indices[levels_];
+ GetTreePaths(comm_d_indices, challenge);
+
+ for (size_t l = 0; l < levels_; ++l) {
+ path_.push_back(new PathElement(arity_, (uint64_t) comm_d_indices[l]));
+ path_[l]->SetHash(0, (node_t*)(first_level + l));
+ }
+}
+
+#endif // __TREE_PROOF_HPP__
diff --git a/extern/supraseal/c2/Cargo.toml b/extern/supraseal/c2/Cargo.toml
new file mode 100644
index 000000000..802e1391c
--- /dev/null
+++ b/extern/supraseal/c2/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "supraseal-c2"
+version = "0.1.0"
+edition = "2021"
+license = "Apache-2.0"
+description = "CUDA Groth16 proof generator for Filecoin"
+repository = "https://github.com/supranational/supra_seal"
+
+[dependencies]
+blst = "^0.3.11"
+sppark = "^0.1.5"
+
+[features]
+default = []
+# Compile in portable mode, without ISA extensions.
+# Binary can be executed on all systems.
+portable = [ "blst/portable" ]
+# Enable ADX even if the host CPU doesn't support it.
+# Binary can be executed on Broadwell+ and Ryzen+ systems.
+force-adx = [ "blst/force-adx" ]
+quiet = []
+
+[build-dependencies]
+cc = { version = "^1.0.70", features = ["parallel"] }
diff --git a/extern/supraseal/c2/README.md b/extern/supraseal/c2/README.md
new file mode 100644
index 000000000..2a084e40e
--- /dev/null
+++ b/extern/supraseal/c2/README.md
@@ -0,0 +1,11 @@
+# Commit 2
+
+The final step of the sealing process is to generate a zkSNARK for the proof of replication (PoRep). Using the inclusion proofs from C1, the inputs are run through the PoRep circuit and a proof is generated using Groth16.
+
+## Intended Usage
+
+The SupraSeal C2 operations differ from the rest of the library in that they depend on primitives from external libraries, specifically bellperson, through a modified version of `synthesize_circuits_batch()` used to generate the witness. From there the vectors are run through various MSM and NTT kernels on the GPU and CPU. Note that this requires a Rust-based interface, as opposed to the C/C++ seen throughout SupraSeal.
+
+bellperson v0.26 interfaces with this implementation through the `cuda-supraseal` feature.
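+
+Assuming the standard Cargo feature syntax (crate version and feature name as stated above; verify against your bellperson release), enabling it looks like:
+
+```toml
+[dependencies]
+bellperson = { version = "0.26", features = ["cuda-supraseal"] }
+```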
+
+To run a 32GiB test/benchmark, change directory to `demos/c2-test` and execute `cargo test --release -- --nocapture`. It is assumed that you have previously fetched the corresponding parameters. The expected execution time for the test is approximately 2-3 minutes, depending on the system.
diff --git a/extern/supraseal/c2/build.rs b/extern/supraseal/c2/build.rs
new file mode 100644
index 000000000..3f42899a0
--- /dev/null
+++ b/extern/supraseal/c2/build.rs
@@ -0,0 +1,52 @@
+use std::env;
+
+fn main() {
+ groth16_cuda();
+}
+
+fn groth16_cuda() {
+ let mut nvcc = cc::Build::new();
+ nvcc.cuda(true);
+ nvcc.flag("-arch=sm_80");
+ nvcc.flag("-gencode").flag("arch=compute_70,code=sm_70");
+ nvcc.flag("-t0");
+ nvcc.define("TAKE_RESPONSIBILITY_FOR_ERROR_MESSAGE", None);
+ nvcc.define("FEATURE_BLS12_381", None);
+ apply_blst_flags(&mut nvcc);
+ if let Some(include) = env::var_os("DEP_BLST_C_SRC") {
+ nvcc.include(&include);
+ }
+ if let Some(include) = env::var_os("DEP_SPPARK_ROOT") {
+ nvcc.include(include);
+ }
+ nvcc.flag("-Xcompiler").flag("-Wno-subobject-linkage");
+ nvcc.flag("-Xcompiler").flag("-Wno-unused-function");
+
+ nvcc.file("cuda/groth16_cuda.cu").compile("groth16_cuda");
+
+ println!("cargo:rerun-if-changed=cuda");
+ println!("cargo:rerun-if-env-changed=CXXFLAGS");
+}
+
+fn apply_blst_flags(nvcc: &mut cc::Build) {
+ let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
+
+ match (cfg!(feature = "portable"), cfg!(feature = "force-adx")) {
+ (true, false) => {
+ nvcc.define("__BLST_PORTABLE__", None);
+ }
+ (false, true) => {
+ if target_arch.eq("x86_64") {
+ nvcc.define("__ADX__", None);
+ }
+ }
+ (false, false) =>
+ {
+ #[cfg(target_arch = "x86_64")]
+ if target_arch.eq("x86_64") && std::is_x86_feature_detected!("adx") {
+ nvcc.define("__ADX__", None);
+ }
+ }
+ (true, true) => panic!("Cannot compile with both `portable` and `force-adx` features"),
+ }
+}
diff --git a/extern/supraseal/c2/cuda/groth16_cuda.cu b/extern/supraseal/c2/cuda/groth16_cuda.cu
new file mode 100644
index 000000000..affab5c9e
--- /dev/null
+++ b/extern/supraseal/c2/cuda/groth16_cuda.cu
@@ -0,0 +1,698 @@
+// Copyright Supranational LLC
+
+#include <atomic>
+#include <mutex>
+#include <thread>
+#include <vector>
+#include <util/thread_pool_t.hpp>
+
+#if defined(FEATURE_BLS12_381)
+# include <ff/bls12-381.hpp>
+#else
+# error "only FEATURE_BLS12_381 is supported"
+#endif
+
+#include <ec/jacobian_t.hpp>
+#include <ec/xyzz_t.hpp>
+
+typedef jacobian_t<fp_t> point_t;
+typedef xyzz_t<fp_t> bucket_t;
+typedef bucket_t::affine_t affine_t;
+
+typedef jacobian_t<fp2_t> point_fp2_t;
+typedef xyzz_t<fp2_t> bucket_fp2_t;
+typedef bucket_fp2_t::affine_t affine_fp2_t;
+
+typedef fr_t scalar_t;
+
+#define SPPARK_DONT_INSTANTIATE_TEMPLATES
+#include <msm/pippenger.cuh>
+#include <msm/pippenger.hpp>
+
+template<typename Scalar>
+struct Assignment {
+ // Density of queries
+ const uint64_t* a_aux_density;
+ size_t a_aux_bit_len;
+ size_t a_aux_popcount;
+
+ const uint64_t* b_inp_density;
+ size_t b_inp_bit_len;
+ size_t b_inp_popcount;
+
+ const uint64_t* b_aux_density;
+ size_t b_aux_bit_len;
+ size_t b_aux_popcount;
+
+ // Evaluations of A, B, C polynomials
+ const Scalar* a;
+ const Scalar* b;
+ const Scalar* c;
+ size_t abc_size;
+
+ // Assignments of variables
+ const Scalar* inp_assignment_data;
+ size_t inp_assignment_size;
+
+ const Scalar* aux_assignment_data;
+ size_t aux_assignment_size;
+};
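+
+// The density fields above are little-endian bitmaps: bit j of word density[i]
+// covers variable i * 64 + j, and each *_popcount field is the total number of
+// set bits, i.e. how many scalars that query contributes.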
+
+#include "groth16_ntt_h.cu"
+#include "groth16_split_msm.cu"
+
+template<class point_t, class affine_t>
+static void mult(point_t& ret, const affine_t point, const scalar_t& fr,
+ size_t top = scalar_t::nbits)
+{
+#ifndef __CUDA_ARCH__
+ scalar_t::pow_t scalar;
+ fr.to_scalar(scalar);
+
+ mult(ret, point, scalar, top);
+#endif
+}
+
+static thread_pool_t groth16_pool;
+
+struct msm_results {
+  std::vector<point_t> h;
+  std::vector<point_t> l;
+  std::vector<point_t> a;
+  std::vector<point_t> b_g1;
+  std::vector<point_fp2_t> b_g2;
+
+ msm_results(size_t num_circuits) : h(num_circuits),
+ l(num_circuits),
+ a(num_circuits),
+ b_g1(num_circuits),
+ b_g2(num_circuits) {}
+};
+
+struct groth16_proof {
+ point_t::affine_t a;
+ point_fp2_t::affine_t b;
+ point_t::affine_t c;
+};
+
+#include "groth16_srs.cuh"
+
+#if defined(_MSC_VER) && !defined(__clang__) && !defined(__builtin_popcountll)
+#define __builtin_popcountll(x) __popcnt64(x)
+#endif
+
+extern "C"
+RustError::by_value generate_groth16_proofs_c(const Assignment<fr_t> provers[],
+ size_t num_circuits,
+ const fr_t r_s[], const fr_t s_s[],
+ groth16_proof proofs[], SRS& srs)
+{
+ // Mutex to serialize execution of this subroutine
+ static std::mutex mtx;
+  std::lock_guard<std::mutex> lock(mtx);
+
+ if (!ngpus()) {
+ return RustError{ENODEV, "No CUDA devices available"};
+ }
+
+ const verifying_key* vk = &srs.get_vk();
+
+ auto points_h = srs.get_h_slice();
+ auto points_l = srs.get_l_slice();
+ auto points_a = srs.get_a_slice();
+ auto points_b_g1 = srs.get_b_g1_slice();
+ auto points_b_g2 = srs.get_b_g2_slice();
+
+ for (size_t c = 0; c < num_circuits; c++) {
+ auto& p = provers[c];
+
+ assert(points_l.size() == p.aux_assignment_size);
+ assert(points_a.size() == p.inp_assignment_size + p.a_aux_popcount);
+ assert(points_b_g1.size() == p.b_inp_popcount + p.b_aux_popcount);
+ assert(p.a_aux_bit_len == p.aux_assignment_size);
+ assert(p.b_aux_bit_len == p.aux_assignment_size);
+ assert(p.b_inp_bit_len == p.inp_assignment_size);
+ }
+
+ bool l_split_msm = true, a_split_msm = true,
+ b_split_msm = true;
+ size_t l_popcount = 0, a_popcount = 0, b_popcount = 0;
+
+ split_vectors split_vectors_l{num_circuits, points_l.size()};
+ split_vectors split_vectors_a{num_circuits, points_a.size()};
+ split_vectors split_vectors_b{num_circuits, points_b_g1.size()};
+
+  std::vector<affine_t> tail_msm_l_bases,
+                        tail_msm_a_bases,
+                        tail_msm_b_g1_bases;
+  std::vector<affine_fp2_t> tail_msm_b_g2_bases;
+
+ msm_results results{num_circuits};
+
+ semaphore_t barrier;
+ std::atomic caught_exception{false};
+ size_t n_gpus = std::min(ngpus(), num_circuits);
+
+ std::thread prep_msm_thread([&, num_circuits]
+ {
+ // pre-processing step
+ // mark inp and significant scalars in aux assignments
+ groth16_pool.par_map(num_circuits, [&](size_t c) {
+ auto& prover = provers[c];
+ auto& l_bit_vector = split_vectors_l.bit_vector[c];
+ auto& a_bit_vector = split_vectors_a.bit_vector[c];
+ auto& b_bit_vector = split_vectors_b.bit_vector[c];
+
+ size_t a_bits_cursor = 0, b_bits_cursor = 0;
+ uint64_t a_bits = 0, b_bits = 0;
+ uint32_t a_bit_off = 0, b_bit_off = 0;
+
+ size_t inp_size = prover.inp_assignment_size;
+
+ for (size_t i = 0; i < inp_size; i += CHUNK_BITS) {
+ uint64_t b_map = prover.b_inp_density[i / CHUNK_BITS];
+ uint64_t map_mask = 1;
+ size_t chunk_bits = std::min(CHUNK_BITS, inp_size - i);
+
+ for (size_t j = 0; j < chunk_bits; j++, map_mask <<= 1) {
+ a_bits |= map_mask;
+
+ if (b_map & map_mask) {
+ b_bits |= (uint64_t)1 << b_bit_off;
+ if (++b_bit_off == CHUNK_BITS) {
+ b_bit_off = 0;
+ b_bit_vector[b_bits_cursor++] = b_bits;
+ b_bits = 0;
+ }
+ }
+ }
+
+ a_bit_vector[i / CHUNK_BITS] = a_bits;
+ if (chunk_bits == CHUNK_BITS)
+ a_bits = 0;
+ }
+
+ a_bits_cursor = inp_size / CHUNK_BITS;
+ a_bit_off = inp_size % CHUNK_BITS;
+
+ auto* aux_assignment = prover.aux_assignment_data;
+ size_t aux_size = prover.aux_assignment_size;
+
+ for (size_t i = 0; i < aux_size; i += CHUNK_BITS) {
+ uint64_t a_map = prover.a_aux_density[i / CHUNK_BITS];
+ uint64_t b_map = prover.b_aux_density[i / CHUNK_BITS];
+ uint64_t l_bits = 0;
+ uint64_t map_mask = 1;
+ size_t chunk_bits = std::min(CHUNK_BITS, aux_size - i);
+
+ for (size_t j = 0; j < chunk_bits; j++, map_mask <<= 1) {
+ const fr_t& scalar = aux_assignment[i + j];
+
+ bool is_one = scalar.is_one();
+ bool is_zero = scalar.is_zero();
+
+ if (!is_zero && !is_one)
+ l_bits |= map_mask;
+
+ if (a_map & map_mask) {
+ if (!is_zero && !is_one) {
+ a_bits |= ((uint64_t)1 << a_bit_off);
+ }
+
+ if (++a_bit_off == CHUNK_BITS) {
+ a_bit_off = 0;
+ a_bit_vector[a_bits_cursor++] = a_bits;
+ a_bits = 0;
+ }
+ }
+
+ if (b_map & map_mask) {
+ if (!is_zero && !is_one) {
+ b_bits |= ((uint64_t)1 << b_bit_off);
+ }
+
+ if (++b_bit_off == CHUNK_BITS) {
+ b_bit_off = 0;
+ b_bit_vector[b_bits_cursor++] = b_bits;
+ b_bits = 0;
+ }
+ }
+ }
+
+ l_bit_vector[i / CHUNK_BITS] = l_bits;
+ }
+
+ if (a_bit_off)
+ a_bit_vector[a_bits_cursor] = a_bits;
+
+ if (b_bit_off)
+ b_bit_vector[b_bits_cursor] = b_bits;
+ });
+
+ if (caught_exception)
+ return;
+
+ // merge all the masks from aux_assignments and count set bits
+    std::vector<mask_t> tail_msm_l_mask(split_vectors_l.bit_vector_size);
+    std::vector<mask_t> tail_msm_a_mask(split_vectors_a.bit_vector_size);
+    std::vector<mask_t> tail_msm_b_mask(split_vectors_b.bit_vector_size);
+
+ for (size_t i = 0; i < tail_msm_l_mask.size(); i++) {
+ uint64_t mask = split_vectors_l.bit_vector[0][i];
+ for (size_t c = 1; c < num_circuits; c++)
+ mask |= split_vectors_l.bit_vector[c][i];
+ tail_msm_l_mask[i] = mask;
+ l_popcount += __builtin_popcountll(mask);
+ }
+
+ for (size_t i = 0; i < tail_msm_a_mask.size(); i++) {
+ uint64_t mask = split_vectors_a.bit_vector[0][i];
+ for (size_t c = 1; c < num_circuits; c++)
+ mask |= split_vectors_a.bit_vector[c][i];
+ tail_msm_a_mask[i] = mask;
+ a_popcount += __builtin_popcountll(mask);
+ }
+
+ for (size_t i = 0; i < tail_msm_b_mask.size(); i++) {
+ uint64_t mask = split_vectors_b.bit_vector[0][i];
+ for (size_t c = 1; c < num_circuits; c++)
+ mask |= split_vectors_b.bit_vector[c][i];
+ tail_msm_b_mask[i] = mask;
+ b_popcount += __builtin_popcountll(mask);
+ }
+
+ if (caught_exception)
+ return;
+
+ if (l_split_msm = (l_popcount <= points_l.size() / 2)) {
+ split_vectors_l.tail_msms_resize(l_popcount);
+ tail_msm_l_bases.resize(l_popcount);
+ }
+
+ if (a_split_msm = (a_popcount <= points_a.size() / 2)) {
+ split_vectors_a.tail_msms_resize(a_popcount);
+ tail_msm_a_bases.resize(a_popcount);
+ } else {
+ split_vectors_a.tail_msms_resize(points_a.size());
+ }
+
+ if (b_split_msm = (b_popcount <= points_b_g1.size() / 2)) {
+ split_vectors_b.tail_msms_resize(b_popcount);
+ tail_msm_b_g1_bases.resize(b_popcount);
+ tail_msm_b_g2_bases.resize(b_popcount);
+ } else {
+ split_vectors_b.tail_msms_resize(points_b_g1.size());
+ }
+
+ // populate bitmaps for batch additions, bases and scalars for tail msms
+ groth16_pool.par_map(num_circuits, [&](size_t c) {
+ auto& prover = provers[c];
+ auto& l_bit_vector = split_vectors_l.bit_vector[c];
+ auto& a_bit_vector = split_vectors_a.bit_vector[c];
+ auto& b_bit_vector = split_vectors_b.bit_vector[c];
+ auto& tail_msm_l_scalars = split_vectors_l.tail_msm_scalars[c];
+ auto& tail_msm_a_scalars = split_vectors_a.tail_msm_scalars[c];
+ auto& tail_msm_b_scalars = split_vectors_b.tail_msm_scalars[c];
+
+ size_t a_cursor = 0, b_cursor = 0;
+
+ uint32_t a_bit_off = 0, b_bit_off = 0;
+ size_t a_bits_cursor = 0, b_bits_cursor = 0;
+
+ auto* inp_assignment = prover.inp_assignment_data;
+ size_t inp_size = prover.inp_assignment_size;
+
+ for (size_t i = 0; i < inp_size; i += CHUNK_BITS) {
+ uint64_t b_map = prover.b_inp_density[i / CHUNK_BITS];
+ size_t chunk_bits = std::min(CHUNK_BITS, inp_size - i);
+
+ for (size_t j = 0; j < chunk_bits; j++, b_map >>= 1) {
+ const fr_t& scalar = inp_assignment[i + j];
+
+ if (b_map & 1) {
+ if (c == 0 && b_split_msm) {
+ tail_msm_b_g1_bases[b_cursor] = points_b_g1[b_cursor];
+ tail_msm_b_g2_bases[b_cursor] = points_b_g2[b_cursor];
+ }
+ tail_msm_b_scalars[b_cursor] = scalar;
+ b_cursor++;
+
+ if (++b_bit_off == CHUNK_BITS) {
+ b_bit_off = 0;
+ b_bit_vector[b_bits_cursor++] = 0;
+ }
+ }
+
+ if (c == 0 && a_split_msm)
+ tail_msm_a_bases[a_cursor] = points_a[a_cursor];
+ tail_msm_a_scalars[a_cursor] = scalar;
+ a_cursor++;
+ }
+
+ a_bit_vector[i / CHUNK_BITS] = 0;
+ }
+
+ assert(b_cursor == prover.b_inp_popcount);
+
+ a_bits_cursor = inp_size / CHUNK_BITS;
+ a_bit_off = inp_size % CHUNK_BITS;
+
+ uint64_t a_mask = tail_msm_a_mask[a_bits_cursor], a_bits = 0;
+ uint64_t b_mask = tail_msm_b_mask[b_bits_cursor], b_bits = 0;
+
+ size_t points_a_cursor = a_cursor,
+ points_b_cursor = b_cursor,
+ l_cursor = 0;
+
+ auto* aux_assignment = prover.aux_assignment_data;
+ size_t aux_size = prover.aux_assignment_size;
+
+ for (size_t i = 0; i < aux_size; i += CHUNK_BITS) {
+ uint64_t a_map = prover.a_aux_density[i / CHUNK_BITS];
+ uint64_t b_map = prover.b_aux_density[i / CHUNK_BITS];
+ uint64_t l_map = tail_msm_l_mask[i / CHUNK_BITS], l_bits = 0;
+ uint64_t map_mask = 1;
+
+ size_t chunk_bits = std::min(CHUNK_BITS, aux_size - i);
+ for (size_t j = 0; j < chunk_bits; j++, map_mask <<= 1) {
+ const fr_t& scalar = aux_assignment[i + j];
+ bool is_one = scalar.is_one();
+
+ if (l_split_msm) {
+ if (is_one)
+ l_bits |= map_mask;
+
+ if (l_map & map_mask) {
+ if (c == 0)
+ tail_msm_l_bases[l_cursor] = points_l[i+j];
+ tail_msm_l_scalars[l_cursor] = czero(scalar, is_one);
+ l_cursor++;
+ }
+ }
+
+ if (a_split_msm) {
+ if (a_map & map_mask) {
+ uint64_t mask = (uint64_t)1 << a_bit_off;
+
+ if (a_mask & mask) {
+ if (c == 0)
+ tail_msm_a_bases[a_cursor] = points_a[points_a_cursor];
+ tail_msm_a_scalars[a_cursor] = czero(scalar, is_one);
+ a_cursor++;
+ }
+
+ points_a_cursor++;
+
+ if (is_one)
+ a_bits |= mask;
+
+ if (++a_bit_off == CHUNK_BITS) {
+ a_bit_off = 0;
+ a_bit_vector[a_bits_cursor++] = a_bits;
+ a_bits = 0;
+ a_mask = tail_msm_a_mask[a_bits_cursor];
+ }
+ }
+ } else {
+ if (a_map & map_mask) {
+ tail_msm_a_scalars[a_cursor] = scalar;
+ a_cursor++;
+ }
+ }
+
+ if (b_split_msm) {
+ if (b_map & map_mask) {
+ uint64_t mask = (uint64_t)1 << b_bit_off;
+
+ if (b_mask & mask) {
+ if (c == 0) {
+ tail_msm_b_g1_bases[b_cursor] =
+ points_b_g1[points_b_cursor];
+ tail_msm_b_g2_bases[b_cursor] =
+ points_b_g2[points_b_cursor];
+ }
+ tail_msm_b_scalars[b_cursor] = czero(scalar,
+ is_one);
+ b_cursor++;
+ }
+
+ points_b_cursor++;
+
+ if (is_one)
+ b_bits |= mask;
+
+ if (++b_bit_off == CHUNK_BITS) {
+ b_bit_off = 0;
+ b_bit_vector[b_bits_cursor++] = b_bits;
+ b_bits = 0;
+ b_mask = tail_msm_b_mask[b_bits_cursor];
+ }
+ }
+ } else {
+ if (b_map & map_mask) {
+ tail_msm_b_scalars[b_cursor] = scalar;
+ b_cursor++;
+ }
+ }
+ }
+
+ l_bit_vector[i / CHUNK_BITS] = l_bits;
+ }
+
+ if (a_bit_off)
+ a_bit_vector[a_bits_cursor] = a_bits;
+
+ if (b_bit_off)
+ b_bit_vector[b_bits_cursor] = b_bits;
+
+ if (l_split_msm)
+ assert(l_cursor == l_popcount);
+
+ if (a_split_msm) {
+ assert(points_a_cursor == points_a.size());
+ assert(a_cursor == a_popcount);
+ } else {
+ assert(a_cursor == points_a.size());
+ }
+
+ if (b_split_msm) {
+ assert(points_b_cursor == points_b_g1.size());
+ assert(b_cursor == b_popcount);
+ } else {
+ assert(b_cursor == points_b_g1.size());
+ }
+
+ });
+ // end of pre-processing step
+
+ for (size_t i = 0; i < n_gpus; i++)
+ barrier.notify();
+
+ if (caught_exception)
+ return;
+
+ // tail MSM b_g2 - on CPU
+ for (size_t c = 0; c < num_circuits; c++) {
+#ifndef __CUDA_ARCH__
+      mult_pippenger<bucket_fp2_t>(results.b_g2[c],
+ b_split_msm ? tail_msm_b_g2_bases.data() :
+ points_b_g2.data(),
+ split_vectors_b.tail_msm_scalars[c].size(),
+ split_vectors_b.tail_msm_scalars[c].data(),
+ true, &groth16_pool);
+#endif
+
+ if (caught_exception)
+ return;
+ }
+ });
+
+ batch_add_results batch_add_res{num_circuits};
+  std::vector<std::thread> per_gpu;
+ RustError ret{cudaSuccess};
+
+ for (size_t tid = 0; tid < n_gpus; tid++) {
+ per_gpu.emplace_back(std::thread([&, tid, n_gpus](size_t num_circuits)
+ {
+ const gpu_t& gpu = select_gpu(tid);
+
+ size_t rem = num_circuits % n_gpus;
+ num_circuits /= n_gpus;
+ num_circuits += tid < rem;
+ size_t circuit0 = tid * num_circuits;
+ if (tid >= rem)
+ circuit0 += rem;
+
+ try {
+ {
+ size_t d_a_sz = sizeof(fr_t) << (lg2(points_h.size() - 1) + 1);
+          gpu_ptr_t<scalar_t> d_a{(scalar_t*)gpu.Dmalloc(d_a_sz)};
+
+ for (size_t c = circuit0; c < circuit0 + num_circuits; c++) {
+#ifndef __CUDA_ARCH__
+ ntt_msm_h::execute_ntt_msm_h(gpu, d_a, provers[c],
+ points_h,
+ results.h[c]);
+#endif
+ if (caught_exception)
+ return;
+ }
+ }
+
+ barrier.wait();
+
+ if (caught_exception)
+ return;
+
+ if (l_split_msm) {
+ // batch addition L - on GPU
+          execute_batch_addition<bucket_t>(gpu, circuit0, num_circuits,
+                                           points_l, split_vectors_l,
+                                           &batch_add_res.l[circuit0]);
+
+ if (caught_exception)
+ return;
+ }
+
+ if (a_split_msm) {
+ // batch addition a - on GPU
+          execute_batch_addition<bucket_t>(gpu, circuit0, num_circuits,
+                                           points_a, split_vectors_a,
+                                           &batch_add_res.a[circuit0]);
+
+ if (caught_exception)
+ return;
+ }
+
+ if (b_split_msm) {
+ // batch addition b_g1 - on GPU
+          execute_batch_addition<bucket_t>(gpu, circuit0, num_circuits,
+                                           points_b_g1, split_vectors_b,
+                                           &batch_add_res.b_g1[circuit0]);
+
+ if (caught_exception)
+ return;
+
+ // batch addition b_g2 - on GPU
+          execute_batch_addition<bucket_fp2_t>(gpu, circuit0,
+                                               num_circuits, points_b_g2,
+                                               split_vectors_b, &batch_add_res.b_g2[circuit0]);
+
+ if (caught_exception)
+ return;
+ }
+
+ {
+          msm_t<bucket_t, point_t, affine_t, scalar_t> msm{nullptr,
+            (l_popcount + a_popcount + b_popcount) / 3};
+
+ for (size_t c = circuit0; c < circuit0+num_circuits; c++) {
+ // tail MSM l - on GPU
+ if (l_split_msm)
+ msm.invoke(results.l[c], tail_msm_l_bases,
+ split_vectors_l.tail_msm_scalars[c], true);
+ else
+ msm.invoke(results.l[c], points_l,
+ provers[c].aux_assignment_data, true);
+
+ if (caught_exception)
+ return;
+
+ // tail MSM a - on GPU
+ if (a_split_msm)
+ msm.invoke(results.a[c], tail_msm_a_bases,
+ split_vectors_a.tail_msm_scalars[c], true);
+ else
+ msm.invoke(results.a[c], points_a,
+ split_vectors_a.tail_msm_scalars[c], true);
+
+ if (caught_exception)
+ return;
+
+ // tail MSM b_g1 - on GPU
+ if (b_split_msm)
+ msm.invoke(results.b_g1[c], tail_msm_b_g1_bases,
+ split_vectors_b.tail_msm_scalars[c], true);
+ else
+ msm.invoke(results.b_g1[c], points_b_g1,
+ split_vectors_b.tail_msm_scalars[c], true);
+
+ if (caught_exception)
+ return;
+ }
+ }
+ } catch (const cuda_error& e) {
+ bool already = caught_exception.exchange(true);
+ if (!already) {
+ for (size_t i = 1; i < n_gpus; i++)
+ barrier.notify();
+#ifdef TAKE_RESPONSIBILITY_FOR_ERROR_MESSAGE
+ ret = RustError{e.code(), e.what()};
+#else
+ ret = RustError{e.code()};
+#endif
+ }
+ gpu.sync();
+ }
+ }, num_circuits));
+ }
+
+ prep_msm_thread.join();
+ for (auto& tid : per_gpu)
+ tid.join();
+
+ if (caught_exception)
+ return ret;
+
+ for (size_t circuit = 0; circuit < num_circuits; circuit++) {
+ if (l_split_msm)
+ results.l[circuit].add(batch_add_res.l[circuit]);
+ if (a_split_msm)
+ results.a[circuit].add(batch_add_res.a[circuit]);
+ if (b_split_msm) {
+ results.b_g1[circuit].add(batch_add_res.b_g1[circuit]);
+ results.b_g2[circuit].add(batch_add_res.b_g2[circuit]);
+ }
+
+ fr_t r = r_s[circuit], s = s_s[circuit];
+ fr_t rs = r * s;
+    // we want the scalars to be in Montgomery form when passing them to
+    // the "mult" routine
+
+ point_t g_a, g_c, a_answer, b1_answer, vk_delta_g1_rs, vk_alpha_g1_s,
+ vk_beta_g1_r;
+ point_fp2_t g_b;
+
+ mult(vk_delta_g1_rs, vk->delta_g1, rs);
+ mult(vk_alpha_g1_s, vk->alpha_g1, s);
+ mult(vk_beta_g1_r, vk->beta_g1, r);
+
+ mult(b1_answer, results.b_g1[circuit], r);
+
+ // A
+ mult(g_a, vk->delta_g1, r);
+ g_a.add(vk->alpha_g1);
+ g_a.add(results.a[circuit]);
+
+ // B
+ mult(g_b, vk->delta_g2, s);
+ g_b.add(vk->beta_g2);
+ g_b.add(results.b_g2[circuit]);
+
+ // C
+ mult(g_c, results.a[circuit], s);
+ g_c.add(b1_answer);
+ g_c.add(vk_delta_g1_rs);
+ g_c.add(vk_alpha_g1_s);
+ g_c.add(vk_beta_g1_r);
+ g_c.add(results.h[circuit]);
+ g_c.add(results.l[circuit]);
+
+ // to affine
+ proofs[circuit].a = g_a;
+ proofs[circuit].b = g_b;
+ proofs[circuit].c = g_c;
+ }
+
+ return ret;
+}
diff --git a/extern/supraseal/c2/cuda/groth16_ntt_h.cu b/extern/supraseal/c2/cuda/groth16_ntt_h.cu
new file mode 100644
index 000000000..6d072d89d
--- /dev/null
+++ b/extern/supraseal/c2/cuda/groth16_ntt_h.cu
@@ -0,0 +1,127 @@
+// Copyright Supranational LLC
+
+#include <ntt/ntt.cuh>
+
+__launch_bounds__(1024)
+__global__ void coeff_wise_mult(fr_t* a, const fr_t* b, uint32_t lg_domain_size)
+{
+ uint32_t idx0 = threadIdx.x + blockIdx.x * blockDim.x;
+ size_t limit = (size_t)1 << lg_domain_size;
+
+ for (size_t idx = idx0; idx < limit; idx += blockDim.x * gridDim.x)
+ a[idx] *= b[idx];
+}
+
+__launch_bounds__(1024)
+__global__ void sub_mult_with_constant(fr_t* a, const fr_t* c, fr_t z,
+ uint32_t lg_domain_size)
+{
+ uint32_t idx0 = threadIdx.x + blockIdx.x * blockDim.x;
+ size_t limit = (size_t)1 << lg_domain_size;
+
+ for (size_t idx = idx0; idx < limit; idx += blockDim.x * gridDim.x) {
+ fr_t r = a[idx] - c[idx];
+ a[idx] = r * z;
+ }
+}
+
+#ifndef __CUDA_ARCH__
+
+const size_t gib = (size_t)1 << 30;
+
+class ntt_msm_h : public NTT {
+private:
+ static fr_t calculate_z_inv(size_t lg_domain_size) {
+ fr_t gen_pow = group_gen;
+ while (lg_domain_size--)
+ gen_pow ^= 2;
+ return (gen_pow - fr_t::one()).reciprocal();
+ }
+
+ static void execute_ntts_single(fr_t* d_inout, const fr_t* in,
+ size_t lg_domain_size, size_t actual_size,
+ stream_t& stream)
+ {
+ size_t domain_size = (size_t)1 << lg_domain_size;
+
+ assert(actual_size <= domain_size);
+
+ stream.HtoD(&d_inout[0], in, actual_size);
+
+ if (actual_size < domain_size) {
+ cudaMemsetAsync(&d_inout[actual_size], 0,
+ (domain_size - actual_size) * sizeof(fr_t), stream);
+ }
+
+ NTT_internal(&d_inout[0], lg_domain_size,
+ NTT::InputOutputOrder::NR, NTT::Direction::inverse,
+ NTT::Type::standard, stream);
+ NTT_internal(&d_inout[0], lg_domain_size,
+ NTT::InputOutputOrder::RN, NTT::Direction::forward,
+ NTT::Type::coset, stream);
+ }
+
+ static int lg2(size_t n)
+ { int ret = 0; while (n >>= 1) ret++; return ret; }
+
+public:
+
+ // a, b, c = coset_ntt(intt(a, b, c))
+ // a *= b
+ // a -= c
+ // a[i] /= (multiplicative_gen^domain_size) - 1
+ // a = coset_intt(a)
+ // a is the result vector
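+    //
+    // Note: the constant applied in sub_mult_with_constant() is z_inv =
+    // 1 / (multiplicative_gen^domain_size - 1), i.e. the vanishing polynomial
+    // x^domain_size - 1 evaluated on the coset, which is the same constant
+    // for every coset point (see calculate_z_inv() above).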
+    static void execute_ntt_msm_h(const gpu_t& gpu, gpu_ptr_t<scalar_t> d_a,
+                                  const Assignment<fr_t>& input,
+                                  slice_t<affine_t> points_h,
+                                  point_t& result_h)
+ {
+ size_t actual_size = input.abc_size;
+ size_t npoints = points_h.size();
+ size_t lg_domain_size = lg2(npoints - 1) + 1;
+ size_t domain_size = (size_t)1 << lg_domain_size;
+
+ fr_t z_inv = calculate_z_inv(lg_domain_size);
+
+ int sm_count = gpu.props().multiProcessorCount;
+
+ bool lot_of_memory = 3 * domain_size * sizeof(fr_t) <
+ gpu.props().totalGlobalMem - gib;
+ {
+      dev_ptr_t<fr_t> d_b(domain_size * (lot_of_memory + 1));
+ fr_t* d_c = &d_b[domain_size * lot_of_memory];
+
+ event_t sync_event;
+
+ execute_ntts_single(&d_a[0], input.a, lg_domain_size,
+ actual_size, gpu[0]);
+ sync_event.record(gpu[0]);
+
+ execute_ntts_single(&d_b[0], input.b, lg_domain_size,
+ actual_size, gpu[1]);
+
+ sync_event.wait(gpu[1]);
+      coeff_wise_mult<<<sm_count, 1024, 0, gpu[1]>>>
+ (&d_a[0], &d_b[0], (index_t)lg_domain_size);
+ sync_event.record(gpu[1]);
+
+ execute_ntts_single(&d_c[0], input.c, lg_domain_size,
+ actual_size, gpu[1 + lot_of_memory]);
+
+ sync_event.wait(gpu[1 + lot_of_memory]);
+      sub_mult_with_constant<<<sm_count, 1024, 0, gpu[1 + lot_of_memory]>>>
+ (&d_a[0], &d_c[0], z_inv, (index_t)lg_domain_size);
+ }
+
+ NTT_internal(&d_a[0], lg_domain_size, NTT::InputOutputOrder::NN,
+ NTT::Direction::inverse, NTT::Type::coset, gpu[1 + lot_of_memory]);
+
+ gpu[1 + lot_of_memory].sync();
+
+    msm_t<bucket_t, point_t, affine_t, scalar_t> msm(nullptr, npoints);
+ msm.invoke(result_h, points_h, d_a, true);
+ }
+};
+
+#endif
diff --git a/extern/supraseal/c2/cuda/groth16_split_msm.cu b/extern/supraseal/c2/cuda/groth16_split_msm.cu
new file mode 100644
index 000000000..a164f3300
--- /dev/null
+++ b/extern/supraseal/c2/cuda/groth16_split_msm.cu
@@ -0,0 +1,134 @@
+// Copyright Supranational LLC
+
+#include <msm/batch_addition.cuh>
+
+template __global__
+void batch_addition<bucket_t>(bucket_t::mem_t ret_[],
+                              const affine_t::mem_t points_[], uint32_t npoints,
+                              const uint32_t bitmap[], bool accumulate,
+                              uint32_t sid);
+
+template __global__
+void batch_addition<bucket_fp2_t>(bucket_fp2_t::mem_t ret_[],
+                                  const affine_fp2_t::mem_t points_[],
+                                  uint32_t npoints, const uint32_t bitmap[],
+                                  bool accumulate, uint32_t sid);
+
+struct batch_add_results {
+ std::vector l;
+ std::vector a;
+ std::vector b_g1;
+ std::vector b_g2;
+
+ batch_add_results(size_t num_circuits) : l(num_circuits),
+ a(num_circuits),
+ b_g1(num_circuits),
+ b_g2(num_circuits) { }
+};
+
+template<typename T> class uninit {
+ T val;
+public:
+  uninit() { } // don't zero std::vector<uninit<T>>
+ uninit(T v) { val = v; }
+ operator T() const { return val; }
+};
+
+using mask_t = uninit<uint64_t>;
+
+const size_t CHUNK_BITS = sizeof(mask_t) * 8; // 64 bits
+
+#define NUM_BATCHES 8
+#define GPU_DIV (32*WARP_SZ)
+
+class split_vectors {
+public:
+  std::vector<std::vector<mask_t>> bit_vector;
+  std::vector<std::vector<scalar_t>> tail_msm_scalars;
+ size_t batch_size, bit_vector_size;
+
+ split_vectors(size_t num_circuits, size_t num_points)
+ : bit_vector{num_circuits},
+ tail_msm_scalars{num_circuits}
+ {
+ batch_size = (num_points + GPU_DIV - 1) / GPU_DIV;
+ batch_size = (batch_size + NUM_BATCHES - 1) / NUM_BATCHES;
+ batch_size *= GPU_DIV;
+
+ bit_vector_size = (num_points + CHUNK_BITS - 1) / CHUNK_BITS;
+
+ for (size_t c = 0; c < num_circuits; c++) {
+ bit_vector[c].resize(bit_vector_size);
+ }
+ }
+
+ void tail_msms_resize(size_t num_sig_scalars) {
+ size_t num_circuits = tail_msm_scalars.size();
+ for (size_t c = 0; c < num_circuits; c++) {
+ tail_msm_scalars[c].resize(num_sig_scalars);
+ }
+ }
+};
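+
+// Example of the sizing above (assuming WARP_SZ == 32, so GPU_DIV == 1024):
+// 2^24 points give batch_size = 2^21, i.e. the points stream to the GPU in
+// NUM_BATCHES = 8 double-buffered transfers.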
+
+template<class bucket_t, class point_t, class affine_t,
+         class bucket_h = typename bucket_t::mem_t,
+         class affine_h = typename affine_t::mem_t>
+void execute_batch_addition(const gpu_t& gpu,
+                            size_t circuit0, size_t num_circuits,
+                            slice_t<affine_t> points,
+                            const split_vectors& split_vector,
+                            point_t batch_add_res[])
+{
+ int sm_count = gpu.sm_count();
+
+ uint32_t nbuckets = sm_count * BATCH_ADD_BLOCK_SIZE / WARP_SZ;
+
+ uint32_t bit_vector_size = (split_vector.bit_vector_size + WARP_SZ - 1) & (0u - WARP_SZ);
+ size_t batch_size = split_vector.batch_size;
+
+ assert(batch_size == (uint32_t)batch_size);
+
+ size_t d_points_size = batch_size * 2 * sizeof(affine_h);
+ size_t d_buckets_size = num_circuits * nbuckets * sizeof(bucket_h);
+
+  dev_ptr_t<uint8_t> d_temp{d_points_size + d_buckets_size +
+                            num_circuits * bit_vector_size * sizeof(mask_t)};
+
+  vec2d_t<affine_h> d_points{&d_temp[0], (uint32_t)batch_size};
+  vec2d_t<bucket_h> d_buckets{&d_temp[d_points_size], nbuckets};
+  vec2d_t<mask_t> d_bit_vectors{&d_temp[d_points_size + d_buckets_size],
+                                bit_vector_size};
+
+ uint32_t sid = 0;
+
+ for (size_t c = 0; c < num_circuits; c++)
+ gpu[sid].HtoD(d_bit_vectors[c], split_vector.bit_vector[circuit0 + c]);
+
+ size_t npoints = points.size();
+ for (uint32_t batch = 0; npoints > 0; batch++, sid ^= 1) {
+ uint32_t amount = std::min(npoints, batch_size);
+ size_t cursor = batch * batch_size;
+
+ gpu[sid].HtoD(d_points[sid], &points[cursor], amount);
+
+ for (size_t c = 0; c < num_circuits; c++)
+      gpu[sid].launch_coop(batch_addition<bucket_t>,
+ {sm_count, BATCH_ADD_BLOCK_SIZE},
+ d_buckets[c], (const affine_h*)d_points[sid], amount,
+ (const uint32_t*)&d_bit_vectors[c][cursor / CHUNK_BITS],
+ batch > 0, sid);
+
+ npoints -= amount;
+ }
+ sid ^= 1;
+
+  vec2d_t<bucket_t> buckets{nbuckets, num_circuits};
+ gpu[sid].DtoH(buckets[0], d_buckets[0], num_circuits * nbuckets);
+ gpu[sid].sync();
+
+ gpu.par_map(num_circuits, 1, [&, batch_add_res, nbuckets](size_t c) {
+ batch_add_res[c] = sum_up(buckets[c], nbuckets);
+ });
+}
diff --git a/extern/supraseal/c2/cuda/groth16_srs.cuh b/extern/supraseal/c2/cuda/groth16_srs.cuh
new file mode 100644
index 000000000..2040684ad
--- /dev/null
+++ b/extern/supraseal/c2/cuda/groth16_srs.cuh
@@ -0,0 +1,471 @@
+// Copyright Supranational LLC
+
+#include <fstream>
+#include <mutex>
+#include <string>
+#include <thread>
+
+#include <sys/mman.h>
+
+struct verifying_key {
+ affine_t alpha_g1;
+ affine_t beta_g1;
+ affine_fp2_t beta_g2;
+ affine_fp2_t gamma_g2;
+ affine_t delta_g1;
+ affine_fp2_t delta_g2;
+};
+
+#ifdef __CUDA_ARCH__
+typedef uint8_t byte;
+#endif
+
+extern "C" {
+ int blst_p1_deserialize(affine_t*, const byte[96]);
+ int blst_p2_deserialize(affine_fp2_t*, const byte[192]);
+}
+
+class SRS {
+private:
+  // This class assumes that the SRS files used by Filecoin have a specific
+  // layout, and it relies on some properties of the data types present in
+  // the file.
+ //
+ // There are 3 data types in the file:
+ // 4-byte big-endian unsigned integer,
+  //   96-byte BLS12-381 P1 affine point,
+ // 192-byte BLS12-381 P2 affine point
+ //
+ // The layout of the file is as such, in order, without any padding:
+ //
+ // alpha_g1: g1 affine
+ // beta_g1 : g1 affine
+ // beta_g2 : g2 affine
+ // gamma_g2: g2 affine
+ // delta_g1: g1 affine
+ // delta_g2: g2 affine
+ // number of ic points: 4-byte big-endian unsigned integer
+ // ic points: g1 affines
+ // number of h points: 4-byte big-endian unsigned integer
+ // h points: g1 affines
+ // number of l points: 4-byte big-endian unsigned integer
+ // l points: g1 affines
+ // number of a points: 4-byte big-endian unsigned integer
+ // a points: g1 affines
+ // number of b_g1 points: 4-byte big-endian unsigned integer
+ // b_g1 points: g1 affines
+ // number of b_g2 points: 4-byte big-endian unsigned integer
+ // b_g2 points: g2 affines
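+  //
+  // For example: the verifying key occupies the first 864 bytes
+  // (3 * 96 + 3 * 192), so the ic count is the big-endian uint32_t at
+  // offset 864 and the ic points start at offset 868; every later section
+  // header immediately follows the previous section's points.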
+ class SRS_internal {
+ friend class SRS;
+
+ private:
+ static const int max_num_circuits = 10;
+
+ static size_t get_num_threads() {
+ int total_threads = groth16_pool.size();
+
+    // Assume that the CPU supports hyperthreading, to be on the safe
+    // side, and leave at least max_num_circuits physical cores available
+    // in case the SRS is read concurrently with circuit synthesis.
+    // If there are not enough physical cores to spare, just use all of
+    // them for the read.
+ return (total_threads / 2 - max_num_circuits) < max_num_circuits ?
+ (size_t)total_threads / 2 :
+ (size_t)total_threads / 2 - max_num_circuits;
+ }
+
+ // size of p1 affine and p2 affine points in the SRS file in bytes
+ static const size_t p1_affine_size = 96;
+ static const size_t p2_affine_size = 192;
+
+ // 3 p1 affine and 3 p2 affine points are in the verification key. 864 bytes
+ static const size_t vk_offset = p1_affine_size * 3 + p2_affine_size * 3;
+
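+    // Decodes an unsigned integer stored big-endian; e.g. for T = uint32_t
+    // the bytes {0x00, 0x00, 0x01, 0x02} decode to 0x102 = 258.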
+    template<typename T>
+ static T from_big_endian(const unsigned char* ptr) {
+ T res = ptr[0];
+ for (size_t i = 1; i < sizeof(T); i++) {
+ res <<= 8;
+ res |= ptr[i];
+ }
+
+ return res;
+ }
+
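+    // Hand work to the thread pool in batches of roughly 1/64th of an even
+    // per-thread share, giving par_map slack to rebalance threads that
+    // fall behind.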
+ static size_t get_batch_size(size_t num_points, size_t num_threads) {
+ size_t batch_size = (num_points + num_threads - 1) / num_threads;
+ batch_size = (batch_size + 64 - 1) / 64;
+ return batch_size;
+ }
+
+ static inline size_t read_g1_point(affine_t* point, const byte* srs_ptr)
+ {
+ blst_p1_deserialize(point, srs_ptr);
+ return p1_affine_size;
+ }
+
+ static inline size_t read_g2_point(affine_fp2_t* point, const byte* srs_ptr)
+ {
+ blst_p2_deserialize(point, srs_ptr);
+ return p2_affine_size;
+ }
+
+    static void read_g1_points(slice_t<affine_t> points, const byte* srs_ptr)
+ {
+ size_t num_points = points.size();
+ size_t batch_size = get_batch_size(num_points, get_num_threads());
+
+      const byte (*srs)[p1_affine_size] =
+        reinterpret_cast<const byte (*)[p1_affine_size]>(srs_ptr);
+
+ groth16_pool.par_map(num_points, batch_size, [&](size_t i) {
+        (void)read_g1_point(const_cast<affine_t*>(&points[i]), srs[i]);
+ }, get_num_threads());
+ }
+
+    static void read_g2_points(slice_t<affine_fp2_t> points, const byte* srs_ptr)
+ {
+ size_t num_points = points.size();
+ size_t batch_size = get_batch_size(num_points, get_num_threads());
+
+      const byte (*srs)[p2_affine_size] =
+        reinterpret_cast<const byte (*)[p2_affine_size]>(srs_ptr);
+
+ groth16_pool.par_map(num_points, batch_size, [&](size_t i) {
+        (void)read_g2_point(const_cast<affine_fp2_t*>(&points[i]), srs[i]);
+ }, get_num_threads());
+ }
+
+ std::thread read_th;
+ mutable std::mutex mtx;
+
+ std::string path;
+ verifying_key vk;
+
+#if 0
+#define H_IS_STD__VECTOR
+    std::vector<affine_t> h;
+#else
+    slice_t<affine_t> h;
+#endif
+    slice_t<affine_t> l, a, b_g1;
+    slice_t<affine_fp2_t> b_g2;
+ void* pinned;
+
+ SRS_internal(SRS_internal const&) = delete;
+ void operator=(SRS_internal const&) = delete;
+
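+    // round up to a 4 KiB boundary so each section carved out of the pinned
+    // allocation below starts page-aligned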
+ inline static size_t round_up(size_t sz)
+ { return (sz + 4095) & ((size_t)0 - 4096); }
+
+ public:
+ SRS_internal(const char* srs_path) : path(srs_path), pinned(nullptr) {
+ struct {
+ struct {
+ uint32_t size;
+ size_t off; // in bytes
+ } h, l, a, b_g1, b_g2;
+ } data;
+
+ if (!ngpus()) {
+ throw sppark_error{ENODEV, std::string("No CUDA devices available")};
+ }
+
+ int srs_file = open(srs_path, O_RDONLY);
+
+ if (srs_file < 0) {
+ throw sppark_error{errno, "open(\"%s\") failed: ", srs_path};
+ }
+
+ struct stat st;
+ fstat(srs_file, &st);
+ size_t file_size = st.st_size;
+
+ const byte* srs_ptr = (const byte*)mmap(NULL, file_size, PROT_READ,
+ MAP_PRIVATE, srs_file, 0);
+
+ {
+ int err = errno;
+ close(srs_file);
+ if (srs_ptr == MAP_FAILED) {
+ throw sppark_error{err, "mmap(srs_file) failed: "};
+ }
+ }
+
+ size_t cursor = 0;
+ cursor += read_g1_point(&vk.alpha_g1, srs_ptr + cursor);
+ cursor += read_g1_point(&vk.beta_g1, srs_ptr + cursor);
+ cursor += read_g2_point(&vk.beta_g2, srs_ptr + cursor);
+ cursor += read_g2_point(&vk.gamma_g2, srs_ptr + cursor);
+ cursor += read_g1_point(&vk.delta_g1, srs_ptr + cursor);
+ cursor += read_g2_point(&vk.delta_g2, srs_ptr + cursor);
+
+ if (file_size <= cursor + sizeof(uint32_t)) {
+ munmap(const_cast(srs_ptr), file_size);
+ throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")};
+ }
+      uint32_t vk_ic_size = from_big_endian<uint32_t>(srs_ptr + cursor);
+ cursor += sizeof(uint32_t);
+
+ cursor += vk_ic_size * p1_affine_size;
+ if (file_size <= cursor + sizeof(uint32_t)) {
+ munmap(const_cast(srs_ptr), file_size);
+ throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")};
+ }
+      data.h.size = from_big_endian<uint32_t>(srs_ptr + cursor);
+ data.h.off = cursor += sizeof(uint32_t);
+
+ cursor += data.h.size * p1_affine_size;
+ if (file_size <= cursor + sizeof(uint32_t)) {
+ munmap(const_cast(srs_ptr), file_size);
+ throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")};
+ }
+      data.l.size = from_big_endian<uint32_t>(srs_ptr + cursor);
+ data.l.off = cursor += sizeof(uint32_t);
+
+ cursor += data.l.size * p1_affine_size;
+ if (file_size <= cursor + sizeof(uint32_t)) {
+ munmap(const_cast(srs_ptr), file_size);
+ throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")};
+ }
+      data.a.size = from_big_endian<uint32_t>(srs_ptr + cursor);
+ data.a.off = cursor += sizeof(uint32_t);
+
+ cursor += data.a.size * p1_affine_size;
+ if (file_size <= cursor + sizeof(uint32_t)) {
+ munmap(const_cast(srs_ptr), file_size);
+ throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")};
+ }
+      data.b_g1.size = from_big_endian<uint32_t>(srs_ptr + cursor);
+ data.b_g1.off = cursor += sizeof(uint32_t);
+
+ cursor += data.b_g1.size * p1_affine_size;
+ if (file_size <= cursor + sizeof(uint32_t)) {
+ munmap(const_cast(srs_ptr), file_size);
+ throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")};
+ }
+      data.b_g2.size = from_big_endian<uint32_t>(srs_ptr + cursor);
+ data.b_g2.off = cursor += sizeof(uint32_t);
+
+      cursor += data.b_g2.size * p2_affine_size;
+ if (file_size < cursor) {
+ munmap(const_cast(srs_ptr), file_size);
+ throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")};
+ }
+
+ size_t l_size = round_up(data.l.size * sizeof(affine_t)),
+ a_size = round_up(data.a.size * sizeof(affine_t)),
+ b1_size = round_up(data.b_g1.size * sizeof(affine_t)),
+ b2_size = round_up(data.b_g2.size * sizeof(affine_fp2_t)),
+ total = l_size + a_size + b1_size + b2_size;
+#ifndef H_IS_STD__VECTOR
+ total += round_up(data.h.size * sizeof(affine_t));
+#endif
+
+ cudaError_t cuda_err = cudaHostAlloc(&pinned, total, cudaHostAllocPortable);
+ if (cuda_err != cudaSuccess) {
+ munmap(const_cast(srs_ptr), file_size);
+ CUDA_OK(cuda_err);
+ }
+ byte *ptr = reinterpret_cast(pinned);
+
+      l    = slice_t<affine_t>{(affine_t*)ptr, data.l.size};    ptr += l_size;
+      a    = slice_t<affine_t>{(affine_t*)ptr, data.a.size};    ptr += a_size;
+      b_g1 = slice_t<affine_t>{(affine_t*)ptr, data.b_g1.size}; ptr += b1_size;
+      b_g2 = slice_t<affine_fp2_t>{(affine_fp2_t*)ptr, data.b_g2.size};
+      ptr += b2_size;
+
+#ifdef H_IS_STD__VECTOR
+ h.resize(data.h.size);
+#else
+      h = slice_t<affine_t>{(affine_t*)ptr, data.h.size};
+#endif
+
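+      // The reader thread holds 'mtx' for the entire load, and 'barrier'
+      // guarantees the lock is taken before the constructor returns; the
+      // SRS getters below therefore block until all points are deserialized.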
+ semaphore_t barrier;
+ read_th = std::thread([&, srs_ptr, file_size, data] {
+ std::lock_guard guard(mtx);
+ barrier.notify();
+
+ read_g1_points(h, srs_ptr + data.h.off);
+ read_g1_points(l, srs_ptr + data.l.off);
+ read_g1_points(a, srs_ptr + data.a.off);
+ read_g1_points(b_g1, srs_ptr + data.b_g1.off);
+ read_g2_points(b_g2, srs_ptr + data.b_g2.off);
+
+ munmap(const_cast(srs_ptr), file_size);
+ });
+ barrier.wait();
+ }
+ ~SRS_internal() {
+ if (read_th.joinable())
+ read_th.join();
+ if (pinned)
+ cudaFreeHost(pinned);
+ }
+ };
+
+ struct inner {
+ const SRS_internal srs;
+    std::atomic<size_t> ref_cnt;
+ inline inner(const char* srs_path) : srs(srs_path), ref_cnt(1) {}
+ };
+ inner* ptr;
+
+public:
+ SRS(const char* srs_path) { ptr = new inner(srs_path); }
+ SRS(const SRS& r) { *this = r; }
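+  // 'inner' is shared via reference count: copies bump it, and the last
+  // destructor to drop it to zero frees the underlying SRS_internal.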
+ ~SRS() {
+ if (ptr && ptr->ref_cnt.fetch_sub(1, std::memory_order_seq_cst) == 1) {
+ delete ptr;
+ }
+ }
+
+ SRS& operator=(const SRS& r) {
+ if (this != &r)
+ (ptr = r.ptr)->ref_cnt.fetch_add(1, std::memory_order_relaxed);
+ return *this;
+ }
+
+ SRS& operator=(SRS&& r) noexcept {
+ if (this != &r) {
+ ptr = r.ptr;
+ r.ptr = nullptr;
+ }
+ return *this;
+ }
+
+ const verifying_key& get_vk() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.vk;
+ }
+
+ const affine_t* get_h() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.h.data();
+ }
+
+ const affine_t* get_l() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.l.data();
+ }
+
+ const affine_t* get_a() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.a.data();
+ }
+
+ const affine_t* get_b_g1() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.b_g1.data();
+ }
+
+ const affine_fp2_t* get_b_g2() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.b_g2.data();
+ }
+
+  const slice_t<affine_t> get_h_slice() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return {ptr->srs.h.data(), ptr->srs.h.size()};
+ }
+
+  const slice_t<affine_t>& get_l_slice() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.l;
+ }
+
+  const slice_t<affine_t>& get_a_slice() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.a;
+ }
+
+  const slice_t<affine_t>& get_b_g1_slice() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.b_g1;
+ }
+
+  const slice_t<affine_fp2_t>& get_b_g2_slice() const {
+ std::lock_guard guard(ptr->srs.mtx);
+ return ptr->srs.b_g2;
+ }
+
+ const std::string& get_path() const {
+ return ptr->srs.path;
+ }
+
+ // facilitate return by value through FFI, as SRS::by_value.
+ struct by_value { inner *ptr; };
+ operator by_value() const {
+ ptr->ref_cnt.fetch_add(1, std::memory_order_relaxed);
+ return {ptr};
+ }
+ SRS(by_value v) { ptr = v.ptr; }
+
+ class SRS_cache {
+    std::list<std::pair<std::string, SRS>> list;
+ std::mutex mtx;
+
+ public:
+ SRS lookup(const char *key)
+ {
+ std::lock_guard lock(mtx);
+
+ for (auto it = list.begin(); it != list.end(); ++it) {
+ if (it->first == key) {
+ if (it != list.begin()) {
+ // move to the beginning of the list
+ list.splice(list.begin(), list, it);
+ }
+ return it->second;
+ }
+ }
+
+ if (list.size() > 3)
+ list.pop_back(); // least recently used
+
+ list.emplace_front(std::make_pair(key, SRS{key}));
+
+ return list.begin()->second;
+ }
+
+ void evict(const char *key)
+ {
+ std::lock_guard lock(mtx);
+
+ list.remove_if([=](decltype(list)::value_type& elem) {
+ return elem.first == key;
+ });
+ }
+ };
+
+ static SRS_cache& cache()
+ {
+ static SRS_cache da_cache;
+ return da_cache;
+ }
+
+ void evict() const { SRS::cache().evict(ptr->srs.path.c_str()); }
+};
+
+extern "C" RustError::by_value create_SRS(SRS& ret, const char* srs_path, bool cache)
+{
+ try {
+ ret = cache ? SRS::cache().lookup(srs_path) : SRS{srs_path};
+ return RustError{cudaSuccess};
+ } catch (const sppark_error& e) {
+#ifdef TAKE_RESPONSIBILITY_FOR_ERROR_MESSAGE
+ return RustError{e.code(), e.what()};
+#else
+ return RustError{e.code()};
+#endif
+ }
+}
+
+extern "C" void evict_SRS(const SRS& ref)
+{ ref.evict(); }
+
+extern "C" void drop_SRS(SRS& ref)
+{ ref.~SRS(); }
+
+extern "C" SRS::by_value clone_SRS(const SRS& rhs)
+{ return rhs; }
diff --git a/extern/supraseal/c2/src/lib.rs b/extern/supraseal/c2/src/lib.rs
new file mode 100644
index 000000000..e48733add
--- /dev/null
+++ b/extern/supraseal/c2/src/lib.rs
@@ -0,0 +1,222 @@
+// Copyright Supranational LLC
+
+sppark::cuda_error!();
+
+use std::path::PathBuf;
+
+#[repr(C)]
+pub struct SRS {
+ ptr: *const core::ffi::c_void,
+}
+
+impl Default for SRS {
+ fn default() -> Self {
+ Self {
+ ptr: core::ptr::null(),
+ }
+ }
+}
+
+impl SRS {
+    pub fn try_new(srs_path: PathBuf, cache: bool) -> Result<Self, cuda::Error> {
+ extern "C" {
+ fn create_SRS(
+ ret: &mut SRS,
+ srs_path: *const std::os::raw::c_char,
+ cache: bool,
+ ) -> cuda::Error;
+ }
+ let c_srs_path = std::ffi::CString::new(srs_path.to_str().unwrap()).unwrap();
+
+ let mut ret = SRS::default();
+ let err = unsafe { create_SRS(&mut ret, c_srs_path.as_ptr(), cache) };
+ if err.code != 0 {
+ Err(err)
+ } else {
+ Ok(ret)
+ }
+ }
+
+ pub fn evict(&self) {
+ extern "C" {
+ fn evict_SRS(by_ref: &SRS);
+ }
+ unsafe { evict_SRS(self) };
+ }
+}
+
+impl Drop for SRS {
+ fn drop(&mut self) {
+ extern "C" {
+ fn drop_SRS(by_ref: &SRS);
+ }
+ unsafe { drop_SRS(self) };
+ self.ptr = core::ptr::null();
+ }
+}
+
+impl Clone for SRS {
+ fn clone(&self) -> Self {
+ extern "C" {
+ fn clone_SRS(by_ref: &SRS) -> SRS;
+ }
+ unsafe { clone_SRS(self) }
+ }
+}
+
+unsafe impl Sync for SRS {}
+unsafe impl Send for SRS {}
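+
+// `SRS` is an opaque, reference-counted handle owned by the C++ side; Rust
+// only ever hands the pointer back across the FFI boundary, which is what
+// makes the manual `Sync`/`Send` impls above sound.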
+
+pub fn generate_groth16_proof<S, D, PR>(
+ ntt_a_scalars: &[*const S],
+ ntt_b_scalars: &[*const S],
+ ntt_c_scalars: &[*const S],
+ ntt_scalars_actual_size: usize,
+ input_assignments: &[*const S],
+ aux_assignments: &[*const S],
+ input_assignments_size: usize,
+ aux_assignments_size: usize,
+ a_aux_density_bv: &[D],
+ b_g1_input_density_bv: &[D],
+ b_g1_aux_density_bv: &[D],
+ a_aux_total_density: usize,
+ b_g1_input_total_density: usize,
+ b_g1_aux_total_density: usize,
+ num_circuits: usize,
+ r_s: &[S],
+ s_s: &[S],
+ proofs: &mut [PR],
+ srs: &SRS,
+) {
+ assert_eq!(ntt_a_scalars.len(), num_circuits);
+ assert_eq!(ntt_b_scalars.len(), num_circuits);
+ assert_eq!(ntt_c_scalars.len(), num_circuits);
+ assert_eq!(input_assignments.len(), num_circuits);
+ assert_eq!(aux_assignments.len(), num_circuits);
+ assert_eq!(r_s.len(), num_circuits);
+ assert_eq!(s_s.len(), num_circuits);
+ assert_eq!(proofs.len(), num_circuits);
+
+    let bv_element_size: usize = std::mem::size_of::<D>() * 8; // length of D in bits
+ assert!(
+ bv_element_size == 64,
+ "only 64-bit elements in bit vectors are supported"
+ );
+
+ assert!(a_aux_density_bv.len() * bv_element_size >= aux_assignments_size);
+ assert!(b_g1_aux_density_bv.len() * bv_element_size >= aux_assignments_size);
+
+ let provers: Vec<_> = (0..num_circuits)
+        .map(|c| Assignment::<S> {
+ // Density of queries
+ a_aux_density: a_aux_density_bv.as_ptr() as *const _,
+ a_aux_bit_len: aux_assignments_size,
+ a_aux_popcount: a_aux_total_density,
+
+ b_inp_density: b_g1_input_density_bv.as_ptr() as *const _,
+ b_inp_bit_len: input_assignments_size,
+ b_inp_popcount: b_g1_input_total_density,
+
+ b_aux_density: b_g1_aux_density_bv.as_ptr() as *const _,
+ b_aux_bit_len: aux_assignments_size,
+ b_aux_popcount: b_g1_aux_total_density,
+
+ // Evaluations of A, B, C polynomials
+ a: ntt_a_scalars[c],
+ b: ntt_b_scalars[c],
+ c: ntt_c_scalars[c],
+ abc_size: ntt_scalars_actual_size,
+
+ // Assignments of variables
+ inp_assignment_data: input_assignments[c],
+ inp_assignment_size: input_assignments_size,
+
+ aux_assignment_data: aux_assignments[c],
+ aux_assignment_size: aux_assignments_size,
+ })
+ .collect();
+
+ let err = unsafe {
+ generate_groth16_proofs_c(
+ provers.as_ptr() as *const _,
+ num_circuits,
+ r_s.as_ptr() as *const _,
+ s_s.as_ptr() as *const _,
+ proofs.as_mut_ptr() as *mut _,
+ srs,
+ )
+ };
+
+ if err.code != 0 {
+ panic!("{}", String::from(err));
+ }
+}
+
+#[repr(C)]
+pub struct Assignment<Scalar> {
+ // Density of queries
+ pub a_aux_density: *const usize,
+ pub a_aux_bit_len: usize,
+ pub a_aux_popcount: usize,
+
+ pub b_inp_density: *const usize,
+ pub b_inp_bit_len: usize,
+ pub b_inp_popcount: usize,
+
+ pub b_aux_density: *const usize,
+ pub b_aux_bit_len: usize,
+ pub b_aux_popcount: usize,
+
+ // Evaluations of A, B, C polynomials
+ pub a: *const Scalar,
+ pub b: *const Scalar,
+ pub c: *const Scalar,
+ pub abc_size: usize,
+
+ // Assignments of variables
+ pub inp_assignment_data: *const Scalar,
+ pub inp_assignment_size: usize,
+
+ pub aux_assignment_data: *const Scalar,
+ pub aux_assignment_size: usize,
+}
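+
+// NOTE: the field order above must stay in sync with the C++ `Assignment`
+// struct consumed by `generate_groth16_proofs_c`; both sides rely on the
+// #[repr(C)] layout for the FFI handoff.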
+
+extern "C" {
+ fn generate_groth16_proofs_c(
+ provers: *const core::ffi::c_void,
+ num_circuits: usize,
+ r_s: *const core::ffi::c_void,
+ s_s: *const core::ffi::c_void,
+ proofs: *mut core::ffi::c_void,
+ srs: &SRS,
+ ) -> cuda::Error;
+}
+
+pub fn generate_groth16_proofs<S, PR>(
+    provers: &[Assignment<S>],
+ r_s: &[S],
+ s_s: &[S],
+ proofs: &mut [PR],
+ srs: &SRS,
+) {
+ let num_circuits = provers.len();
+
+ assert_eq!(r_s.len(), num_circuits);
+ assert_eq!(s_s.len(), num_circuits);
+ assert_eq!(proofs.len(), num_circuits);
+
+ let err = unsafe {
+ generate_groth16_proofs_c(
+ provers.as_ptr() as *const _,
+ num_circuits,
+ r_s.as_ptr() as *const _,
+ s_s.as_ptr() as *const _,
+ proofs.as_mut_ptr() as *mut _,
+ srs,
+ )
+ };
+
+ if err.code != 0 {
+ panic!("{}", String::from(err));
+ }
+}
diff --git a/extern/supraseal/c2/tests/c2.rs b/extern/supraseal/c2/tests/c2.rs
new file mode 100644
index 000000000..6b8a8e55d
--- /dev/null
+++ b/extern/supraseal/c2/tests/c2.rs
@@ -0,0 +1,4 @@
+#[test]
+fn run_seal() {
+    assert!(false, "c2 test has moved to /demos/c2-test");
+}
diff --git a/extern/supraseal/demos/.cargo/config.toml b/extern/supraseal/demos/.cargo/config.toml
new file mode 100644
index 000000000..978eb1f8b
--- /dev/null
+++ b/extern/supraseal/demos/.cargo/config.toml
@@ -0,0 +1,2 @@
+[patch.crates-io]
+supraseal-c2 = { path = "../c2" }
diff --git a/extern/supraseal/demos/c2-test/Cargo.toml b/extern/supraseal/demos/c2-test/Cargo.toml
new file mode 100644
index 000000000..58c4de8f7
--- /dev/null
+++ b/extern/supraseal/demos/c2-test/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "supraseal-c2-test"
+version = "0.1.0"
+edition = "2021"
+publish = false
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+
+[dev-dependencies]
+supraseal-c2 = "0"
+anyhow = "1.0.26"
+bincode = "1.1.2"
+filecoin-proofs = { version = "16.0.0", default-features = false, features = ["cuda-supraseal"] }
+storage-proofs-core = { version = "16.0.0", default-features = false, features = ["cuda-supraseal"] }
diff --git a/extern/supraseal/demos/c2-test/resources/test/commit-phase1-output b/extern/supraseal/demos/c2-test/resources/test/commit-phase1-output
new file mode 100644
index 000000000..11a4c09fe
Binary files /dev/null and b/extern/supraseal/demos/c2-test/resources/test/commit-phase1-output differ
diff --git a/extern/supraseal/demos/c2-test/src/main.rs b/extern/supraseal/demos/c2-test/src/main.rs
new file mode 100644
index 000000000..e7a11a969
--- /dev/null
+++ b/extern/supraseal/demos/c2-test/src/main.rs
@@ -0,0 +1,3 @@
+fn main() {
+ println!("Hello, world!");
+}
diff --git a/extern/supraseal/demos/c2-test/tests/c2.rs b/extern/supraseal/demos/c2-test/tests/c2.rs
new file mode 100644
index 000000000..70104663e
--- /dev/null
+++ b/extern/supraseal/demos/c2-test/tests/c2.rs
@@ -0,0 +1,83 @@
+// Copyright Supranational LLC
+
+const COMMIT_PHASE1_OUTPUT_FILE: &str = "resources/test/commit-phase1-output";
+
+use anyhow::Context;
+use bincode::deserialize;
+use std::fs::read;
+use std::path::PathBuf;
+use std::time::Instant;
+
+use filecoin_proofs::{
+ constants::SECTOR_SIZE_32_GIB, seal_commit_phase2, verify_seal,
+ PoRepConfig, SealCommitPhase1Output, SectorShape32GiB,
+};
+use storage_proofs_core::{api_version::ApiVersion, sector::SectorId};
+
+#[test]
+fn run_seal() {
+ let commit_phase1_output = {
+ let mut commit_phase1_output_path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
+ commit_phase1_output_path.push(COMMIT_PHASE1_OUTPUT_FILE);
+ println!("*** Restoring commit phase1 output file");
+ let commit_phase1_output_bytes = read(&commit_phase1_output_path)
+ .with_context(|| {
+ format!(
+ "couldn't read file commit_phase1_output_path={:?}",
+ commit_phase1_output_path
+ )
+ })
+ .unwrap();
+ println!(
+ "commit_phase1_output_bytes len {}",
+ commit_phase1_output_bytes.len()
+ );
+
+        let res: SealCommitPhase1Output<SectorShape32GiB> =
+ deserialize(&commit_phase1_output_bytes).unwrap();
+ res
+ };
+
+ let sector_id = SectorId::from(0);
+ let prover_id: [u8; 32] = [9u8; 32];
+ let arbitrary_porep_id = [99; 32];
+
+ let porep_config =
+ PoRepConfig::new_groth16(SECTOR_SIZE_32_GIB, arbitrary_porep_id, ApiVersion::V1_1_0);
+
+ let SealCommitPhase1Output {
+ vanilla_proofs: _,
+ comm_d,
+ comm_r,
+ replica_id: _,
+ seed,
+ ticket,
+ } = commit_phase1_output;
+
+ println!("Starting seal_commit_phase2");
+ let now = Instant::now();
+ let commit_output =
+ seal_commit_phase2(&porep_config, commit_phase1_output, prover_id, sector_id).unwrap();
+ println!("seal_commit_phase2 took: {:.2?}", now.elapsed());
+
+ println!("Verifying result");
+    let result = verify_seal::<SectorShape32GiB>(
+ &porep_config,
+ comm_r,
+ comm_d,
+ prover_id,
+ sector_id,
+ ticket,
+ seed,
+ &commit_output.proof,
+ )
+ .unwrap();
+
+    if result {
+ println!("Verification PASSED!");
+ } else {
+ println!("Verification FAILED!");
+ }
+
+ assert!(result, "Verification FAILED");
+}
diff --git a/extern/supraseal/demos/main.cpp b/extern/supraseal/demos/main.cpp
new file mode 100644
index 000000000..0c0e5aab7
--- /dev/null
+++ b/extern/supraseal/demos/main.cpp
@@ -0,0 +1,257 @@
+// Copyright Supranational LLC
+
+#include <cstdio>
+#include <cstdlib>
+#include <fstream>      // file read
+#include <iostream>     // printing
+#include <cstring>
+#include <arpa/inet.h>  // htonl
+#include <unistd.h>     // getopt
+#include <cassert>
+#include <thread>
+
+#include "../sealing/supra_seal.hpp"
+#include "../util/sector_util.hpp"
+
+uint8_t replica_id_buf_2K[] = { 24, 108, 245, 122, 161, 8, 61, 88, 51, 81, 141, 176, 97, 225, 25, 135, 218, 165, 249, 113, 195, 10, 255, 24, 6, 140, 145, 244, 253, 107, 8, 39 };
+uint8_t replica_id_buf_4K[] = { 2, 239, 249, 237, 200, 74, 74, 118, 230, 239, 207, 194, 109, 161, 27, 24, 208, 63, 44, 254, 14, 250, 200, 138, 74, 35, 123, 115, 123, 86, 98, 2 };
+uint8_t replica_id_buf_16K[] = { 240, 26, 25, 20, 201, 110, 242, 173, 62, 74, 255, 96, 37, 143, 120, 69, 91, 52, 81, 243, 134, 37, 112, 41, 27, 213, 208, 145, 107, 149, 76, 52 };
+uint8_t replica_id_buf_32K[] = { 50, 213, 77, 230, 65, 212, 193, 39, 25, 125, 41, 233, 147, 28, 126, 201, 217, 162, 65, 39, 132, 252, 61, 245, 39, 34, 32, 38, 158, 149, 24, 24 };
+uint8_t replica_id_buf_8M[] = { 23, 124, 26, 248, 237, 136, 178, 226, 193, 239, 173, 27, 131, 214, 147, 242, 18, 110, 7, 252, 4, 245, 118, 152, 94, 125, 73, 140, 25, 102, 152, 57 };
+uint8_t replica_id_buf_16M[] = { 0, 104, 11, 183, 198, 151, 180, 179, 187, 46, 233, 221, 244, 44, 204, 221, 108, 14, 17, 49, 254, 229, 229, 252, 200, 102, 16, 240, 84, 175, 220, 52 };
+uint8_t replica_id_buf_512M[] = { 37, 249, 121, 174, 70, 206, 91, 232, 165, 246, 66, 184, 198, 10, 232, 126, 215, 171, 221, 76, 26, 2, 117, 118, 201, 142, 116, 143, 25, 131, 167, 37 };
+uint8_t replica_id_buf_1G[] = { 36, 67, 76, 192, 211, 223, 90, 159, 60, 141, 212, 178, 36, 120, 21, 93, 28, 92, 79, 231, 31, 100, 115, 240, 114, 152, 20, 78, 80, 158, 122, 34 };
+uint8_t replica_id_buf_32G[] = { 121, 145, 135, 251, 187, 117, 51, 109, 88, 99, 80, 105, 79, 235, 85, 240, 147, 153, 120, 231, 144, 247, 244, 201, 42, 10, 149, 142, 203, 151, 188, 43 };
+uint8_t replica_id_buf_64G[] = { 96, 159, 133, 62, 63, 177, 24, 234, 146, 31, 140, 109, 39, 48, 219, 3, 168, 169, 249, 98, 25, 210, 33, 210, 4, 217, 45, 216, 99, 90, 114, 4 };
+
+// This ultimately comes from the sealing flows
+const char* get_parent_filename(size_t sector_size_lg) {
+ switch (sector_size_lg) {
+ case SectorSizeLg::Sector2KB:
+ // 2KB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-652bae61e906c0732e9eb95b1217cfa6afcce221ff92a8aedf62fa778fa765bc.cache";
+ case SectorSizeLg::Sector4KB:
+ // 4KB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-56d4865ec3476221fd1412409b5d9439182d71bf5e2078d0ecde76c0f7e33986.cache";
+ case SectorSizeLg::Sector16KB:
+ // 16KB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-41059e359f8a8b479f9e29bdf20344fcd43d9c03ce4a7d01daf2c9a77909fd4f.cache";
+ case SectorSizeLg::Sector32KB:
+ // 32KB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-81a0489b0dd6c7755cdce0917dd436288b6e82e17d596e5a23836e7a602ab9be.cache";
+ case SectorSizeLg::Sector8MB:
+ // 8MB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-1139cb33af3e3c24eb644da64ee8bc43a8df0f29fc96b5337bee369345884cdc.cache";
+ case SectorSizeLg::Sector16MB:
+ // 16MB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-7fa3ff8ffb57106211c4be413eb15ea072ebb363fa5a1316fe341ac8d7a03d51.cache";
+ case SectorSizeLg::Sector512MB:
+ // 512MB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-7ba215a1d2345774ab90b8cb1158d296e409d6068819d7b8c7baf0b25d63dc34.cache";
+ case SectorSizeLg::Sector1GB:
+ // 1GB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-637f021bceb5248f0d1dcf4dbf132fedc025d0b3b55d3e7ac171c02676a96ccb.cache";
+ case SectorSizeLg::Sector32GB:
+ // 32GB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-21981246c370f9d76c7a77ab273d94bde0ceb4e938292334960bce05585dc117.cache";
+ case SectorSizeLg::Sector64GB:
+ // 64GB
+ return "/var/tmp/filecoin-parents/v28-sdr-parent-767ee5400732ee77b8762b9d0dd118e88845d28bfa7aee875dc751269f7d0b87.cache";
+ default:
+      printf("ERROR: unknown sector size lg %zu\n", sector_size_lg);
+ return nullptr;
+ }
+}
+
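+// Two-slot pipeline: once slot0 finishes pc1, its pc2 runs concurrently with
+// slot1's pc1, overlapping the compute-heavy and I/O-heavy phases of
+// consecutive sector batches.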
+template<class P>
+void demo_pipeline(size_t num_sectors, uint8_t* replica_ids) {
+ size_t slot0 = 0;
+ size_t slot1 = get_slot_size(num_sectors, P::GetSectorSize()) * 1;
+ const char* parent_filename = get_parent_filename(P::GetSectorSizeLg());
+ const char* output_dir0 = "/var/tmp/supra_seal/0";
+ const char* output_dir1 = "/var/tmp/supra_seal/1";
+
+ printf("slot0 %08lx\n", slot0);
+ printf("slot1 %08lx\n", slot1);
+
+ // Fill slot0 pc1
+ printf("Starting slot0 pc1\n");
+ pc1(slot0, num_sectors, replica_ids, parent_filename, P::GetSectorSize());
+
+ // Slot0 PC2 + slot1 pc1
+ std::thread j0([&]() {
+ printf("Starting slot1 pc1\n");
+ pc1(slot1, num_sectors, replica_ids, parent_filename, P::GetSectorSize());
+ });
+ std::thread j1([&]() {
+ printf("Starting slot0 pc2\n");
+ pc2(slot0, num_sectors, output_dir0, nullptr, P::GetSectorSize());
+ });
+ j0.join();
+ j1.join();
+
+ // slot1 pc2
+ printf("Starting slot1 pc2\n");
+ pc2(slot1, num_sectors, output_dir1, nullptr, P::GetSectorSize());
+}
+
+int main(int argc, char** argv) {
+ uint64_t node_to_read = 0;
+ uint64_t slot = 0;
+ size_t num_sectors = 64;
+ std::string sector_size_string = "";
+ const char* output_dir = "/var/tmp/supra_seal/0";
+
+ enum { SEAL_MODE, READ_MODE, PARENTS_MODE, PIPELINE_MODE } mode = PIPELINE_MODE;
+ bool perform_pc1 = false;
+ bool perform_pc2 = false;
+ bool perform_c1 = false;
+
+ int opt;
+ while ((opt = getopt(argc, argv, "123r:s:n:po:b:h")) != -1) {
+ switch (opt) {
+ case '1':
+ mode = SEAL_MODE;
+ perform_pc1 = true;
+ break;
+ case '2':
+ mode = SEAL_MODE;
+ perform_pc2 = true;
+ break;
+ case '3':
+ mode = SEAL_MODE;
+ perform_c1 = true;
+ break;
+ case 'r':
+ mode = READ_MODE;
+ node_to_read = strtol(optarg, NULL, 16);
+ break;
+ case 's':
+ slot = strtol(optarg, NULL, 16);
+ break;
+ case 'o':
+ output_dir = optarg;
+ break;
+ case 'n':
+ num_sectors = strtol(optarg, NULL, 10);
+ break;
+ case 'p':
+ mode = PIPELINE_MODE;
+ break;
+ case 'b':
+ sector_size_string = optarg;
+ break;
+ case 'h':
+ printf("Usage: sudo ./seal [options]\n");
+ printf(" -1 - perform pc1\n");
+ printf(" -2 - perform pc2\n");
+ printf(" -3 - perform c1\n");
+ printf(" -p - perform pc1, pc2, and c1 pipeline (default)\n");
+ printf(" -n - number of parallel sectors (default 64)\n");
+      printf("  -b - sector size, e.g. 32GiB\n");
+      printf("  -r - node to read (hex)\n");
+      printf("  -s - slot offset (hex)\n");
+      printf("  -o - output directory\n");
+ exit(0);
+ break;
+ }
+ }
+
+ if (sector_size_string == "") {
+ printf("Please specify a sector size\n");
+ exit(0);
+ }
+
+ size_t sector_size = get_sector_size_from_string(sector_size_string);
+ size_t sector_size_lg;
+
+ SECTOR_PARAMS_TABLE(sector_size_lg = params.GetSectorSizeLg());
+
+ supra_seal_init(sector_size, "demos/rust/supra_seal.cfg");
+
+ // // 512mb random data
+ // uint8_t replica_id_buf_512M[] = {
+ // 89, 186, 126, 238, 239, 37, 73, 20,
+ // 148, 180, 147, 227, 154, 153, 224, 173,
+ // 101, 206, 212, 202, 229, 49, 100, 20,
+ // 19, 156, 251, 17, 68, 212, 238, 32
+ // };
+
+ uint8_t* replica_id_buf;
+  switch (sector_size_lg) {
+    case (size_t)SectorSizeLg::Sector2KB:
+      replica_id_buf = replica_id_buf_2K;
+      break;
+    case (size_t)SectorSizeLg::Sector4KB:
+      replica_id_buf = replica_id_buf_4K;
+      break;
+    case (size_t)SectorSizeLg::Sector16KB:
+      replica_id_buf = replica_id_buf_16K;
+      break;
+    case (size_t)SectorSizeLg::Sector32KB:
+      replica_id_buf = replica_id_buf_32K;
+      break;
+    case (size_t)SectorSizeLg::Sector8MB:
+      replica_id_buf = replica_id_buf_8M;
+      break;
+    case (size_t)SectorSizeLg::Sector16MB:
+      replica_id_buf = replica_id_buf_16M;
+      break;
+    case (size_t)SectorSizeLg::Sector512MB:
+      replica_id_buf = replica_id_buf_512M;
+      break;
+    case (size_t)SectorSizeLg::Sector1GB:
+      replica_id_buf = replica_id_buf_1G;
+      break;
+    case (size_t)SectorSizeLg::Sector32GB:
+      replica_id_buf = replica_id_buf_32G;
+      break;
+    case (size_t)SectorSizeLg::Sector64GB:
+      replica_id_buf = replica_id_buf_64G;
+      break;
+    default:
+      replica_id_buf = replica_id_buf_2K;
+      break;
+  }
+ uint8_t* replica_ids = new uint8_t[num_sectors * sizeof(replica_id_buf_2K)];
+ assert (replica_ids != nullptr);
+ for (size_t i = 0; i < num_sectors; i++) {
+ memcpy(&replica_ids[sizeof(replica_id_buf_2K) * i],
+ replica_id_buf, sizeof(replica_id_buf_2K));
+ }
+
+ if (mode == PIPELINE_MODE) {
+ SECTOR_PARAMS_TABLE(demo_pipeline