diff --git a/.gitmodules b/.gitmodules index 5daf41c03..773dea971 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,7 +1,3 @@ [submodule "extern/filecoin-ffi"] path = extern/filecoin-ffi url = https://github.com/filecoin-project/filecoin-ffi.git -[submodule "extern/supra_seal"] - path = extern/supra_seal - url = https://github.com/magik6k/supra_seal.git - branch = feat/multi-out-paths diff --git a/Makefile b/Makefile index 4b38df89d..62e46008b 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ BUILD_DEPS+=ffi-version-check ## BLST (from supraseal, but needed in curio) -BLST_PATH:=extern/supra_seal/ +BLST_PATH:=extern/supraseal/ BLST_DEPS:=.install-blst BLST_DEPS:=$(addprefix $(BLST_PATH),$(BLST_DEPS)) @@ -36,14 +36,13 @@ build/.blst-install: $(BLST_PATH) bash scripts/build-blst.sh @touch $@ -MODULES+=$(BLST_PATH) BUILD_DEPS+=build/.blst-install CLEAN+=build/.blst-install ## SUPRA-FFI ifeq ($(shell uname),Linux) -SUPRA_FFI_PATH:=extern/supra_seal/ +SUPRA_FFI_PATH:=extern/supraseal/ SUPRA_FFI_DEPS:=.install-supraseal SUPRA_FFI_DEPS:=$(addprefix $(SUPRA_FFI_PATH),$(SUPRA_FFI_DEPS)) @@ -53,7 +52,6 @@ build/.supraseal-install: $(SUPRA_FFI_PATH) cd $(SUPRA_FFI_PATH) && ./build.sh @touch $@ -# MODULES+=$(SUPRA_FFI_PATH) -- already included in BLST_PATH CLEAN+=build/.supraseal-install endif @@ -103,7 +101,7 @@ ifeq ($(shell uname),Linux) batchdep: build/.supraseal-install batchdep: $(BUILD_DEPS) -,PHONY: batchdep +.PHONY: batchdep batch: CURIO_TAGS+= supraseal batch: CGO_LDFLAGS_ALLOW='.*' diff --git a/documentation/en/supraseal.md b/documentation/en/supraseal.md index 6e05fb880..38a569739 100644 --- a/documentation/en/supraseal.md +++ b/documentation/en/supraseal.md @@ -64,9 +64,9 @@ Please consider contributing to the [SupraSeal hardware examples](https://github Please make sure to benchmark the raw NVME IOPS before proceeding with further configuration to verify that IOPS requirements are fulfilled. ```bash -cd extern/supra_seal/deps/spdk-v22.09/ +cd extern/supraseal/deps/spdk-v22.09/ -# repeat -b with all devices you plan to use with supra_seal +# repeat -b with all devices you plan to use with supraseal # NOTE: You want to test with ALL devices so that you can see if there are any bottlenecks in the system ./build/examples/perf -b 0000:85:00.0 -b 0000:86:00.0... -q 64 -o 4096 -w randread -t 10 ``` @@ -297,7 +297,7 @@ This is only needed while batch sealing is in beta, future versions of Curio wil {% endhint %} ```bash -cd extern/supra_seal/deps/spdk-v22.09/ +cd extern/supraseal/deps/spdk-v22.09/ env NRHUGE=36 ./scripts/setup.sh ``` @@ -399,7 +399,7 @@ To troubleshoot: If the [NVME Benchmark](supraseal.md#benchmark-nvme-iops) shows lower than expected IOPS, you can try formatting the NVMe devices with SPDK: ```bash -cd extern/supra_seal/deps/spdk-v22.09/ +cd extern/supraseal/deps/spdk-v22.09/ ./build/examples/nvme_manage ``` diff --git a/documentation/zh/supraseal.md b/documentation/zh/supraseal.md index ffe6a68f1..ef8326240 100644 --- a/documentation/zh/supraseal.md +++ b/documentation/zh/supraseal.md @@ -70,9 +70,9 @@ SupraSeal 是一个针对 Filecoin 优化的批量封装实现,允许并行封 在进行进一步配置之前,请确保对原始 NVME IOPS 进行基准测试,以验证是否满足 IOPS 要求。 ```bash -cd extern/supra_seal/deps/spdk-v22.09/ +cd extern/supraseal/deps/spdk-v22.09/ -# repeat -b with all devices you plan to use with supra_seal +# repeat -b with all devices you plan to use with supraseal # 注意:您需要测试所有设备,以便查看系统中是否存在任何瓶颈 ./build/examples/perf -b 0000:85:00.0 -b 0000:86:00.0... 
-q 64 -o 4096 -w randread -t 10 @@ -312,7 +312,7 @@ Hugepagesize: 1048576 kB {% endhint %} ```bash -cd extern/supra_seal/deps/spdk-v22.09/ +cd extern/supraseal/deps/spdk-v22.09/ env NRHUGE=36 ./scripts/setup.sh ``` diff --git a/extern/supra_seal b/extern/supra_seal deleted file mode 160000 index 4b5641401..000000000 --- a/extern/supra_seal +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 4b5641401318d37906e56e2dbc61b3ec9d0a9257 diff --git a/extern/supraseal/.gitignore b/extern/supraseal/.gitignore new file mode 100644 index 000000000..b5112de22 --- /dev/null +++ b/extern/supraseal/.gitignore @@ -0,0 +1,10 @@ +/bin +/c2/bellperson +/deps +/obj +**/target +*~ +*.log +*.swp +*.lock +.install-supraseal diff --git a/extern/supraseal/LICENSE b/extern/supraseal/LICENSE new file mode 100644 index 000000000..261eeb9e9 --- /dev/null +++ b/extern/supraseal/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/extern/supraseal/README.md b/extern/supraseal/README.md
new file mode 100644
index 000000000..81c92ee5d
--- /dev/null
+++ b/extern/supraseal/README.md
@@ -0,0 +1,338 @@
+# SupraSeal
+
+SupraSeal is a highly optimized collection of Filecoin sealing primitives intended to be used by storage providers who require high throughput. The PC1, PC2, C1, and C2 subsections provide more details on usage. Note that this is not a standalone library; the primitives are intended to be used in a storage provider's application of choice.
+
+# Architecture and Design Considerations
+
+## Sealing Operations
+
+For a single sector, the sealing operation consists of the following steps: Add Piece, Pre-commit 1, Pre-commit 2, Wait Seed, Commit 1, and Commit 2. In order to maximize application flexibility, SupraSeal is designed to handle a subset of these operations independently.
+- **Add Piece**: the process of concatenating pieces of data in order to fill a sector (e.g. 32GB). SupraSeal does not address this operation given its trivial compute requirements and the large number of possible ways to ingest data.
+- **Pre-commit 1 (PC1)**: the generation of the stacked depth robust graph. SupraSeal parallelizes this operation across a power-of-two number of sectors (up to 128) and is designed to maximize throughput.
+- **Pre-commit 2 (PC2)**: the creation of two Merkle trees built from the graph columns and the replica. SupraSeal supports the generation of tree c across the parallel sectors created by PC1. For Committed Capacity (CC) sectors, SupraSeal supports the parallel generation of tree r. Alternatively, for non-CC sectors (customer data), SupraSeal offers GPU- and CPU-based single-sector replica encoding and tree r generation.
+- **Wait Seed**: the 150 epoch (~75 minute) gap between submission of Pre-commit 2 and the availability of randomness (seed) from the chain. SupraSeal does not interact with the chain and therefore relies on the application to obtain the seed.
+- **Commit 1 (C1)**: the generation of node challenges using the seed. SupraSeal will build the inclusion proofs from the parallel layers generated in PC1 and the Merkle trees from PC2. The C1 API operates on a single sector at a time, as opposed to PC1 and PC2 which operate on all sectors at once. 
The reason is that each sector has a different set of challenges, which makes parallelization less effective. If working with non-CC (customer data) sectors, then tree D and tree R must be provided to SupraSeal. Both trees can be generated from deal data using standalone SupraSeal utilities.
+- **Commit 2 (C2)**: the zkSNARK proof generation using the inclusion proofs from C1. SupraSeal provides the portion of the Groth16 proof computation that follows constraint evaluation. There is no complete C2 API within SupraSeal; the expectation is that the heavy Groth16 compute function is integrated directly into the existing Filecoin C2 APIs.
+
+For a more detailed discussion of PC1 see [pc1/README.md](pc1/README.md).
+
+## Intended Usage
+
+There are two primary ways for storage providers to participate in the Filecoin network: providing CC sectors or servicing customer data. How one uses SupraSeal depends on which of these is chosen. Here is a sample flow to consider when building an application on top of SupraSeal.
+
+```mermaid
+flowchart TD;
+    Start[Start] --> SisCC{Is CC?};
+    SisCC --> |Yes| C;
+    SisCC --> |No| AP[Add Piece];
+    AP --> PD{Piece Full?};
+    PD --> |No| Start;
+    PD --> |Yes| B[Build Tree D];
+    B --> C[Calculate Replica ID];
+    C --> BR{Batch Ready?};
+    BR --> |Yes| BG[Build Graph];
+    BR --> |No| Start;
+    subgraph SupraSeal PC1;
+    BG;
+    end;
+    subgraph SupraSeal PC2 TreeC;
+    BG --> |Parallel Labels| E[Calculate Column Digests];
+    E --> F[Build Tree C];
+    end;
+    BG --> pc2cc{Is CC?};
+    pc2cc --> |Yes
Parallel Layers| G[Build Tree R]; + pc2cc --> |No
Single Layer| sRep[Calculate Replica];
+    subgraph SupraSeal PC2 Parallel TreeR;
+    G;
+    end;
+    subgraph SupraSeal PC2 Comm R;
+    G --> |Comm R Last| H[Calculate Comm R];
+    F --> |Comm C| H;
+    end;
+    subgraph SupraSeal PC2 Single TreeR;
+    sRep --> sTR[Build Tree R Single];
+    end;
+    sTR --> |Comm R Last| H;
+    H --> WS[Wait Seed];
+    WS --> c1Cc{Is CC?};
+    c1Cc --> |No| rTD[Build Tree D & R Proofs];
+    c1Cc --> |Yes| TD[Build Tree D & R Proofs];
+    WS --> CP;
+    WS --> LP;
+    subgraph SupraSeal C1 Local ;
+    TD[Build Tree D & R Proofs];
+    CP[Build Column Proofs];
+    LP[Build Label Proofs];
+    TD --> c1out[C1 Output];
+    CP --> c1out;
+    LP --> c1out;
+    end;
+    subgraph SupraSeal C1 Remote ;
+    rTD[Build Tree D & R Proofs];
+    end;
+    rTD --> c1out;
+    c1out --> r1cs[R1CS Generation];
+    subgraph SupraSeal C2;
+    r1cs --> GP[Calculate Groth16 Proof];
+    end;
+```
+
+## Object Sizes
+
+The table below outlines the object sizes to expect for a 32 GB sector. The total data for a parallel operation would be these values multiplied by the number of sectors; for example, a 128-sector batch needs 128 * 352 GB = 45 TB of NVMe space for the stacked DRG alone. Keep this in mind when provisioning hardware, as it will impact NVMe and hard drive capacity, memory, and network bandwidth.
+
+| Object | Size | Notes |
+|--------------|:-----:|-----------|
+| Piece File | 32 GB | Data to be sealed |
+| Tree D | 64 GB | Piece File is leaves, binary tree on top |
+| Replica ID | 32 B | Derived unique identifier for each sector |
+| Stacked DRG | 352 GB | PC1 Graph is layers (11) * num_nodes (1B) * 32 B |
+| Tree C | 36.6 GB | 8-arity tree with num_nodes (1B) * 32 B leaves |
+| Comm C | 32 B | Tree C Commitment (root of tree C) |
+| Tree R | 73.1 MB | Same as Tree C except leaves are replica and 2 rows are discarded |
+| Tree R Root | 32 B | Root of Tree R |
+| Comm R | 32 B | Poseidon hash of Tree C Root \|\| Tree R Root |
+| DRG last layer (Key) | 32 GB | Last layer of DRG is the key to encode replica |
+| Replica | 32 GB | Output of sealing process, Piece File + Key |
+| Tree D Inclusion Paths | 265 KB | Merkle tree D inclusion paths for C1 challenges |
+| Tree R Inclusion Paths | 435 KB | Merkle tree R inclusion paths for C1 challenges |
+| C1 Output | 10.5 MB | Result of C1 to provide C2 for proving |
+
+## API
+
+The CC sector API is very straightforward; we have demo code for both C and Rust access.
+
+```
+// Optional init function. If used this must be the first library
+// function called.
+// \config_file Topology config file. Defaults to supra_config.cfg
+void supra_seal_init(const char* config_file);
+
+// Returns the highest node offset address in the NVMe array. This is useful
+// for calculating if a new PC1 batch of parallel sectors will fit.
+size_t get_max_block_offset();
+
+// Returns the size of a PC1 batch for the specified number of parallel sectors.
+// Used to calculate the next block offset as well as to determine if
+// enough NVMe space is available.
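+//
+// A minimal usage sketch (hypothetical application code; next_offset is
+// application state tracking the next free NVMe offset):
+//   size_t slot = get_slot_size(num_sectors);
+//   if (next_offset + slot <= get_max_block_offset()) {
+//     pc1(next_offset, num_sectors, replica_ids, parents_cache_path);
+//     next_offset += slot;
+//   }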
+size_t get_slot_size(size_t num_sectors); + +// Perform PC1 operation on a number of sectors in parallel +// +// \block_offset Index within NVMe to store graph +// \num_sectors Number of sectors to operate on in parallel +// \replica_ids Flattened array of ReplicaIds for all sectors +// \parents_filename Filesystem location of parents graph cache file +int pc1(size_t block_offset, + size_t num_sectors, + const uint8_t* replica_ids, + const char* parents_filename); + +// Perform PC2 operation on a number of sectors in parallel +// +// \block_offset Index within NVMe to retrieve graph +// \num_sectors Number of sectors to operate on in parallel +// \output_dir Filesystem location to store results (trees, p_aux, etc) +int pc2(size_t block_offset, + size_t num_sectors, + const char* output_dir); + +// Perform C1 operation on a single sector +// +// \block_offset Index within NVMe to retrieve graph +// \num_sectors Number of sectors that are in graph, used to index +// \sector_slot Index in sectors for this sector +// \replica_id Sector ReplicaId +// \seed Sector wait seed result to use for challenge generation +// \ticket Original ticket used in ReplicaId generation +// \cache_path Filesystem location of pc2 results and to store output +// \parents_filename Filesystem location of parents graph cache file +int c1(size_t block_offset, + size_t num_sectors, + size_t sector_slot, + const uint8_t* replica_id, + const uint8_t* seed, + const uint8_t* ticket, + const char* cache_path, + const char* parents_filename); +``` + +# Reference Platform + +Our reference configuration consists of: +- Threadripper PRO 5995WX +- ASUS WRX80E SAGE Motherboard +- 512GB Memory +- 16 Samsung 7.68TB U.2 Drives +- Corsair Dual SSD Mounting Bracket +- 4 Supermicro AOC-SLG4-4E4T NVMe HBA +- Nvidia RTX 4090 (Suprim Liquid X) +- EVGA SuperNOVA 2000 G+ +- Lian Li V3000 Plus Case +- Ubuntu 22.04 +- SPDK v22.09 + +# Prerequisites + +### Install dependencies +``` +sudo apt install build-essential libconfig++-dev libgmp-dev +``` + +Install Rust if necessary +``` +curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh +source $HOME/.cargo/env +rustup toolchain install nightly +rustup default nightly +``` + +### Enable Huge Pages (1GB): +``` +sudo vi /etc/default/grub +GRUB_CMDLINE_LINUX_DEFAULT="default_hugepagesz=1G hugepagesz=1G hugepages=36" +GRUB_CMDLINE_LINUX="default_hugepagesz=1G hugepagesz=1G hugepages=36" +sudo update-grub +sudo reboot +``` + +You can confirm huge pages are enabled with: +``` +grep Huge /proc/meminfo + +# Look for: +HugePages_Total: 36 +HugePages_Free: 36 +``` + +Additionally you may need to enable huge pages after boot using: +``` +sudo sysctl -w vm.nr_hugepages=36 +``` + +Due to the random reads, if the page table was built with 4KB pages then there would be a significant number of costly faults. Moving to 1GB pages alleviates this problem. + +### Install CUDA + +If CUDA is not already installed, the latest toolkit is available [here](https://developer.nvidia.com/cuda-downloads) + +The minimum version required is 11.x + +### Build this repository + +During the build process it will clone and build SPDK, sppark, and blst. +``` +./build.sh +``` + +SPDK must be setup after every reboot: +``` +cd deps/spdk-v22.09 +sudo env NRHUGE=36 ./scripts/setup.sh +``` + +During the setup process SPDK will take control of any NVMe drives that do not have a filesystem. + +# Configuration + +The software is configured using the file `demos/rust/supra_seal.cfg`. 
This file contains the core topology used (assigning threads to cores) as well as the NVMe configuration. There is also a configuration, `demos/rust/supra_seal_zen2.cfg`, intended for systems older than Zen 3, which assigns one hashing thread (two sectors) per physical core rather than the default of two hashing threads per physical core. The configuration file can be changed in `demos/main.cpp` and `demos/rust/main.rs`.
+
+### NVMe
+
+The NVMe configuration must be adapted to the local system. SPDK can be used to identify attached NVMe devices and their addresses with the following command:
+```
+sudo ./scripts/setup.sh status
+```
+
+For more extensive information about attached devices:
+```
+sudo ./build/examples/identify
+```
+
+This will show the NVMe disks (controllers) along with their addresses, which will resemble `0000:2c:00.0`. The address list in `supra_seal.cfg` should be updated for the local drives.
+
+### NVMe Performance Testing
+
+This software requires NVMe drives that support a high rate of random read IOPS. SPDK has a performance tester in the example directory that can be used to measure the random read IOPS of your devices. In order to minimize PC1 latency the software targets approximately 10-15M IOPS at the system level; with drives like the [Samsung PM9A3](https://semiconductor.samsung.com/ssd/datacenter-ssd/pm9a3/) we generally see around 1M IOPS per drive.
+```
+build/examples/perf -b <pci_address> -q 64 -o 4096 -w randread -t 10
+```
+
+### Local filesystem
+
+The PC2 and C1 processes write files into the local filesystem (`/var/tmp/supra_seal`). For best performance this should be a dedicated disk, ideally separate from the disk where the parent cache is stored, so that writing during PC2 does not impact read performance during PC1. The simplest way is to symlink `/var/tmp/supra_seal` to the desired location, but the paths can also be adjusted in `demos/main.cpp` and `demos/rust/main.rs`.
+
+Empirically we have found it preferable to consolidate the disks with filesystems within the same PCIe controller, thereby minimizing mixing with SPDK-owned disks. Presumably the differing nature of the IO traffic (read-heavy for PC1 vs. write-heavy for PC2) results in lower performance when mixed.
+
+We also use F2FS for the PC2 storage disks, as it is designed specifically for flash-based storage. If using ext4, disabling journaling is recommended.
+
+Finally, we recommend mounting the PC2 drives with the `lazytime` option to avoid frequent metadata updates (`mount -o lazytime`).
+
+# Running
+
+There are both Rust- and C++-based demos that will perform PC1, PC2, and C1 on multiple pipelines. They both demonstrate concurrent PC1/PC2/C1 processes along the lines of the flowchart below. The main constraints exist around PC1 and PC2, which heavily utilize the CPU cores and GPU(s) respectively, so care must be taken to stage them for best performance. For example, two PC1s or two PC2s should not typically be run concurrently.
+
+```mermaid
+---
+displayMode: compact
+---
+gantt
+    title Sealing Pipeline
+    dateFormat HH-mm
+    axisFormat %Hh%M
+    section Hashing Cores
+    PipeA Slot0 PC1:a1, 00-00, 3.5h
+    PipeB Slot1 PC1:b1, after a1 , 3.5h
+    PipeC Slot0 PC1:c1, after b1 , 3.5h
+    PipeD Slot1 PC1:d1, after c1 , 3.5h
+    section GPU
+    PipeA Slot0 PC2 :a2, after a1, 1.5h
+    PipeB Slot1 PC2 :b2, after b1, 1.5h
+    PipeA Slot0 C2 :a4, after b2, 2.0h
+    PipeC Slot0 PC2 :c2, after c1, 1.5h
+    PipeB Slot1 C2 :b4, after c2, 2.0h
+    section Filesystem
+    PipeA Slot0 Write: a5, after a2, 2.0h
+    PipeA Slot0 Wait :a3, after a2, 1.25h
+    PipeA Slot0 Clear: a6, after a3, 0.75h
+    PipeB Slot1 Write: b5, after b2, 2.0h
+    PipeB Slot1 Wait :b3, after b2, 1.25h
+    PipeB Slot1 Clear: b6, after b3, 0.75h
+    PipeC Slot0 Write: c5, after c2, 2.0h
+    PipeC Slot0 Wait :c3, after c2, 1.25h
+    PipeC Slot0 Clear: c6, after c3, 0.75h
+```
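+
+The sketch below is a hypothetical illustration (it is not part of the demos) of this staging constraint: PC1 runs are serialized on the hashing cores and PC2 runs on the GPU, while the stages of different pipelines still overlap.
+
+```
+// Hypothetical staging sketch; pc1()/pc2() are the library calls shown above.
+#include <cstddef>
+#include <mutex>
+
+std::mutex cpu_busy;  // at most one PC1 at a time
+std::mutex gpu_busy;  // at most one PC2 at a time
+
+void run_pipeline(size_t block_offset, size_t num_sectors) {
+  { std::lock_guard<std::mutex> lk(cpu_busy); /* pc1(block_offset, num_sectors, ...) */ }
+  { std::lock_guard<std::mutex> lk(gpu_busy); /* pc2(block_offset, num_sectors, ...) */ }
+  // wait seed, then c1(...) for each sector slot
+}
+```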
+
+```
+# Rust
+./exec.sh -b 32GiB
+
+# C++
+./build.sh # Also called automatically by exec.sh
+sudo ./bin/seal
+```
diff --git a/extern/supraseal/build.sh b/extern/supraseal/build.sh
new file mode 100755
index 000000000..4d789f571
--- /dev/null
+++ b/extern/supraseal/build.sh
@@ -0,0 +1,239 @@
+#!/bin/bash
+
+# Copyright Supranational LLC
+
+set -e
+set -x
+
+SECTOR_SIZE="" # Compile for all sector sizes
+while getopts r flag
+do
+    case "${flag}" in
+        r) SECTOR_SIZE="-DRUNTIME_SECTOR_SIZE";;
+    esac
+done
+
+# Function to check GCC version
+check_gcc_version() {
+    local gcc_version=$(gcc -dumpversion | cut -d. -f1)
+    if [ "$gcc_version" != "11" ]; then
+        if command -v gcc-11 &> /dev/null && command -v g++-11 &> /dev/null; then
+            echo "GCC version is not 11. Setting CC, CXX, and NVCC_PREPEND_FLAGS to use GCC 11."
+            export CC=gcc-11
+            export CXX=g++-11
+            export NVCC_PREPEND_FLAGS='-ccbin /usr/bin/g++-11'
+        else
+            echo "Error: GCC 11 is required but not found. Please install GCC 11 and try again."
+            echo "You can typically install it using your package manager. For example:"
+            echo "  On Ubuntu: sudo apt-get install gcc-11 g++-11"
+            echo "  On Fedora: sudo dnf install gcc-11 gcc-c++-11"
+            echo "  On Arch: Install gcc11 from AUR"
+            exit 1
+        fi
+    fi
+}
+
+# Call the function to check GCC version
+check_gcc_version
+
+CC=${CC:-cc}
+CXX=${CXX:-c++}
+NVCC=${NVCC:-nvcc}
+
+CUDA=$(dirname $(dirname $(which $NVCC)))
+SPDK="deps/spdk-v22.09"
+CUDA_ARCH="-arch=sm_80 -gencode arch=compute_70,code=sm_70 -t0"
+CXXSTD=`$CXX -dM -E -x c++ /dev/null | \
+        awk '{ if($2=="__cplusplus" && $3<"2017") print "-std=c++17"; }'`
+
+INCLUDE="-I$SPDK/include -I$SPDK/isa-l/.. 
-I$SPDK/dpdk/build/include" +CFLAGS="$SECTOR_SIZE $INCLUDE -g -O2" +CXXFLAGS="$CFLAGS -march=native $CXXSTD \ + -fPIC -fno-omit-frame-pointer -fno-strict-aliasing \ + -fstack-protector -fno-common \ + -D_GNU_SOURCE -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 \ + -DSPDK_GIT_COMMIT=4be6d3043 -pthread \ + -Wall -Wextra -Wno-unused-variable -Wno-unused-parameter -Wno-missing-field-initializers \ + -Wformat -Wformat-security" + +LDFLAGS="-fno-omit-frame-pointer -Wl,-z,relro,-z,now -Wl,-z,noexecstack -fuse-ld=bfd\ + -L$SPDK/build/lib \ + -Wl,--whole-archive -Wl,--no-as-needed \ + -lspdk_bdev_malloc \ + -lspdk_bdev_null \ + -lspdk_bdev_nvme \ + -lspdk_bdev_passthru \ + -lspdk_bdev_lvol \ + -lspdk_bdev_raid \ + -lspdk_bdev_error \ + -lspdk_bdev_gpt \ + -lspdk_bdev_split \ + -lspdk_bdev_delay \ + -lspdk_bdev_zone_block \ + -lspdk_blobfs_bdev \ + -lspdk_blobfs \ + -lspdk_blob_bdev \ + -lspdk_lvol \ + -lspdk_blob \ + -lspdk_nvme \ + -lspdk_bdev_ftl \ + -lspdk_ftl \ + -lspdk_bdev_aio \ + -lspdk_bdev_virtio \ + -lspdk_virtio \ + -lspdk_vfio_user \ + -lspdk_accel_ioat \ + -lspdk_ioat \ + -lspdk_scheduler_dynamic \ + -lspdk_env_dpdk \ + -lspdk_scheduler_dpdk_governor \ + -lspdk_scheduler_gscheduler \ + -lspdk_sock_posix \ + -lspdk_event \ + -lspdk_event_bdev \ + -lspdk_bdev \ + -lspdk_notify \ + -lspdk_dma \ + -lspdk_event_accel \ + -lspdk_accel \ + -lspdk_event_vmd \ + -lspdk_vmd \ + -lspdk_event_sock \ + -lspdk_init \ + -lspdk_thread \ + -lspdk_trace \ + -lspdk_sock \ + -lspdk_rpc \ + -lspdk_jsonrpc \ + -lspdk_json \ + -lspdk_util \ + -lspdk_log \ + -Wl,--no-whole-archive $SPDK/build/lib/libspdk_env_dpdk.a \ + -Wl,--whole-archive $SPDK/dpdk/build/lib/librte_bus_pci.a \ + $SPDK/dpdk/build/lib/librte_cryptodev.a \ + $SPDK/dpdk/build/lib/librte_dmadev.a \ + $SPDK/dpdk/build/lib/librte_eal.a \ + $SPDK/dpdk/build/lib/librte_ethdev.a \ + $SPDK/dpdk/build/lib/librte_hash.a \ + $SPDK/dpdk/build/lib/librte_kvargs.a \ + $SPDK/dpdk/build/lib/librte_mbuf.a \ + $SPDK/dpdk/build/lib/librte_mempool.a \ + $SPDK/dpdk/build/lib/librte_mempool_ring.a \ + $SPDK/dpdk/build/lib/librte_net.a \ + $SPDK/dpdk/build/lib/librte_pci.a \ + $SPDK/dpdk/build/lib/librte_power.a \ + $SPDK/dpdk/build/lib/librte_rcu.a \ + $SPDK/dpdk/build/lib/librte_ring.a \ + $SPDK/dpdk/build/lib/librte_telemetry.a \ + $SPDK/dpdk/build/lib/librte_vhost.a \ + -Wl,--no-whole-archive \ + -lnuma -ldl \ + -L$SPDK/isa-l/.libs -lisal \ + -pthread -lrt -luuid -lssl -lcrypto -lm -laio" + +# Check for the default result directory +# if [ ! -d "/var/tmp/supraseal" ]; then +# mkdir -p /var/tmp/supraseal +# fi + +rm -fr obj +mkdir -p obj + +rm -fr bin +mkdir -p bin + +mkdir -p deps +if [ ! -d $SPDK ]; then + git clone --branch v22.09 https://github.com/spdk/spdk --recursive $SPDK + (cd $SPDK + sudo scripts/pkgdep.sh + ./configure --with-virtio --with-vhost + make -j 10) +fi +if [ ! -d "deps/sppark" ]; then + git clone --branch v0.1.10 https://github.com/supranational/sppark.git deps/sppark +fi +if [ ! 
-d "deps/blst" ]; then
+    git clone https://github.com/supranational/blst.git deps/blst
+    (cd deps/blst
+     git checkout bef14ca512ea575aff6f661fdad794263938795d
+     ./build.sh -march=native)
+fi
+
+$CC -c sha/sha_ext_mbx2.S -o obj/sha_ext_mbx2.o
+
+# Generate .h files for the Poseidon constants
+xxd -i poseidon/constants/constants_2 > obj/constants_2.h
+xxd -i poseidon/constants/constants_4 > obj/constants_4.h
+xxd -i poseidon/constants/constants_8 > obj/constants_8.h
+xxd -i poseidon/constants/constants_11 > obj/constants_11.h
+xxd -i poseidon/constants/constants_16 > obj/constants_16.h
+xxd -i poseidon/constants/constants_24 > obj/constants_24.h
+xxd -i poseidon/constants/constants_36 > obj/constants_36.h
+
+# PC1
+$CXX $CXXFLAGS -Ideps/sppark/util -o obj/pc1.o -c pc1/pc1.cpp &
+
+# PC2
+$CXX $CXXFLAGS -o obj/streaming_node_reader_nvme.o -c nvme/streaming_node_reader_nvme.cpp &
+$CXX $CXXFLAGS -o obj/ring_t.o -c nvme/ring_t.cpp &
+$NVCC $CFLAGS $CUDA_ARCH -std=c++17 -DNO_SPDK -Xcompiler -march=native \
+      -Xcompiler -Wall,-Wextra,-Wno-subobject-linkage,-Wno-unused-parameter \
+      -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -c pc2/cuda/pc2.cu -o obj/pc2.o &
+
+$CXX $CXXFLAGS $INCLUDE -Iposeidon -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src \
+     -c sealing/supra_seal.cpp -o obj/supra_seal.o -Wno-subobject-linkage &
+
+wait
+
+# Sppark object dedupe
+nm obj/pc2.o | grep -E 'select_gpu|all_gpus|cuda_available|gpu_props|ngpus' | awk '{print $3 " supra_" $3}' > symbol_rename.txt
+
+for obj in obj/pc1.o obj/pc2.o obj/ring_t.o obj/streaming_node_reader_nvme.o obj/supra_seal.o obj/sha_ext_mbx2.o; do
+    objcopy --redefine-syms=symbol_rename.txt $obj
+done
+
+rm symbol_rename.txt
+
+ar rvs obj/libsupraseal.a \
+   obj/pc1.o \
+   obj/pc2.o \
+   obj/ring_t.o \
+   obj/streaming_node_reader_nvme.o \
+   obj/supra_seal.o \
+   obj/sha_ext_mbx2.o
+
+$CXX $CXXFLAGS -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src \
+    -o bin/seal demos/main.cpp \
+    -Lobj -lsupraseal \
+    $LDFLAGS -Ldeps/blst -lblst -L$CUDA/lib64 -lcudart_static -lgmp -lconfig++ &
+
+# tree-r CPU only
+$CXX $SECTOR_SIZE $CXXSTD -pthread -g -O3 -march=native \
+     -Wall -Wextra -Werror -Wno-subobject-linkage \
+     tools/tree_r.cpp poseidon/poseidon.cpp \
+     -o bin/tree_r_cpu -Iposeidon -Ideps/sppark -Ideps/blst/src -L deps/blst -lblst &
+
+# tree-r CPU + GPU
+$NVCC $SECTOR_SIZE -DNO_SPDK -DSTREAMING_NODE_READER_FILES \
+      $CUDA_ARCH -std=c++17 -g -O3 -Xcompiler -march=native \
+      -Xcompiler -Wall,-Wextra,-Werror \
+      -Xcompiler -Wno-subobject-linkage,-Wno-unused-parameter \
+      -x cu tools/tree_r.cpp -o bin/tree_r \
+      -Iposeidon -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -L deps/blst -lblst -lconfig++ &
+
+# tree-d CPU only
+$CXX -DRUNTIME_SECTOR_SIZE $CXXSTD -g -O3 -march=native \
+     -Wall -Wextra -Werror -Wno-subobject-linkage \
+     tools/tree_d.cpp \
+     -o bin/tree_d_cpu -Ipc1 -L deps/blst -lblst &
+
+# Standalone GPU pc2
+$NVCC $SECTOR_SIZE -DNO_SPDK -DSTREAMING_NODE_READER_FILES \
+      $CUDA_ARCH -std=c++17 -g -O3 -Xcompiler -march=native \
+      -Xcompiler -Wall,-Wextra,-Werror \
+      -Xcompiler -Wno-subobject-linkage,-Wno-unused-parameter \
+      -x cu tools/pc2.cu -o bin/pc2 \
+      -Iposeidon 
-Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -L deps/blst -lblst -lconfig++ &
+
+wait
diff --git a/extern/supraseal/c1/README.md b/extern/supraseal/c1/README.md
new file mode 100644
index 000000000..2f1580348
--- /dev/null
+++ b/extern/supraseal/c1/README.md
@@ -0,0 +1,38 @@
+# Commit 1
+
+Once the security wait time has passed since the pre-commitment, randomness is collected from the chain to create a series of challenges. The Commit 1 (C1) phase is the process of deriving those node challenges and generating the associated inclusion proofs.
+
+## Intended Usage
+
+The SupraSeal C1 functions operate on a single sector at a time, since they are not performance critical and the randomness of the challenges does not allow NVMe reads to be amortized across parallel sectors. Two factors determine how the SupraSeal library is used to generate the C1 proofs: whether or not this is a CC sector, and whether or not the tree proofs will be generated locally. For CC and local non-CC sectors, a single function call covers the entire operation; the only difference is providing a path to the Tree D and Replica files, as opposed to using the known structure of CC Tree D and pulling the Replica from the interleaved last layer on NVMe. For scenarios where the data is not present locally, the remote host that holds the data and replica is responsible for building the tree proofs and returning them; these tree proofs are then combined with the locally generated column and label proofs to produce the C1 output. There are SupraSeal functions in place to support all of these models, with orchestration left to the application.
+
+```mermaid
+flowchart TD;
+    sc1[Start C1 Sector i] --> isCC{is CC or
all local?}; + sc1 --> CP; + sc1 --> LP; + isCC --> |False| rTD; + isCC --> |True| TD; + subgraph SupraSeal C1 Local; + TD[Build Tree D & R Proofs]; + CP[Build Column Proofs]; + LP[Build Label Proofs]; + TD --> TRloc[Tree R Proofs]; + TRloc --> chD[Choose Tree Proofs]; + TD --> isTCC{is CC?}; + isTCC --> |False| tdd[Tree D Data Proofs]; + isTCC --> |True| tdc[Tree D CC Proofs]; + tdd --> chD; + tdc --> chD; + end; + subgraph SupraSeal C1 Remote; + rTD[Build Tree D & R Proofs]; + rTD --> rtdd[Tree D Data Proofs]; + rTD --> rtr[Tree R Proofs]; + end; + rtdd --> chD; + rtr --> chD; + CP --> c1out[C1 Output]; + LP --> c1out; + chD --> c1out; +``` diff --git a/extern/supraseal/c1/c1.hpp b/extern/supraseal/c1/c1.hpp new file mode 100644 index 000000000..366116bc9 --- /dev/null +++ b/extern/supraseal/c1/c1.hpp @@ -0,0 +1,546 @@ +// Copyright Supranational LLC + +// Filecoin Sealing Commit 1 (C1) operation + +#include // uint* +#include // memcpy +#include // file read +#include // file open +#include // file close +#include // mapping +#include // file stats +#include // printing +#include // printing +#include // assertions +#include // log2 +#include // gmp for challenge modulo operation +#include // + +#include +#include "tree_d_cc_nodes.h" +#include "../poseidon/poseidon.hpp" +#include "../sha/sha_functions.hpp" +#include "../util/mmap_t.hpp" + +#include "path_element.hpp" +#include "tree_proof.hpp" +#include "column_proof.hpp" +#include "label_proof.hpp" +#include "challenge.hpp" + +template +class C1 { + public: + C1(streaming_node_reader_t& reader, size_t sector_slot); + ~C1(); + + void SetReplicaID(const node_t* replica_id) { replica_id_ = replica_id; } + void SetTicket(const node_t* ticket) { ticket_ = ticket; } + + void DeriveChallenges(const uint8_t* seed); + + void SetTreeRBufs(const char* tree_r_cache, const char* file_prefix, + bool include_slot = false) { + size_t num_files = C::GetNumTreeRCFiles(); + tree_r_bufs_.resize(num_files); + SetTreeBufs(&tree_r_bufs_[0], tree_r_cache, + file_prefix, num_files, include_slot); + } + + void SetTreeCBufs(const char* tree_c_cache, const char* file_prefix, + bool include_slot = false) { + size_t num_files = C::GetNumTreeRCFiles(); + tree_c_bufs_.resize(num_files); + SetTreeBufs(&tree_c_bufs_[0], tree_c_cache, + file_prefix, num_files, include_slot); + } + + void SetTreeDBuf(const char* tree_d_cache, const char* file_prefix, + bool include_slot = false) { + SetTreeBufs(&tree_d_buf_, tree_d_cache, + file_prefix, 1, include_slot); + if (tree_d_buf_.is_open() == 0) { + printf("No tree d file, assuming CC sector\n"); + // TODO: for 64GB would need to access the next layer. CC_TREE_D_NODE_VALUES + // would need to be filled in. 
+ assert (C::GetNumTreeDLevels() <= 31); + comm_d_ = (node_t*) CC_TREE_D_NODE_VALUES[C::GetNumTreeDLevels()]; + } else { + uint8_t* comm_d_addr = (uint8_t*)&tree_d_buf_[0] + + (tree_d_buf_.get_size() - sizeof(node_t)); + comm_d_ = (node_t*)comm_d_addr; + } + } + + void GetRoots(const char* cache); + void SetParentsBuf(const char* filename); + void SetReplicaBuf(const char* cache); + + void WriteProofs(const char* filename, bool do_tree, bool do_node); + + size_t ProofSize(bool do_tree, bool do_node); + + void CombineProofs(const char* filename, + const char* tree_filename, + const char* node_filename); + private: + void WriteTreeDProof(uint64_t challenge); + void SetTreeBufs(mmap_t* bufs, const char* cache, + const char* prefix, size_t num_files, bool include_slot); + + streaming_node_reader_t& reader_; + size_t sector_slot_; + const node_t* replica_id_; + uint64_t* challenges_; + size_t challenges_count_; + mmap_t replica_buf_; + std::vector> tree_r_bufs_; + std::vector> tree_c_bufs_; + mmap_t tree_d_buf_; + node_t tree_c_root_; + node_t tree_r_root_; + node_t comm_r_; + node_t* comm_d_; + const node_t* seed_; + const node_t* ticket_; + mmap_t parents_buf_; +}; + +template +C1::C1(streaming_node_reader_t& reader, size_t sector_slot) : + reader_(reader), sector_slot_(sector_slot) { + + challenges_count_ = C::GetNumChallenges() / C::GetNumPartitions(); + + challenges_ = nullptr; +} + +template +C1::~C1() { + if (challenges_ != nullptr) delete challenges_; +} + +// https://spec.filecoin.io/#section-algorithms.sdr.porep-challenges +template +void C1::DeriveChallenges(const uint8_t* seed) { + seed_ = (node_t*) seed; + + uint32_t hash[8] __attribute__ ((aligned (32))); + size_t leaves = C::GetNumLeaves(); + challenges_ = new uint64_t[C::GetNumChallenges()]; + + for (uint8_t k = 0; k < C::GetNumPartitions(); ++k) { + uint8_t buf[128] __attribute__ ((aligned (32))) = {0}; + std::memcpy(buf, replica_id_, 32); + std::memcpy(buf + 32, seed, 32); + buf[68] = 0x80; // padding + // 544 bits -> 0x220 + buf[126] = 0x02; // padding length + buf[127] = 0x20; // padding length + + mpz_t gmp_challenge; + mpz_init(gmp_challenge); + + for (size_t i = 0; i < challenges_count_; ++i) { + uint32_t j = (uint32_t)((challenges_count_ * k) + i); + buf[64] = (uint8_t)(j & 0xFF); + buf[65] = (uint8_t)((j >> 8) & 0xFF); + + std::memcpy(hash, SHA256_INITIAL_DIGEST, NODE_SIZE); + blst_sha256_block(hash, buf, 2); + blst_sha256_emit((uint8_t*)hash, hash); + + mpz_import(gmp_challenge, 8, -1, 4, 0, 0, hash); + + // Resulting challenge must be a leaf index and not the first leaf + // Use gmp to perform modulo operation + challenges_[i + (k * challenges_count_)] = + mpz_mod_ui(gmp_challenge, gmp_challenge, leaves - 1) + 1; + } + mpz_clear(gmp_challenge); + + } +} + +template +void C1::SetParentsBuf(const char* filename) { + assert (parents_buf_.mmap_read(filename) == 0); +} + +template +void C1::SetReplicaBuf(const char* cache) { + const char* rep_template = "%s";; + const size_t MAX = 256; + char fname[MAX]; + snprintf(fname, MAX, rep_template, cache); + assert (replica_buf_.mmap_read(fname) == 0); +} + +template +void C1::SetTreeBufs(mmap_t* bufs, const char* cache, + const char* prefix, size_t num_files, + bool include_slot) { + for (size_t l = 0; l < num_files; ++l) { + const size_t MAX = 256; + char fname[MAX]; + if (include_slot) { + snprintf(fname, MAX, prefix, cache, sector_slot_, l); + } else { + if (num_files == 1) { + snprintf(fname, MAX, prefix, cache); + } else { + snprintf(fname, MAX, prefix, cache, l); + } + } + 
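+    // Probe the file with open()/close() first: a missing tree file logs an
+    // error and breaks out of the loop instead of failing the mmap assert below.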
+ int tree_fd = open(fname, O_RDONLY); + if (tree_fd == -1) { + printf("Failed to open tree file %s\n", fname); + break; + } + close(tree_fd); + + assert (bufs[l].mmap_read(fname) == 0); + } +} + +template +void C1::GetRoots(const char* cache) { + // Get tree_c_root and tree_r_last_root from p_aux file + const char* p_aux_template = "%s/p_aux"; + const size_t MAX = 256; + char fname[MAX]; + snprintf(fname, MAX, p_aux_template, cache); + + mmap_t p_aux_buf; + p_aux_buf.mmap_read(fname); + + std::memcpy(&tree_c_root_, &(p_aux_buf[0]), sizeof(node_t)); + std::memcpy(&tree_r_root_, &(p_aux_buf[1]), sizeof(node_t)); + + // Calculate comm r + Poseidon poseidon_comm_r(2); + poseidon_comm_r.Hash((uint8_t*)&comm_r_, (uint8_t*)&p_aux_buf[0]); +} + +template +size_t C1::ProofSize(bool do_tree, bool do_node) { + uint64_t num_partitions = C::GetNumPartitions(); + uint64_t num_challenges = C::GetNumChallenges(); + + size_t tree_d_proof_size = TreeProof::ProofSize(C::GetNumTreeDArity(), + C::GetNumTreeDLevels(), SINGLE_PROOF_DATA); + size_t tree_rc_proof_size = TreeProof::ProofSize(C::GetNumTreeRCArity(), + C::GetNumTreeRCLevels(), C::GetNumTreeRCConfig()); + size_t tree_proof_size = tree_d_proof_size + tree_rc_proof_size; + + size_t label_proof_size = LabelProof::ProofSize(C::GetNumLayers(), + false); + size_t enc_proof_size = LabelProof::ProofSize(C::GetNumLayers(), + true); + size_t col_proof_size = ((1 + PARENT_COUNT_BASE + PARENT_COUNT_EXP) * + ColumnProof::ProofSize()) + + (2 * sizeof(uint64_t)); + + size_t node_proof_size = label_proof_size + enc_proof_size + col_proof_size; + + size_t proof_size = (num_partitions * sizeof(uint64_t)) + sizeof(uint64_t); + + if (do_tree == true) { + proof_size += (tree_proof_size * num_challenges); + proof_size += 2 * sizeof(node_t); + } + + if (do_node == true) { + proof_size += (node_proof_size * num_challenges); + proof_size += 3 * sizeof(node_t); + } + + return proof_size; +} + +template +void C1::WriteProofs(const char* filename, bool do_tree, bool do_node) { + remove(filename); + mmap_t file_ptr; + size_t expected_file_size = ProofSize(do_tree, do_node); + file_ptr.mmap_write(filename, expected_file_size); + + size_t buf_index = 0; + + // Need to put together pub vanilla_proofs: Vec>>, + uint64_t vp_outer_length = C::GetNumPartitions(); + uint64_t vp_inner_length = challenges_count_; + + std::memcpy(&file_ptr[0] + buf_index, &vp_outer_length, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + // Gather the output buffers into a contiguous array to keep challenge agnostic + // about file IO + size_t num_files = C::GetNumTreeRCFiles(); + std::vector tree_r(num_files); + std::vector tree_c(num_files); + for (size_t i = 0; i < num_files; i++) { + tree_r[i] = &tree_r_bufs_[i][0]; + tree_c[i] = &tree_c_bufs_[i][0]; + } + node_t* tree_d = tree_d_buf_.is_open() ? 
&tree_d_buf_[0] : nullptr; + + for (uint64_t i = 0; i < vp_outer_length; ++i) { + std::memcpy(&file_ptr[0] + buf_index, &vp_inner_length, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + for (uint64_t j = 0; j < vp_inner_length; ++j) { + C1Challenge challenge(challenges_[j + (i * challenges_count_)], + &tree_r_root_, &tree_c_root_, comm_d_); + + + if (do_node == true) { + challenge.GetParents(&parents_buf_[0]); + challenge.GetNodes(reader_, sector_slot_); + if (do_tree == true) { + challenge.GetTreeRNodes(replica_buf_); + buf_index = challenge.WriteProof(&file_ptr[0], buf_index, &tree_r[0], + &tree_c[0], tree_d); + } else { + buf_index = challenge.WriteNodeProof(&file_ptr[0], buf_index, + &tree_c[0]); + } + } else { + challenge.GetTreeRNodes(replica_buf_); + buf_index = challenge.WriteTreeProof(&file_ptr[0], buf_index, + &tree_r[0], tree_d); + } + } + } + + if (do_tree == true) { + // Comm R + std::memcpy(&file_ptr[0] + buf_index, &comm_r_, sizeof(node_t)); + buf_index += sizeof(node_t); + + // Comm D + std::memcpy(&file_ptr[0] + buf_index, comm_d_, sizeof(node_t)); + buf_index += sizeof(node_t); + } + + if (do_node == true) { + // Replica ID + std::memcpy(&file_ptr[0] + buf_index, replica_id_, sizeof(node_t)); + buf_index += sizeof(node_t); + + // Seed + std::memcpy(&file_ptr[0] + buf_index, seed_, sizeof(node_t)); + buf_index += sizeof(node_t); + + // Ticket + std::memcpy(&file_ptr[0] + buf_index, ticket_, sizeof(node_t)); + buf_index += sizeof(node_t); + } + + //printf("WriteProofs buf_index %ld\n", buf_index); + assert(buf_index == expected_file_size); +} + +template +void C1::CombineProofs(const char* filename, + const char* tree_filename, + const char* node_filename) { + remove(filename); + mmap_t file_ptr; + size_t expected_file_size = ProofSize(true, true); + file_ptr.mmap_write(filename, expected_file_size); + + mmap_t tree_ptr; + size_t exp_tree_buf_size = ProofSize(true, false); + tree_ptr.mmap_read(tree_filename); + + mmap_t node_ptr; + node_ptr.mmap_read(node_filename); + + size_t buf_index = 0; + size_t tree_buf_index = 0; + size_t node_buf_index = 0; + + size_t tree_d_proof_size = TreeProof::ProofSize(C::GetNumTreeDArity(), + C::GetNumTreeDLevels(), SINGLE_PROOF_DATA); + size_t tree_rc_proof_size = TreeProof::ProofSize(C::GetNumTreeRCArity(), + C::GetNumTreeRCLevels(), C::GetNumTreeRCConfig()); + size_t tree_proof_size = tree_d_proof_size + tree_rc_proof_size; + + size_t label_proof_size = LabelProof::ProofSize(C::GetNumLayers(), + false); + size_t enc_proof_size = LabelProof::ProofSize(C::GetNumLayers(), + true); + size_t col_proof_size = ((1 + PARENT_COUNT_BASE + PARENT_COUNT_EXP) * + ColumnProof::ProofSize()) + + (2 * sizeof(uint64_t)); + size_t node_proof_size = label_proof_size + enc_proof_size + col_proof_size; + + // Need to put together pub vanilla_proofs: Vec>>, + uint64_t vp_outer_length = C::GetNumPartitions(); + uint64_t vp_inner_length = challenges_count_; + + std::memcpy(&file_ptr[0] + buf_index, &vp_outer_length, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + tree_buf_index += sizeof(uint64_t); + node_buf_index += sizeof(uint64_t); + + for (uint64_t i = 0; i < vp_outer_length; ++i) { + std::memcpy(&file_ptr[0] + buf_index, &vp_inner_length, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + tree_buf_index += sizeof(uint64_t); + node_buf_index += sizeof(uint64_t); + + for (uint64_t j = 0; j < vp_inner_length; ++j) { + std::memcpy(&file_ptr[0] + buf_index, &tree_ptr[0] + tree_buf_index, tree_proof_size); + buf_index += tree_proof_size; + 
tree_buf_index += tree_proof_size; + + std::memcpy(&file_ptr[0] + buf_index, &node_ptr[0] + node_buf_index, node_proof_size); + buf_index += node_proof_size; + node_buf_index += node_proof_size; + } + } + + // Comm R + std::memcpy(&file_ptr[0] + buf_index, &tree_ptr[0] + tree_buf_index, sizeof(node_t)); + buf_index += sizeof(node_t); + tree_buf_index += sizeof(node_t); + + // Comm D + std::memcpy(&file_ptr[0] + buf_index, &tree_ptr[0] + tree_buf_index, sizeof(node_t)); + buf_index += sizeof(node_t); + tree_buf_index += sizeof(node_t); + + // Replica ID + std::memcpy(&file_ptr[0] + buf_index, &node_ptr[0] + node_buf_index, sizeof(node_t)); + buf_index += sizeof(node_t); + node_buf_index += sizeof(node_t); + + // Seed + std::memcpy(&file_ptr[0] + buf_index, &node_ptr[0] + node_buf_index, sizeof(node_t)); + buf_index += sizeof(node_t); + node_buf_index += sizeof(node_t); + + // Ticket + std::memcpy(&file_ptr[0] + buf_index, &node_ptr[0] + node_buf_index, sizeof(node_t)); + buf_index += sizeof(node_t); + node_buf_index += sizeof(node_t); + + assert(buf_index == expected_file_size); + assert(tree_buf_index == exp_tree_buf_size); + assert(node_buf_index == node_ptr.get_size()); +} + + +template +int do_c1(streaming_node_reader_t& reader, + size_t num_sectors, size_t sector_slot, + const uint8_t* replica_id, const uint8_t* seed, + const uint8_t* ticket, const char* cache_path, + const char* parents_filename, const char* replica_path, + const char* output_dir) { + C1 c1(reader, sector_slot); + c1.SetReplicaID((node_t*)replica_id); + c1.SetTicket((node_t*)ticket); + + c1.DeriveChallenges(seed); + if (C::GetNumTreeRCFiles() == 1) { + c1.SetTreeRBufs(cache_path, "%s/sc-02-data-tree-r-last.dat"); + c1.SetTreeCBufs(cache_path, "%s/sc-02-data-tree-c.dat"); + } else { + c1.SetTreeRBufs(cache_path, "%s/sc-02-data-tree-r-last-%ld.dat"); + c1.SetTreeCBufs(cache_path, "%s/sc-02-data-tree-c-%ld.dat"); + } + c1.SetTreeDBuf(cache_path, "%s/sc-02-data-tree-d.dat"); + + c1.GetRoots(cache_path); + c1.SetReplicaBuf(replica_path); + c1.SetParentsBuf(parents_filename); + + const size_t MAX = 256; + char fname[MAX]; + snprintf(fname, MAX, "%s/commit-phase1-output", output_dir); + c1.WriteProofs(fname, true, true); + + return 0; +} + +template +int do_c1_tree(streaming_node_reader_t& reader, + size_t num_sectors, size_t sector_slot, + const uint8_t* replica_id, const uint8_t* seed, + const uint8_t* ticket, const char* cache_path, + const char* parents_filename, const char* replica_path, + const char* output_dir) { + C1 c1_tree(reader, sector_slot); + c1_tree.SetReplicaID((node_t*)replica_id); + c1_tree.DeriveChallenges(seed); + if (C::GetNumTreeRCFiles() == 1) { + c1_tree.SetTreeRBufs(cache_path, "%s/sc-02-data-tree-r-last.dat"); + } else { + c1_tree.SetTreeRBufs(cache_path, "%s/sc-02-data-tree-r-last-%ld.dat"); + } + c1_tree.SetTreeDBuf(cache_path, "%s/sc-02-data-tree-d.dat"); + c1_tree.GetRoots(cache_path); + c1_tree.SetReplicaBuf(replica_path); + + const size_t MAX = 256; + char fname_tree[MAX]; + snprintf(fname_tree, MAX, "%s/commit-phase1-output-tree", output_dir); + c1_tree.WriteProofs(fname_tree, true, false); + + return 0; +} + +template +int do_c1_node(streaming_node_reader_t& reader, + size_t num_sectors, size_t sector_slot, + const uint8_t* replica_id, const uint8_t* seed, + const uint8_t* ticket, const char* cache_path, + const char* parents_filename, const char* replica_path, + const char* output_dir) { + C1 c1_node(reader, sector_slot); + c1_node.SetReplicaID((node_t*)replica_id); + 
c1_node.SetTicket((node_t*)ticket); + c1_node.DeriveChallenges(seed); + if (C::GetNumTreeRCFiles() == 1) { + c1_node.SetTreeCBufs(cache_path, "%s/sc-02-data-tree-c.dat"); + } else { + c1_node.SetTreeCBufs(cache_path, "%s/sc-02-data-tree-c-%ld.dat"); + } + c1_node.SetParentsBuf(parents_filename); + c1_node.GetRoots(cache_path); + + const size_t MAX = 256; + char fname_node[MAX]; + snprintf(fname_node, MAX, "%s/commit-phase1-output-node", output_dir); + c1_node.WriteProofs(fname_node, false, true); + + return 0; +} + +template +int do_c1_comb(streaming_node_reader_t& reader, + size_t num_sectors, size_t sector_slot, + const uint8_t* replica_id, const uint8_t* seed, + const uint8_t* ticket, const char* cache_path, + const char* parents_filename, const char* replica_path, + const char* output_dir) { + C1 c1_combine(reader, sector_slot); + + const size_t MAX = 256; + char fname_tree[MAX]; + snprintf(fname_tree, MAX, "%s/commit-phase1-output-tree", output_dir); + + char fname_node[MAX]; + snprintf(fname_node, MAX, "%s/commit-phase1-output-node", output_dir); + + char fname_comb[MAX]; + snprintf(fname_comb, MAX, "%s/commit-phase1-output-comb", output_dir); + + c1_combine.CombineProofs(fname_comb, fname_tree, fname_node); + + return 0; +} diff --git a/extern/supraseal/c1/challenge.hpp b/extern/supraseal/c1/challenge.hpp new file mode 100644 index 000000000..f75f20556 --- /dev/null +++ b/extern/supraseal/c1/challenge.hpp @@ -0,0 +1,205 @@ +// Copyright Supranational LLC + +#ifndef __C1CHALLENGE_HPP__ +#define __C1CHALLENGE_HPP__ + +template +class C1Challenge { + public: + C1Challenge(uint64_t challenge, + node_t* tree_r_root_, node_t* tree_c_root_, node_t* tree_d_root); + ~C1Challenge(); + + void GetParents(const uint32_t* parents_buf); + void GetNodes(streaming_node_reader_t& reader, size_t sector_slot); + void GetTreeRNodes(node_t* replica_buf); + size_t WriteTreeProof(uint8_t* file_ptr, size_t buf_index, + node_t** tree_r_bufs, node_t* tree_d_buf); + size_t WriteNodeProof(uint8_t* file_ptr, size_t buf_index, + node_t** tree_c_bufs); + size_t WriteProof(uint8_t* file_ptr, size_t buf_index, + node_t** tree_r_bufs, node_t** tree_c_bufs, + node_t* tree_d_buf); + + private: + uint64_t challenge_; + uint32_t drg_parents_[PARENT_COUNT_BASE]; + uint32_t exp_parents_[PARENT_COUNT_EXP]; + node_t* nodes_; // This node and its parents for each layer + node_t* tree_r_nodes_; // Replica nodes to rebuild discarded rows + node_t* tree_r_root_; + node_t* tree_c_root_; + node_t* tree_d_root_; +}; + +template +C1Challenge::C1Challenge(uint64_t challenge, + node_t* tree_r_root, node_t* tree_c_root, + node_t* tree_d_root) : + challenge_(challenge), + tree_r_root_(tree_r_root), + tree_c_root_(tree_c_root), + tree_d_root_(tree_d_root) { + + nodes_ = new node_t[C::GetNumLayers() * (PARENT_COUNT + 1)]; + tree_r_nodes_ = new node_t[C::GetNumTreeRLabels()]; +} + +template +C1Challenge::~C1Challenge() { + if (nodes_ != nullptr) delete nodes_; + if (tree_r_nodes_ != nullptr) delete tree_r_nodes_; +} + +template +void C1Challenge::GetParents(const uint32_t* parents_buf) { + size_t p_idx = challenge_ * PARENT_COUNT; + for (size_t k = 0; k < PARENT_COUNT_BASE; ++k) { + drg_parents_[k] = parents_buf[p_idx]; + p_idx++; + } + for (size_t k = 0; k < PARENT_COUNT_EXP; ++k) { + exp_parents_[k] = parents_buf[p_idx]; + p_idx++; + } +} + +template +void C1Challenge::GetNodes(streaming_node_reader_t& reader, + size_t sector_slot) { + std::vector> nodes; + + size_t layer_count = C::GetNumLayers(); + for (size_t l = 0; l < layer_count; 
++l) { + nodes.push_back(std::pair(l, challenge_)); + + // Get all base parents + for (size_t k = 0; k < PARENT_COUNT_BASE; ++k) { + nodes.push_back(std::pair(l, drg_parents_[k])); + } + + // Get all exp parents + for (size_t k = 0; k < PARENT_COUNT_EXP; ++k) { + nodes.push_back(std::pair(l, exp_parents_[k])); + } + } + reader.alloc_slots(1, nodes.size(), false); + reader.load_nodes(0, nodes); + for (size_t i = 0; i < nodes.size(); i++) { + node_t n = reader.get_node(0, nodes, i, sector_slot); + nodes_[i] = n; + } + reader.free_slots(); +} + +template +void C1Challenge::GetTreeRNodes(node_t* replica_buf) { + size_t tree_r_label_idx = challenge_ & C::GetChallengeStartMask(); + for (size_t k = 0; k < C::GetNumTreeRLabels(); ++k) { + std::memcpy(tree_r_nodes_ + k, &(replica_buf[tree_r_label_idx]), + sizeof(node_t)); + tree_r_label_idx++; + } +} + +template +size_t C1Challenge::WriteTreeProof(uint8_t* file_ptr, size_t buf_index, + node_t** tree_r_bufs, node_t* tree_d_buf) { + /////////////////////////////////////// + // Build Tree D inclusion proof + /////////////////////////////////////// + if (tree_d_buf == nullptr) { + TreeDCCProof tree_d(C::GetNumTreeDArity(), + C::GetNumTreeDLevels(), nullptr, 0, 0); + tree_d.GenInclusionPath(challenge_, (node_t*) CC_TREE_D_NODE_VALUES); + buf_index = tree_d.WriteProof(file_ptr, buf_index, SINGLE_PROOF_DATA); + } else { + TreeProof tree_d(C::GetNumTreeDArity(), + C::GetNumTreeDLevels(), &tree_d_buf, 1, 0); + tree_d.SetRoot(tree_d_root_); + tree_d.GenInclusionPath(challenge_, nullptr); + buf_index = tree_d.WriteProof(file_ptr, buf_index, SINGLE_PROOF_DATA); + } + + /////////////////////////////////////// + // Build Tree R inclusion proof + /////////////////////////////////////// + TreeProof tree_r(C::GetNumTreeRCArity(), + C::GetNumTreeRCLevels(), tree_r_bufs, + C::GetNumTreeRCFiles(), + C::GetNumTreeRDiscardRows()); + tree_r.SetRoot(tree_r_root_); + tree_r.GenInclusionPath(challenge_, tree_r_nodes_); + buf_index = tree_r.WriteProof(file_ptr, buf_index, + C::GetNumTreeRCConfig()); + + return buf_index; +} + +template +size_t C1Challenge::WriteNodeProof(uint8_t* file_ptr, size_t buf_index, + node_t** tree_c_bufs) { + /////////////////////////////////////// + // Column proofs + /////////////////////////////////////// + ColumnProof c_x = ColumnProof(challenge_, + nodes_, 0, (PARENT_COUNT + 1), + tree_c_bufs, tree_c_root_); + buf_index = c_x.WriteProof(file_ptr, buf_index, + C::GetNumTreeRCConfig()); + + /////////////////////////////////////// + // DRG Parents + /////////////////////////////////////// + std::memcpy(file_ptr + buf_index, &PARENT_COUNT_BASE, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + for (size_t k = 0; k < PARENT_COUNT_BASE; ++k) { + ColumnProof drg = ColumnProof(drg_parents_[k], + nodes_, k + 1, (PARENT_COUNT + 1), + tree_c_bufs, tree_c_root_); + buf_index = drg.WriteProof(file_ptr, buf_index, + C::GetNumTreeRCConfig()); + } + + /////////////////////////////////////// + // Expander Parents + /////////////////////////////////////// + std::memcpy(file_ptr + buf_index, &PARENT_COUNT_EXP, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + for (size_t k = 0; k < PARENT_COUNT_EXP; ++k) { + ColumnProof exp = ColumnProof(exp_parents_[k], nodes_, + k + 1 + PARENT_COUNT_BASE, + (PARENT_COUNT + 1), + tree_c_bufs, tree_c_root_); + buf_index = exp.WriteProof(file_ptr, buf_index, + C::GetNumTreeRCConfig()); + } + + /////////////////////////////////////// + // Labeling Proofs + /////////////////////////////////////// + size_t layer_count = 
C::GetNumLayers(); + LabelProof label_proof(challenge_, layer_count, nodes_, (PARENT_COUNT + 1)); + buf_index = label_proof.WriteProof(file_ptr, buf_index); + + /////////////////////////////////////// + // Encoding Proof + /////////////////////////////////////// + LabelProof enc_proof(challenge_, layer_count, nodes_, (PARENT_COUNT + 1)); + buf_index = enc_proof.WriteProof(file_ptr, buf_index, true); + + return buf_index; +} + +template +size_t C1Challenge::WriteProof(uint8_t* file_ptr, size_t buf_index, + node_t** tree_r_bufs, node_t** tree_c_bufs, + node_t* tree_d_buf) { + buf_index = WriteTreeProof(file_ptr, buf_index, tree_r_bufs, tree_d_buf); + buf_index = WriteNodeProof(file_ptr, buf_index, tree_c_bufs); + + return buf_index; +} +#endif // __C1CHALLENGE_HPP__ diff --git a/extern/supraseal/c1/column_proof.hpp b/extern/supraseal/c1/column_proof.hpp new file mode 100644 index 000000000..5cc7fde5e --- /dev/null +++ b/extern/supraseal/c1/column_proof.hpp @@ -0,0 +1,81 @@ +// Copyright Supranational LLC + +#ifndef __COLUMN_PROOF_HPP__ +#define __COLUMN_PROOF_HPP__ + +template +class ColumnProof { + public: + ColumnProof(uint64_t challenge, + node_t* labels, size_t label_idx, size_t label_inc, + node_t** tree_bufs, node_t* root); + ~ColumnProof(); + + size_t WriteProof(uint8_t* file_ptr, size_t buf_index, uint32_t proof_type); + static size_t ProofSize(); + + private: + uint64_t challenge_; + uint64_t layers_; + node_t* labels_; + size_t label_idx_; + size_t label_inc_; + TreeProof* tree_; +}; + +template +ColumnProof
<P>::ColumnProof(uint64_t challenge,
+                           node_t* labels, size_t label_idx, size_t label_inc,
+                           node_t** tree_bufs, node_t* root) :
+  challenge_(challenge),
+  layers_(P::GetNumLayers()),
+  labels_(labels),
+  label_idx_(label_idx),
+  label_inc_(label_inc)
+{
+  tree_ = new TreeProof(P::GetNumTreeRCArity(),
+                        P::GetNumTreeRCLevels(),
+                        tree_bufs, P::GetNumTreeRCFiles());
+  tree_->SetRoot(root);
+  tree_->GenInclusionPath(challenge, nullptr);
+}
+
+template <class P>
+ColumnProof<P>
::~ColumnProof() {
+  if (tree_ != nullptr) {
+    delete tree_;
+  }
+}
+
+template <class P>
+size_t ColumnProof<P>
::ProofSize() {
+  size_t proof_size = 4;                              // challenge (u32)
+  proof_size += 8;                                    // number of layers (u64)
+  proof_size += (sizeof(node_t) * P::GetNumLayers()); // one label per layer
+  proof_size += TreeProof::ProofSize(P::GetNumTreeRCArity(),
+                                     P::GetNumTreeRCLevels(),
+                                     P::GetNumTreeRCConfig());
+  return proof_size;
+}
+
+template <class P>
+size_t ColumnProof<P>
::WriteProof(uint8_t* file_ptr, size_t buf_index, + uint32_t proof_type) { + std::memcpy(file_ptr + buf_index, (uint32_t*)&challenge_, sizeof(uint32_t)); + buf_index += sizeof(uint32_t); + + std::memcpy(file_ptr + buf_index, &layers_, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + for (uint64_t l = 0; l < layers_; ++l) { + std::memcpy(file_ptr + buf_index, labels_ + label_idx_ + (l * label_inc_), + sizeof(node_t)); + buf_index += sizeof(node_t); + } + + buf_index = tree_->WriteProof(file_ptr, buf_index, proof_type); + + return buf_index; +} + +#endif // __COLUMN_PROOF_HPP__ diff --git a/extern/supraseal/c1/label_proof.hpp b/extern/supraseal/c1/label_proof.hpp new file mode 100644 index 000000000..be6559c50 --- /dev/null +++ b/extern/supraseal/c1/label_proof.hpp @@ -0,0 +1,133 @@ +// Copyright Supranational LLC + +#ifndef __LABEL_PROOF_HPP__ +#define __LABEL_PROOF_HPP__ + +#include "../sealing/data_structures.hpp" + +class LabelProof { + public: + LabelProof(uint64_t challenge, uint64_t layers, + node_t* labels, size_t label_inc); + ~LabelProof() { } + + size_t WriteProof(uint8_t* file_ptr, size_t buf_index, bool enc = false); + static size_t ProofSize(size_t layers, bool enc); + + private: + uint64_t challenge_; + uint64_t layers_; + node_t* labels_; + size_t label_inc_; +}; + +LabelProof::LabelProof(uint64_t challenge, uint64_t layers, + node_t* labels, size_t label_inc) : + challenge_(challenge), + layers_(layers), + labels_(labels), + label_inc_(label_inc) { } + +size_t LabelProof::ProofSize(size_t layers, bool enc) { + size_t proof_size = 8; + + if ((enc == false) || (layers == 1)) { + if (enc == false) + proof_size += (layers * 8); + proof_size += sizeof(node_t) * LAYER_ONE_REPEAT_SEQ * PARENT_COUNT_BASE; + proof_size += sizeof(node_t) * LAYER_ONE_FINAL_SEQ; + proof_size += 4; + proof_size += 8; + layers--; + } + + if ((enc == true) && (layers > 1)) { + layers = 1; + } + + proof_size += (layers * sizeof(node_t) * LAYER_N_REPEAT_SEQ * + PARENT_COUNT_BASE); + proof_size += (layers * sizeof(node_t) * LAYER_N_REPEAT_SEQ * + PARENT_COUNT_EXP); + proof_size += (layers * sizeof(node_t) * LAYER_N_FINAL_SEQ); + proof_size += (layers * 4); + proof_size += (layers * 8); + + return proof_size; +} + +size_t LabelProof::WriteProof(uint8_t* file_ptr, size_t buf_index, + bool enc) { + uint32_t l = 1; + + if (enc == true) { // Encoding, only last layer + l = layers_; + } else { + // Write vector length of proofs + std::memcpy(file_ptr + buf_index, &layers_, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + } + + while (l <= layers_) { + // Number of parents in label calculation + std::memcpy(file_ptr + buf_index, &LABEL_PARENTS, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + if (l == 1) { + for (size_t k = 0; k < LAYER_ONE_REPEAT_SEQ; ++k) { + for (size_t c = 0; c < PARENT_COUNT_BASE; ++c) { + std::memcpy(file_ptr + buf_index, + labels_ + c + 1 + ((l - 1) * label_inc_), sizeof(node_t)); + buf_index += sizeof(node_t); + } + } + + for (size_t c = 0; c < LAYER_ONE_FINAL_SEQ; ++c) { + std::memcpy(file_ptr + buf_index, + labels_ + c + 1 + ((l - 1) * label_inc_), sizeof(node_t)); + buf_index += sizeof(node_t); + } + } else { + for (size_t k = 0; k < LAYER_N_REPEAT_SEQ; ++k) { + for (size_t c = 0; c < PARENT_COUNT_BASE; ++c) { + std::memcpy(file_ptr + buf_index, + labels_ + c + 1 + ((l - 1) * label_inc_), sizeof(node_t)); + buf_index += sizeof(node_t); + } + + for (size_t c = 0; c < PARENT_COUNT_EXP; ++c) { + std::memcpy(file_ptr + buf_index, + labels_ + c + 1 + PARENT_COUNT_BASE + ((l 
- 2) * label_inc_), + sizeof(node_t)); + buf_index += sizeof(node_t); + } + } + + for (size_t c = 0; c < LAYER_N_FINAL_SEQ; ++c) { + if (c < PARENT_COUNT_BASE) { + std::memcpy(file_ptr + buf_index, + labels_ + c + 1 + ((l - 1) * label_inc_), sizeof(node_t)); + } else { + std::memcpy(file_ptr + buf_index, + labels_ + c + 1 + ((l - 2) * label_inc_), + sizeof(node_t)); + } + buf_index += sizeof(node_t); + } + } + + // Layer index + std::memcpy(file_ptr + buf_index, &l, sizeof(uint32_t)); + buf_index += sizeof(uint32_t); + + // Node - challenge + std::memcpy(file_ptr + buf_index, &challenge_, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + l++; + } + + return buf_index; +} + +#endif // __LABEL_PROOF_HPP__ diff --git a/extern/supraseal/c1/path_element.hpp b/extern/supraseal/c1/path_element.hpp new file mode 100644 index 000000000..23403e020 --- /dev/null +++ b/extern/supraseal/c1/path_element.hpp @@ -0,0 +1,44 @@ +// Copyright Supranational LLC + +#ifndef __PATH_ELEMENT_HPP__ +#define __PATH_ELEMENT_HPP__ + +class PathElement { + public: + PathElement(size_t arity, uint64_t index); + ~PathElement(); + void SetHash(size_t index, node_t* hash) { hashes_[index] = hash; } + size_t Write(uint8_t* file_ptr, size_t buf_index); + + private: + size_t arity_; + uint64_t index_; + node_t** hashes_; // arity - 1 hashes +}; + +PathElement::PathElement(size_t arity, uint64_t index) : + arity_(arity), + index_(index) { + hashes_ = new node_t*[arity - 1]{ nullptr }; +} + +PathElement::~PathElement() { + delete hashes_; +} + +size_t PathElement::Write(uint8_t* file_ptr, size_t buf_index) { + uint64_t len = (uint64_t)arity_ - 1; + std::memcpy(file_ptr + buf_index, &len, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + for(uint64_t i = 0; i < len; ++i) { + std::memcpy(file_ptr + buf_index, hashes_[i], sizeof(node_t)); + buf_index += sizeof(node_t); + } + + std::memcpy(file_ptr + buf_index, &index_, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + return buf_index; +} +#endif // __PATH_ELEMENT_HPP__ diff --git a/extern/supraseal/c1/streaming_node_reader_files.hpp b/extern/supraseal/c1/streaming_node_reader_files.hpp new file mode 100644 index 000000000..2352dd612 --- /dev/null +++ b/extern/supraseal/c1/streaming_node_reader_files.hpp @@ -0,0 +1,140 @@ +// Copyright Supranational LLC + +#ifndef __STREAMING_LAYER_READER_FILES_HPP__ +#define __STREAMING_LAYER_READER_FILES_HPP__ + +#include +#include +#include +#include "../util/mmap_t.hpp" +#include + +// Encapsulate the SPDK portion of reading layers from files +// C is not used here but is retained to be consistent with +// multi-sector c1 +template +class streaming_node_reader_t { + std::vector> layer_files; + // Packed indicates nodes within a single layer will be contiguous + bool packed; + size_t num_slots; + size_t pages_per_slot; + + node_t* buffer; + + thread_pool_t pool; + +public: + streaming_node_reader_t(size_t sector_size, std::vector layer_filenames) + : buffer(nullptr) + { + layer_files.resize(layer_filenames.size()); + for (size_t i = 0; i < layer_filenames.size(); i++) { + layer_files[i].mmap_read(layer_filenames[i], sector_size); + } + } + + ~streaming_node_reader_t() { + free_slots(); + } + + bool data_is_big_endian() { + return true; + } + + // Allocate resource to perform N reads, each of size slot_node_count. These + // will be indexed by slot_id + // For C1 (load_nodes, get_node), we don't need local storage because it can + // just use the mmapped files. + // For PC2 create buffers to consolidate the data. 
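+  // Illustrative sizing note (an addition; assumes 4 KiB pages and
+  // 32-byte nodes, i.e. C::NODES_PER_PAGE == 128): a packed request
+  // alloc_slots(2, 1000, true) rounds up to
+  // pages_per_slot = (1000 + 127) / 128 = 8, so
+  // 2 * 8 * PAGE_SIZE = 64 KiB of page-aligned memory is allocated.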
+  void alloc_slots(size_t _num_slots, size_t slot_node_count, bool _packed) {
+    packed = _packed;
+    if (!packed) {
+      // Reading will occur directly from files, so do nothing
+    } else {
+      pages_per_slot = (slot_node_count + C::NODES_PER_PAGE - 1) / C::NODES_PER_PAGE;
+      num_slots = _num_slots;
+      // NOTE: the allocation happens inside assert(), so a build with
+      // NDEBUG would compile it out
+      assert (posix_memalign((void **)&buffer, PAGE_SIZE,
+                             num_slots * pages_per_slot * PAGE_SIZE) == 0);
+    }
+  }
+
+  node_t* get_full_buffer(size_t &bytes) {
+    bytes = num_slots * pages_per_slot * PAGE_SIZE;
+    return buffer;
+  }
+
+  node_t* get_slot(size_t slot) {
+    return &buffer[slot * pages_per_slot * C::NODES_PER_PAGE];
+  }
+
+  void free_slots() {
+    free(buffer);
+    buffer = nullptr;
+  }
+
+  ////////////////////////////////////////
+  // Used for PC2
+  ////////////////////////////////////////
+  node_t* load_layers(size_t slot, uint32_t layer, uint64_t node,
+                      size_t node_count, size_t num_layers,
+                      std::atomic<uint64_t>* valid, size_t* valid_count) {
+    if (num_layers == 1) {
+      // Simply return a pointer to the mmap'd file data
+      // This is used by pc2 when building just tree-r
+      assert (layer == C::GetNumLayers() - 1);
+      assert (C::PARALLEL_SECTORS == 1);
+      assert (layer_files.size() == 1);
+
+      *valid = 1;
+      *valid_count = 1;
+
+      return &layer_files[0][node];
+    } else {
+      // Consolidate the layer data into the buffer
+      assert (C::PARALLEL_SECTORS == 1);
+      assert (layer_files.size() == num_layers);
+      // Nodes in each layer are expected to evenly fit in a page so that
+      // the result is packed
+      assert (node_count % C::NODES_PER_PAGE == 0);
+      node_t* dest = &buffer[slot * pages_per_slot * C::NODES_PER_PAGE];
+
+      pool.par_map(num_layers, 1, [&](size_t i) {
+        layer_files[i].read_data(node, &dest[i * node_count], node_count);
+      });
+
+      *valid = 1;
+      *valid_count = 1;
+
+      return dest;
+    }
+  }
+
+  ////////////////////////////////////////
+  // Used for C1
+  ////////////////////////////////////////
+
+  // Load a vector of node IDs into the local buffer
+  // The nodes are a vector of layer, node_id pairs
+  // Since the nodes may be non-consecutive each node will use
+  // an entire page in the buffer.
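+  // Typical C1 call sequence (this mirrors C1Challenge::GetNodes() in
+  // challenge.hpp):
+  //   reader.alloc_slots(1, nodes.size(), false);
+  //   reader.load_nodes(0, nodes);   // no-op for the file-backed reader
+  //   node_t n = reader.get_node(0, nodes, i, sector_slot);
+  //   reader.free_slots();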
+ int load_nodes(size_t slot, std::vector>& nodes) { + assert (!packed); + return 0; + } + + // Retrieve a sector and node from the local buffer + // nodes - the vector of nodes originally read into the local buffer + // idx - the index of the node to retrieve + // sector_slot - the slot to retrive + node_t& get_node(size_t slot, std::vector>& nodes, + size_t idx, size_t sector_slot) { + assert (!packed); + size_t layer = nodes[idx].first; + size_t node = nodes[idx].second; + node_t& n = layer_files[layer][node]; + return n; + } +}; + +#endif diff --git a/extern/supraseal/c1/tree_d_cc_nodes.h b/extern/supraseal/c1/tree_d_cc_nodes.h new file mode 100644 index 000000000..c649e231e --- /dev/null +++ b/extern/supraseal/c1/tree_d_cc_nodes.h @@ -0,0 +1,141 @@ +// Copyright Supranational LLC + +#ifndef __TREE_D_CC_NODES_H__ +#define __TREE_D_CC_NODES_H__ + +// CC Sector Tree D is perfectly symmetrical, all nodes per level are equal +// These values support CC sectors up to 32GB +// TODO: Need another layer for 64GB +const uint8_t CC_TREE_D_NODE_VALUES[][32] = { + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, + { 0xf5, 0xa5, 0xfd, 0x42, 0xd1, 0x6a, 0x20, 0x30, + 0x27, 0x98, 0xef, 0x6e, 0xd3, 0x09, 0x97, 0x9b, + 0x43, 0x00, 0x3d, 0x23, 0x20, 0xd9, 0xf0, 0xe8, + 0xea, 0x98, 0x31, 0xa9, 0x27, 0x59, 0xfb, 0x0b }, + { 0x37, 0x31, 0xbb, 0x99, 0xac, 0x68, 0x9f, 0x66, + 0xee, 0xf5, 0x97, 0x3e, 0x4a, 0x94, 0xda, 0x18, + 0x8f, 0x4d, 0xdc, 0xae, 0x58, 0x07, 0x24, 0xfc, + 0x6f, 0x3f, 0xd6, 0x0d, 0xfd, 0x48, 0x83, 0x33 }, + { 0x64, 0x2a, 0x60, 0x7e, 0xf8, 0x86, 0xb0, 0x04, + 0xbf, 0x2c, 0x19, 0x78, 0x46, 0x3a, 0xe1, 0xd4, + 0x69, 0x3a, 0xc0, 0xf4, 0x10, 0xeb, 0x2d, 0x1b, + 0x7a, 0x47, 0xfe, 0x20, 0x5e, 0x5e, 0x75, 0x0f }, + { 0x57, 0xa2, 0x38, 0x1a, 0x28, 0x65, 0x2b, 0xf4, + 0x7f, 0x6b, 0xef, 0x7a, 0xca, 0x67, 0x9b, 0xe4, + 0xae, 0xde, 0x58, 0x71, 0xab, 0x5c, 0xf3, 0xeb, + 0x2c, 0x08, 0x11, 0x44, 0x88, 0xcb, 0x85, 0x26 }, + { 0x1f, 0x7a, 0xc9, 0x59, 0x55, 0x10, 0xe0, 0x9e, + 0xa4, 0x1c, 0x46, 0x0b, 0x17, 0x64, 0x30, 0xbb, + 0x32, 0x2c, 0xd6, 0xfb, 0x41, 0x2e, 0xc5, 0x7c, + 0xb1, 0x7d, 0x98, 0x9a, 0x43, 0x10, 0x37, 0x2f }, + { 0xfc, 0x7e, 0x92, 0x82, 0x96, 0xe5, 0x16, 0xfa, + 0xad, 0xe9, 0x86, 0xb2, 0x8f, 0x92, 0xd4, 0x4a, + 0x4f, 0x24, 0xb9, 0x35, 0x48, 0x52, 0x23, 0x37, + 0x6a, 0x79, 0x90, 0x27, 0xbc, 0x18, 0xf8, 0x33 }, + { 0x08, 0xc4, 0x7b, 0x38, 0xee, 0x13, 0xbc, 0x43, + 0xf4, 0x1b, 0x91, 0x5c, 0x0e, 0xed, 0x99, 0x11, + 0xa2, 0x60, 0x86, 0xb3, 0xed, 0x62, 0x40, 0x1b, + 0xf9, 0xd5, 0x8b, 0x8d, 0x19, 0xdf, 0xf6, 0x24 }, + { 0xb2, 0xe4, 0x7b, 0xfb, 0x11, 0xfa, 0xcd, 0x94, + 0x1f, 0x62, 0xaf, 0x5c, 0x75, 0x0f, 0x3e, 0xa5, + 0xcc, 0x4d, 0xf5, 0x17, 0xd5, 0xc4, 0xf1, 0x6d, + 0xb2, 0xb4, 0xd7, 0x7b, 0xae, 0xc1, 0xa3, 0x2f }, + { 0xf9, 0x22, 0x61, 0x60, 0xc8, 0xf9, 0x27, 0xbf, + 0xdc, 0xc4, 0x18, 0xcd, 0xf2, 0x03, 0x49, 0x31, + 0x46, 0x00, 0x8e, 0xae, 0xfb, 0x7d, 0x02, 0x19, + 0x4d, 0x5e, 0x54, 0x81, 0x89, 0x00, 0x51, 0x08 }, + { 0x2c, 0x1a, 0x96, 0x4b, 0xb9, 0x0b, 0x59, 0xeb, + 0xfe, 0x0f, 0x6d, 0xa2, 0x9a, 0xd6, 0x5a, 0xe3, + 0xe4, 0x17, 0x72, 0x4a, 0x8f, 0x7c, 0x11, 0x74, + 0x5a, 0x40, 0xca, 0xc1, 0xe5, 0xe7, 0x40, 0x11 }, + { 0xfe, 0xe3, 0x78, 0xce, 0xf1, 0x64, 0x04, 0xb1, + 0x99, 0xed, 0xe0, 0xb1, 0x3e, 0x11, 0xb6, 0x24, + 0xff, 0x9d, 0x78, 0x4f, 0xbb, 0xed, 0x87, 0x8d, + 0x83, 0x29, 0x7e, 0x79, 0x5e, 0x02, 0x4f, 0x02 }, + { 0x8e, 0x9e, 0x24, 0x03, 0xfa, 
0x88, 0x4c, 0xf6, + 0x23, 0x7f, 0x60, 0xdf, 0x25, 0xf8, 0x3e, 0xe4, + 0x0d, 0xca, 0x9e, 0xd8, 0x79, 0xeb, 0x6f, 0x63, + 0x52, 0xd1, 0x50, 0x84, 0xf5, 0xad, 0x0d, 0x3f }, + { 0x75, 0x2d, 0x96, 0x93, 0xfa, 0x16, 0x75, 0x24, + 0x39, 0x54, 0x76, 0xe3, 0x17, 0xa9, 0x85, 0x80, + 0xf0, 0x09, 0x47, 0xaf, 0xb7, 0xa3, 0x05, 0x40, + 0xd6, 0x25, 0xa9, 0x29, 0x1c, 0xc1, 0x2a, 0x07 }, + { 0x70, 0x22, 0xf6, 0x0f, 0x7e, 0xf6, 0xad, 0xfa, + 0x17, 0x11, 0x7a, 0x52, 0x61, 0x9e, 0x30, 0xce, + 0xa8, 0x2c, 0x68, 0x07, 0x5a, 0xdf, 0x1c, 0x66, + 0x77, 0x86, 0xec, 0x50, 0x6e, 0xef, 0x2d, 0x19 }, + { 0xd9, 0x98, 0x87, 0xb9, 0x73, 0x57, 0x3a, 0x96, + 0xe1, 0x13, 0x93, 0x64, 0x52, 0x36, 0xc1, 0x7b, + 0x1f, 0x4c, 0x70, 0x34, 0xd7, 0x23, 0xc7, 0xa9, + 0x9f, 0x70, 0x9b, 0xb4, 0xda, 0x61, 0x16, 0x2b }, + { 0xd0, 0xb5, 0x30, 0xdb, 0xb0, 0xb4, 0xf2, 0x5c, + 0x5d, 0x2f, 0x2a, 0x28, 0xdf, 0xee, 0x80, 0x8b, + 0x53, 0x41, 0x2a, 0x02, 0x93, 0x1f, 0x18, 0xc4, + 0x99, 0xf5, 0xa2, 0x54, 0x08, 0x6b, 0x13, 0x26 }, + { 0x84, 0xc0, 0x42, 0x1b, 0xa0, 0x68, 0x5a, 0x01, + 0xbf, 0x79, 0x5a, 0x23, 0x44, 0x06, 0x4f, 0xe4, + 0x24, 0xbd, 0x52, 0xa9, 0xd2, 0x43, 0x77, 0xb3, + 0x94, 0xff, 0x4c, 0x4b, 0x45, 0x68, 0xe8, 0x11 }, + { 0x65, 0xf2, 0x9e, 0x5d, 0x98, 0xd2, 0x46, 0xc3, + 0x8b, 0x38, 0x8c, 0xfc, 0x06, 0xdb, 0x1f, 0x6b, + 0x02, 0x13, 0x03, 0xc5, 0xa2, 0x89, 0x00, 0x0b, + 0xdc, 0xe8, 0x32, 0xa9, 0xc3, 0xec, 0x42, 0x1c }, + { 0xa2, 0x24, 0x75, 0x08, 0x28, 0x58, 0x50, 0x96, + 0x5b, 0x7e, 0x33, 0x4b, 0x31, 0x27, 0xb0, 0xc0, + 0x42, 0xb1, 0xd0, 0x46, 0xdc, 0x54, 0x40, 0x21, + 0x37, 0x62, 0x7c, 0xd8, 0x79, 0x9c, 0xe1, 0x3a }, + { 0xda, 0xfd, 0xab, 0x6d, 0xa9, 0x36, 0x44, 0x53, + 0xc2, 0x6d, 0x33, 0x72, 0x6b, 0x9f, 0xef, 0xe3, + 0x43, 0xbe, 0x8f, 0x81, 0x64, 0x9e, 0xc0, 0x09, + 0xaa, 0xd3, 0xfa, 0xff, 0x50, 0x61, 0x75, 0x08 }, + { 0xd9, 0x41, 0xd5, 0xe0, 0xd6, 0x31, 0x4a, 0x99, + 0x5c, 0x33, 0xff, 0xbd, 0x4f, 0xbe, 0x69, 0x11, + 0x8d, 0x73, 0xd4, 0xe5, 0xfd, 0x2c, 0xd3, 0x1f, + 0x0f, 0x7c, 0x86, 0xeb, 0xdd, 0x14, 0xe7, 0x06 }, + { 0x51, 0x4c, 0x43, 0x5c, 0x3d, 0x04, 0xd3, 0x49, + 0xa5, 0x36, 0x5f, 0xbd, 0x59, 0xff, 0xc7, 0x13, + 0x62, 0x91, 0x11, 0x78, 0x59, 0x91, 0xc1, 0xa3, + 0xc5, 0x3a, 0xf2, 0x20, 0x79, 0x74, 0x1a, 0x2f }, + { 0xad, 0x06, 0x85, 0x39, 0x69, 0xd3, 0x7d, 0x34, + 0xff, 0x08, 0xe0, 0x9f, 0x56, 0x93, 0x0a, 0x4a, + 0xd1, 0x9a, 0x89, 0xde, 0xf6, 0x0c, 0xbf, 0xee, + 0x7e, 0x1d, 0x33, 0x81, 0xc1, 0xe7, 0x1c, 0x37 }, + { 0x39, 0x56, 0x0e, 0x7b, 0x13, 0xa9, 0x3b, 0x07, + 0xa2, 0x43, 0xfd, 0x27, 0x20, 0xff, 0xa7, 0xcb, + 0x3e, 0x1d, 0x2e, 0x50, 0x5a, 0xb3, 0x62, 0x9e, + 0x79, 0xf4, 0x63, 0x13, 0x51, 0x2c, 0xda, 0x06 }, + { 0xcc, 0xc3, 0xc0, 0x12, 0xf5, 0xb0, 0x5e, 0x81, + 0x1a, 0x2b, 0xbf, 0xdd, 0x0f, 0x68, 0x33, 0xb8, + 0x42, 0x75, 0xb4, 0x7b, 0xf2, 0x29, 0xc0, 0x05, + 0x2a, 0x82, 0x48, 0x4f, 0x3c, 0x1a, 0x5b, 0x3d }, + { 0x7d, 0xf2, 0x9b, 0x69, 0x77, 0x31, 0x99, 0xe8, + 0xf2, 0xb4, 0x0b, 0x77, 0x91, 0x9d, 0x04, 0x85, + 0x09, 0xee, 0xd7, 0x68, 0xe2, 0xc7, 0x29, 0x7b, + 0x1f, 0x14, 0x37, 0x03, 0x4f, 0xc3, 0xc6, 0x2c }, + { 0x66, 0xce, 0x05, 0xa3, 0x66, 0x75, 0x52, 0xcf, + 0x45, 0xc0, 0x2b, 0xcc, 0x4e, 0x83, 0x92, 0x91, + 0x9b, 0xde, 0xac, 0x35, 0xde, 0x2f, 0xf5, 0x62, + 0x71, 0x84, 0x8e, 0x9f, 0x7b, 0x67, 0x51, 0x07 }, + { 0xd8, 0x61, 0x02, 0x18, 0x42, 0x5a, 0xb5, 0xe9, + 0x5b, 0x1c, 0xa6, 0x23, 0x9d, 0x29, 0xa2, 0xe4, + 0x20, 0xd7, 0x06, 0xa9, 0x6f, 0x37, 0x3e, 0x2f, + 0x9c, 0x9a, 0x91, 0xd7, 0x59, 0xd1, 0x9b, 0x01 }, + { 0x6d, 0x36, 0x4b, 0x1e, 0xf8, 0x46, 0x44, 0x1a, + 0x5a, 0x4a, 0x68, 0x86, 0x23, 0x14, 0xac, 0xc0, + 0xa4, 0x6f, 
0x01, 0x67, 0x17, 0xe5, 0x34, 0x43, + 0xe8, 0x39, 0xee, 0xdf, 0x83, 0xc2, 0x85, 0x3c }, + { 0x07, 0x7e, 0x5f, 0xde, 0x35, 0xc5, 0x0a, 0x93, + 0x03, 0xa5, 0x50, 0x09, 0xe3, 0x49, 0x8a, 0x4e, + 0xbe, 0xdf, 0xf3, 0x9c, 0x42, 0xb7, 0x10, 0xb7, + 0x30, 0xd8, 0xec, 0x7a, 0xc7, 0xaf, 0xa6, 0x3e }, + // TODO: Placeholder for 64GB CC + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } + +}; +#endif // __TREE_D_CC_NODES_H__ diff --git a/extern/supraseal/c1/tree_proof.hpp b/extern/supraseal/c1/tree_proof.hpp new file mode 100644 index 000000000..1d6b8590a --- /dev/null +++ b/extern/supraseal/c1/tree_proof.hpp @@ -0,0 +1,342 @@ +// Copyright Supranational LLC + +#ifndef __TREE_PROOF_HPP__ +#define __TREE_PROOF_HPP__ + +#include "tree_d_cc_nodes.h" + +class TreeProof { + public: + TreeProof(size_t arity, size_t levels, + node_t** tree_bufs, size_t num_tree_bufs, + size_t discard_rows); + virtual ~TreeProof(); + + void SetRoot(node_t* root) { root_ = root; } + void SetLeaf(node_t* leaf) { leaf_ = leaf; } + size_t WriteProof(uint8_t* file_ptr, size_t buf_index, uint32_t proof_type); + static size_t ProofSize(size_t arity, size_t levels, uint32_t proof_type); + + virtual void GenInclusionPath(uint64_t challenge, + node_t* first_level = nullptr); + protected: + bool PerformFirstLevels(uint64_t challenge, node_t* first_level, + size_t* indices); + + // The index array will be filled with which side of the input the + // challenge node to prove is located on all the way up the tree. + void GetTreePaths(size_t* indices, uint64_t challenge); + + size_t arity_; + size_t levels_; + node_t** tree_bufs_; + size_t tree_bufs_len_; + size_t discard_rows_; + node_t* root_; + node_t* leaf_; + node_t* path_buf_; // Used for rebuilding trees if needed + std::vector path_; // levels number of PathElements +}; + +TreeProof::TreeProof(size_t arity, size_t levels, + node_t** tree_bufs, size_t tree_bufs_len = 1, + size_t discard_rows = 0) : + arity_(arity), + levels_(levels), + tree_bufs_(tree_bufs), + tree_bufs_len_(tree_bufs_len), + discard_rows_(discard_rows) +{ + path_.reserve(levels); + + if (discard_rows > 0) { + path_buf_ = new node_t[discard_rows * (arity - 1)]; + } else { + path_buf_ = nullptr; + } +} + +TreeProof::~TreeProof() { + if (path_buf_ != nullptr) { + delete path_buf_; + } + + for (size_t l = 0; l < levels_; ++l) { + delete path_[l]; + } +} + +size_t TreeProof::ProofSize(size_t arity, size_t levels, uint32_t proof_type) { + size_t proof_size = 4; // proof type u32 + proof_size += sizeof(node_t); // root + proof_size += sizeof(node_t); // leaf + + proof_size += 8; // base size u64 + proof_size += (((sizeof(node_t) * (arity - 1)) + 8 + 8) * levels); // path + + if (proof_type == 1) { + proof_size += 8; // sub size u64 + } + + return proof_size; +} + +size_t TreeProof::WriteProof(uint8_t* file_ptr, size_t buf_index, + uint32_t proof_type) { + std::memcpy(file_ptr + buf_index, &proof_type, sizeof(uint32_t)); + buf_index += sizeof(uint32_t); + + if (proof_type == 0) { + // Root + std::memcpy(file_ptr + buf_index, root_, sizeof(node_t)); + buf_index += sizeof(node_t); + + // Leaf + std::memcpy(file_ptr + buf_index, leaf_, sizeof(node_t)); + buf_index += sizeof(node_t); + + // Proof size + std::memcpy(file_ptr + buf_index, &levels_, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + // Proofs + for (size_t i = 0; i < levels_; ++i) { + buf_index = 
path_[i]->Write(file_ptr, buf_index); + } + } else if (proof_type == 1) { + // Only supports specific tree of single level sub (e.g. 32G case) + + // Base proof size + uint64_t base_proof_vec_len = levels_ - 1; + std::memcpy(file_ptr + buf_index, &base_proof_vec_len, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + // Base proofs + for (size_t i = 0; i < base_proof_vec_len; ++i) { + buf_index = path_[i]->Write(file_ptr, buf_index); + } + + // Sub proof size + uint64_t sub_proof_vec_len = 1; + std::memcpy(file_ptr + buf_index, &sub_proof_vec_len, sizeof(uint64_t)); + buf_index += sizeof(uint64_t); + + // Sub proof + buf_index = path_[base_proof_vec_len]->Write(file_ptr, buf_index); + + // Root + std::memcpy(file_ptr + buf_index, root_, sizeof(node_t)); + buf_index += sizeof(node_t); + + // Leaf + std::memcpy(file_ptr + buf_index, leaf_, sizeof(node_t)); + buf_index += sizeof(node_t); + } + + return buf_index; +} + +/* + Rebuilding discarded tree r rows + Gather enough nodes around the challenge to build subtree + First level inclusion path is nodes + Second level inclusion path requires hashing the 7 adjacent nodes + Third level inclusion path requires hashing two levels to get 7 adjacent + Fourth level and above are in the tree r files + + O + ____/|\____ + / ... \ + O O + ____________________/|\__ __/|\_____________________ + / | | \ + O O O O + / / / / \ \ \ \ / / / / \ \ \ \ ... / / / / \ \ \ \ / / / / \ \ \ \ + O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O O + 0 1 2 3 4 5 6 7 8 9 A B C D E F ... 1F0 1F7 1F8 1FF +*/ +bool TreeProof::PerformFirstLevels(uint64_t challenge, + node_t* first_level, + size_t* indices) { + const size_t arity_mask = ~(arity_ - 1); + const size_t labels = pow(arity_, discard_rows_ + 1); + const size_t index_mask = labels - 1; + const size_t sec_mask = ~((arity_ * arity_) - 1); + + size_t leaf_start = (challenge & arity_mask) & index_mask; + size_t leaf_idx = indices[0]; + size_t hash_idx = 0; + + // Set leaf from first level + SetLeaf((node_t*)(first_level + leaf_start + leaf_idx)); + + // First level labels are separate from tree buffer files + path_.push_back(new PathElement(arity_, (uint64_t) indices[0])); + for (size_t a = 0; a < arity_; ++a) { + if (a != leaf_idx) { + path_[0]->SetHash(hash_idx++, (node_t*)(first_level + leaf_start + a)); + } + } + + // Second level needs to hash adjacent labels + leaf_idx = indices[1]; + path_.push_back(new PathElement(arity_, (uint64_t) indices[1])); + + Poseidon p(arity_); + + hash_idx = 0; + leaf_start &= sec_mask; + for (size_t a = 0; a < arity_; ++a) { + if (a != leaf_idx) { + p.Hash((uint8_t*)&(path_buf_[hash_idx]), + (uint8_t*)&(first_level[leaf_start + (a * arity_)])); + path_[1]->SetHash(hash_idx, &(path_buf_[hash_idx])); + hash_idx++; + } + } + + if (levels_ == 2) { // 2K case + return true; + } + + // Third level needs to hash adjacent labels for two levels + uint8_t p_hash_buf[arity_][sizeof(node_t)]; + path_.push_back(new PathElement(arity_, (uint64_t) indices[2])); + hash_idx = 0; + leaf_start >>= (size_t) log2(arity_ * arity_); + for (size_t a_o = 0; a_o < arity_; ++a_o) { + // leaf_start is the node to skip + if (a_o != leaf_start) { + for (size_t a_i = 0; a_i < arity_; ++a_i) { + p.Hash(p_hash_buf[a_i], (uint8_t*)&(first_level[(a_o * arity_ * arity_)+ + (a_i * arity_)])); + } + p.Hash((uint8_t*)&(path_buf_[hash_idx + arity_ - 1]), p_hash_buf[0]); + path_[2]->SetHash(hash_idx, &(path_buf_[hash_idx + arity_ - 1])); + hash_idx++; + } + } + + if (levels_ == 3) { + return true; + } + + 
return false; +} + +void TreeProof::GenInclusionPath(uint64_t challenge, + node_t* first_level) { + // Get the challenge index for each level of the tree + size_t indices[levels_]; + GetTreePaths(indices, challenge); + + size_t starting_level = 0; + + if (first_level != nullptr) { + bool done = PerformFirstLevels(challenge, first_level, indices); + if (done) return; + starting_level = 3; + } + + size_t finish_level = levels_ - 1; + if (tree_bufs_len_ == 1) { + finish_level = levels_; + } + + const size_t arity_mask = ~(arity_ - 1); + const size_t arity_lg = (size_t) log2(arity_); + const size_t leaves = pow(2, levels_ * arity_lg); + const size_t file_leaves = (size_t) (leaves / tree_bufs_len_); + const size_t file_shift = (size_t) log2(file_leaves); + const size_t tree_idx_mask = file_leaves - 1; + size_t start_level_size = file_leaves; + + if (first_level != nullptr) { + size_t act_file_leaves = pow(2, (levels_ - (discard_rows_ + 1)) * arity_lg); + start_level_size = (size_t) (act_file_leaves / tree_bufs_len_); + } + + const size_t buf_idx = challenge >> file_shift; + size_t cur_level_size = start_level_size; + size_t add_level_size = 0; + size_t leaf_idx; + size_t hash_idx; + size_t leaf_start; + + for (size_t l = starting_level; l < finish_level; ++l) { + leaf_idx = indices[l]; + leaf_start = challenge & tree_idx_mask; + leaf_start >>= (l * arity_lg); + leaf_start &= arity_mask; + leaf_start += add_level_size; + add_level_size += cur_level_size; + cur_level_size >>= arity_lg; + + if (l == 0) { + SetLeaf((node_t*)(tree_bufs_[buf_idx] + leaf_start + leaf_idx)); + } + + path_.push_back(new PathElement(arity_, (uint64_t)leaf_idx)); + hash_idx = 0; + for (size_t a = 0; a < arity_; ++a) { + if (a != leaf_idx) { + path_[l]->SetHash(hash_idx++, + (node_t*)(tree_bufs_[buf_idx] + leaf_start + a)); + } + } + } + + if (tree_bufs_len_ == 1) { + return; + } + + leaf_idx = indices[levels_ - 1]; + path_.push_back(new PathElement(arity_, (uint64_t)leaf_idx)); + hash_idx = 0; + for (size_t a = 0; a < arity_; ++a) { + if (a != leaf_idx) { + path_[levels_ - 1]->SetHash(hash_idx++, + (node_t*)(tree_bufs_[a] + add_level_size)); + } + } +} + +void TreeProof::GetTreePaths(size_t* indices, uint64_t challenge) { + size_t arity_lg = log2(arity_); + size_t arity_mask = arity_ - 1; + + for (size_t i = 0; i < levels_; ++i) { + indices[i] = challenge & arity_mask; + challenge >>= arity_lg; + } +} + +class TreeDCCProof : public TreeProof { + public: + TreeDCCProof(size_t arity, size_t levels, + node_t** tree_bufs, size_t num_tree_bufs, + size_t discard_rows) : + TreeProof(arity, levels, tree_bufs, num_tree_bufs, discard_rows) { + // TODO: for 64GB would need to access the next layer. CC_TREE_D_NODE_VALUES + // would need to be filled in. 
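+    // Worked example (illustrative; assumes 32-byte nodes): a 32 GiB CC
+    // sector has 2^30 leaves, so the binary tree d is 30 levels deep and
+    // its root is CC_TREE_D_NODE_VALUES[30]; a 64 GiB sector would need
+    // entry 31, which is still the zeroed placeholder above.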
+    assert (levels <= 31);
+
+    SetRoot((node_t*)(CC_TREE_D_NODE_VALUES[levels]));
+    SetLeaf((node_t*)(CC_TREE_D_NODE_VALUES[0]));
+  }
+
+  void GenInclusionPath(uint64_t challenge, node_t* first_level);
+};
+
+void TreeDCCProof::GenInclusionPath(uint64_t challenge,
+                                    node_t* first_level) {
+  size_t comm_d_indices[levels_];
+  GetTreePaths(comm_d_indices, challenge);
+
+  for (size_t l = 0; l < levels_; ++l) {
+    path_.push_back(new PathElement(arity_, (uint64_t) comm_d_indices[l]));
+    path_[l]->SetHash(0, (node_t*)(first_level + l));
+  }
+}
+
+#endif // __TREE_PROOF_HPP__
diff --git a/extern/supraseal/c2/Cargo.toml b/extern/supraseal/c2/Cargo.toml
new file mode 100644
index 000000000..802e1391c
--- /dev/null
+++ b/extern/supraseal/c2/Cargo.toml
@@ -0,0 +1,24 @@
+[package]
+name = "supraseal-c2"
+version = "0.1.0"
+edition = "2021"
+license = "Apache-2.0"
+description = "CUDA Groth16 proof generator for Filecoin"
+repository = "https://github.com/supranational/supra_seal"
+
+[dependencies]
+blst = "^0.3.11"
+sppark = "^0.1.5"
+
+[features]
+default = []
+# Compile in portable mode, without ISA extensions.
+# Binary can be executed on all systems.
+portable = [ "blst/portable" ]
+# Enable ADX even if the host CPU doesn't support it.
+# Binary can be executed on Broadwell+ and Ryzen+ systems.
+force-adx = [ "blst/force-adx" ]
+quiet = []
+
+[build-dependencies]
+cc = { version = "^1.0.70", features = ["parallel"] }
diff --git a/extern/supraseal/c2/README.md b/extern/supraseal/c2/README.md
new file mode 100644
index 000000000..2a084e40e
--- /dev/null
+++ b/extern/supraseal/c2/README.md
@@ -0,0 +1,11 @@
+# Commit 2
+
+The final step of the sealing process is to generate a zkSNARK for the proof of replication (PoRep). Using the inclusion proofs from C1, the inputs are run through the PoRep circuit and a proof is generated with Groth16.
+
+## Intended Usage
+
+The SupraSeal C2 operations differ from the rest of the library in that they depend on primitives from external libraries. Specifically, they rely on bellperson, which generates the witness through a modified version of synthesize_circuits_batch(). From there the vectors are put through various MSM and NTT kernels on the GPU and CPU. Note that this requires a Rust-based interface, as opposed to the C/C++ used throughout the rest of SupraSeal.
+
+bellperson v0.26 interfaces with this implementation through the `cuda-supraseal` feature.
+
+To run a 32GiB test/benchmark, change directory to `demos/c2-test` and execute `cargo test --release -- --nocapture`. It is assumed that you have previously fetched the corresponding parameters. The expected execution time for the test is approximately 2-3 minutes, depending on the system.
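+
+A minimal sketch of that flow (the parameter-fetching step shown here uses lotus and is an assumption; this crate itself does not fetch parameters):
+
+```bash
+# Fetch the 32GiB Groth16 parameters first (a large download), e.g.:
+#   lotus fetch-params 32GiB
+cd demos/c2-test
+cargo test --release -- --nocapture
+```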
diff --git a/extern/supraseal/c2/build.rs b/extern/supraseal/c2/build.rs new file mode 100644 index 000000000..3f42899a0 --- /dev/null +++ b/extern/supraseal/c2/build.rs @@ -0,0 +1,52 @@ +use std::env; + +fn main() { + groth16_cuda(); +} + +fn groth16_cuda() { + let mut nvcc = cc::Build::new(); + nvcc.cuda(true); + nvcc.flag("-arch=sm_80"); + nvcc.flag("-gencode").flag("arch=compute_70,code=sm_70"); + nvcc.flag("-t0"); + nvcc.define("TAKE_RESPONSIBILITY_FOR_ERROR_MESSAGE", None); + nvcc.define("FEATURE_BLS12_381", None); + apply_blst_flags(&mut nvcc); + if let Some(include) = env::var_os("DEP_BLST_C_SRC") { + nvcc.include(&include); + } + if let Some(include) = env::var_os("DEP_SPPARK_ROOT") { + nvcc.include(include); + } + nvcc.flag("-Xcompiler").flag("-Wno-subobject-linkage"); + nvcc.flag("-Xcompiler").flag("-Wno-unused-function"); + + nvcc.file("cuda/groth16_cuda.cu").compile("groth16_cuda"); + + println!("cargo:rerun-if-changed=cuda"); + println!("cargo:rerun-if-env-changed=CXXFLAGS"); +} + +fn apply_blst_flags(nvcc: &mut cc::Build) { + let target_arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); + + match (cfg!(feature = "portable"), cfg!(feature = "force-adx")) { + (true, false) => { + nvcc.define("__BLST_PORTABLE__", None); + } + (false, true) => { + if target_arch.eq("x86_64") { + nvcc.define("__ADX__", None); + } + } + (false, false) => + { + #[cfg(target_arch = "x86_64")] + if target_arch.eq("x86_64") && std::is_x86_feature_detected!("adx") { + nvcc.define("__ADX__", None); + } + } + (true, true) => panic!("Cannot compile with both `portable` and `force-adx` features"), + } +} diff --git a/extern/supraseal/c2/cuda/groth16_cuda.cu b/extern/supraseal/c2/cuda/groth16_cuda.cu new file mode 100644 index 000000000..affab5c9e --- /dev/null +++ b/extern/supraseal/c2/cuda/groth16_cuda.cu @@ -0,0 +1,698 @@ +// Copyright Supranational LLC + +#include +#include +#include +#include +#include + +#if defined(FEATURE_BLS12_381) +# include +#else +# error "only FEATURE_BLS12_381 is supported" +#endif + +#include +#include + +typedef jacobian_t point_t; +typedef xyzz_t bucket_t; +typedef bucket_t::affine_t affine_t; + +typedef jacobian_t point_fp2_t; +typedef xyzz_t bucket_fp2_t; +typedef bucket_fp2_t::affine_t affine_fp2_t; + +typedef fr_t scalar_t; + +#define SPPARK_DONT_INSTANTIATE_TEMPLATES +#include +#include + +template +struct Assignment { + // Density of queries + const uint64_t* a_aux_density; + size_t a_aux_bit_len; + size_t a_aux_popcount; + + const uint64_t* b_inp_density; + size_t b_inp_bit_len; + size_t b_inp_popcount; + + const uint64_t* b_aux_density; + size_t b_aux_bit_len; + size_t b_aux_popcount; + + // Evaluations of A, B, C polynomials + const Scalar* a; + const Scalar* b; + const Scalar* c; + size_t abc_size; + + // Assignments of variables + const Scalar* inp_assignment_data; + size_t inp_assignment_size; + + const Scalar* aux_assignment_data; + size_t aux_assignment_size; +}; + +#include "groth16_ntt_h.cu" +#include "groth16_split_msm.cu" + +template +static void mult(point_t& ret, const affine_t point, const scalar_t& fr, + size_t top = scalar_t::nbits) +{ +#ifndef __CUDA_ARCH__ + scalar_t::pow_t scalar; + fr.to_scalar(scalar); + + mult(ret, point, scalar, top); +#endif +} + +static thread_pool_t groth16_pool; + +struct msm_results { + std::vector h; + std::vector l; + std::vector a; + std::vector b_g1; + std::vector b_g2; + + msm_results(size_t num_circuits) : h(num_circuits), + l(num_circuits), + a(num_circuits), + b_g1(num_circuits), + b_g2(num_circuits) {} +}; + 
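+// Illustrative helper (an addition, not part of the original source):
+// the *_density fields in Assignment pack one bit per variable into
+// 64-bit words, so the number of scalars an MSM actually consumes is
+// the total popcount over the corresponding map, e.g.:
+static inline size_t density_popcount(const uint64_t* map, size_t bit_len)
+{
+    size_t count = 0;
+    for (size_t i = 0; i < (bit_len + 63) / 64; i++)
+        count += __builtin_popcountll(map[i]);
+    return count;
+}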
+struct groth16_proof { + point_t::affine_t a; + point_fp2_t::affine_t b; + point_t::affine_t c; +}; + +#include "groth16_srs.cuh" + +#if defined(_MSC_VER) && !defined(__clang__) && !defined(__builtin_popcountll) +#define __builtin_popcountll(x) __popcnt64(x) +#endif + +extern "C" +RustError::by_value generate_groth16_proofs_c(const Assignment provers[], + size_t num_circuits, + const fr_t r_s[], const fr_t s_s[], + groth16_proof proofs[], SRS& srs) +{ + // Mutex to serialize execution of this subroutine + static std::mutex mtx; + std::lock_guard lock(mtx); + + if (!ngpus()) { + return RustError{ENODEV, "No CUDA devices available"}; + } + + const verifying_key* vk = &srs.get_vk(); + + auto points_h = srs.get_h_slice(); + auto points_l = srs.get_l_slice(); + auto points_a = srs.get_a_slice(); + auto points_b_g1 = srs.get_b_g1_slice(); + auto points_b_g2 = srs.get_b_g2_slice(); + + for (size_t c = 0; c < num_circuits; c++) { + auto& p = provers[c]; + + assert(points_l.size() == p.aux_assignment_size); + assert(points_a.size() == p.inp_assignment_size + p.a_aux_popcount); + assert(points_b_g1.size() == p.b_inp_popcount + p.b_aux_popcount); + assert(p.a_aux_bit_len == p.aux_assignment_size); + assert(p.b_aux_bit_len == p.aux_assignment_size); + assert(p.b_inp_bit_len == p.inp_assignment_size); + } + + bool l_split_msm = true, a_split_msm = true, + b_split_msm = true; + size_t l_popcount = 0, a_popcount = 0, b_popcount = 0; + + split_vectors split_vectors_l{num_circuits, points_l.size()}; + split_vectors split_vectors_a{num_circuits, points_a.size()}; + split_vectors split_vectors_b{num_circuits, points_b_g1.size()}; + + std::vector tail_msm_l_bases, + tail_msm_a_bases, + tail_msm_b_g1_bases; + std::vector tail_msm_b_g2_bases; + + msm_results results{num_circuits}; + + semaphore_t barrier; + std::atomic caught_exception{false}; + size_t n_gpus = std::min(ngpus(), num_circuits); + + std::thread prep_msm_thread([&, num_circuits] + { + // pre-processing step + // mark inp and significant scalars in aux assignments + groth16_pool.par_map(num_circuits, [&](size_t c) { + auto& prover = provers[c]; + auto& l_bit_vector = split_vectors_l.bit_vector[c]; + auto& a_bit_vector = split_vectors_a.bit_vector[c]; + auto& b_bit_vector = split_vectors_b.bit_vector[c]; + + size_t a_bits_cursor = 0, b_bits_cursor = 0; + uint64_t a_bits = 0, b_bits = 0; + uint32_t a_bit_off = 0, b_bit_off = 0; + + size_t inp_size = prover.inp_assignment_size; + + for (size_t i = 0; i < inp_size; i += CHUNK_BITS) { + uint64_t b_map = prover.b_inp_density[i / CHUNK_BITS]; + uint64_t map_mask = 1; + size_t chunk_bits = std::min(CHUNK_BITS, inp_size - i); + + for (size_t j = 0; j < chunk_bits; j++, map_mask <<= 1) { + a_bits |= map_mask; + + if (b_map & map_mask) { + b_bits |= (uint64_t)1 << b_bit_off; + if (++b_bit_off == CHUNK_BITS) { + b_bit_off = 0; + b_bit_vector[b_bits_cursor++] = b_bits; + b_bits = 0; + } + } + } + + a_bit_vector[i / CHUNK_BITS] = a_bits; + if (chunk_bits == CHUNK_BITS) + a_bits = 0; + } + + a_bits_cursor = inp_size / CHUNK_BITS; + a_bit_off = inp_size % CHUNK_BITS; + + auto* aux_assignment = prover.aux_assignment_data; + size_t aux_size = prover.aux_assignment_size; + + for (size_t i = 0; i < aux_size; i += CHUNK_BITS) { + uint64_t a_map = prover.a_aux_density[i / CHUNK_BITS]; + uint64_t b_map = prover.b_aux_density[i / CHUNK_BITS]; + uint64_t l_bits = 0; + uint64_t map_mask = 1; + size_t chunk_bits = std::min(CHUNK_BITS, aux_size - i); + + for (size_t j = 0; j < chunk_bits; j++, map_mask <<= 1) { + const fr_t& 
scalar = aux_assignment[i + j]; + + bool is_one = scalar.is_one(); + bool is_zero = scalar.is_zero(); + + if (!is_zero && !is_one) + l_bits |= map_mask; + + if (a_map & map_mask) { + if (!is_zero && !is_one) { + a_bits |= ((uint64_t)1 << a_bit_off); + } + + if (++a_bit_off == CHUNK_BITS) { + a_bit_off = 0; + a_bit_vector[a_bits_cursor++] = a_bits; + a_bits = 0; + } + } + + if (b_map & map_mask) { + if (!is_zero && !is_one) { + b_bits |= ((uint64_t)1 << b_bit_off); + } + + if (++b_bit_off == CHUNK_BITS) { + b_bit_off = 0; + b_bit_vector[b_bits_cursor++] = b_bits; + b_bits = 0; + } + } + } + + l_bit_vector[i / CHUNK_BITS] = l_bits; + } + + if (a_bit_off) + a_bit_vector[a_bits_cursor] = a_bits; + + if (b_bit_off) + b_bit_vector[b_bits_cursor] = b_bits; + }); + + if (caught_exception) + return; + + // merge all the masks from aux_assignments and count set bits + std::vector tail_msm_l_mask(split_vectors_l.bit_vector_size); + std::vector tail_msm_a_mask(split_vectors_a.bit_vector_size); + std::vector tail_msm_b_mask(split_vectors_b.bit_vector_size); + + for (size_t i = 0; i < tail_msm_l_mask.size(); i++) { + uint64_t mask = split_vectors_l.bit_vector[0][i]; + for (size_t c = 1; c < num_circuits; c++) + mask |= split_vectors_l.bit_vector[c][i]; + tail_msm_l_mask[i] = mask; + l_popcount += __builtin_popcountll(mask); + } + + for (size_t i = 0; i < tail_msm_a_mask.size(); i++) { + uint64_t mask = split_vectors_a.bit_vector[0][i]; + for (size_t c = 1; c < num_circuits; c++) + mask |= split_vectors_a.bit_vector[c][i]; + tail_msm_a_mask[i] = mask; + a_popcount += __builtin_popcountll(mask); + } + + for (size_t i = 0; i < tail_msm_b_mask.size(); i++) { + uint64_t mask = split_vectors_b.bit_vector[0][i]; + for (size_t c = 1; c < num_circuits; c++) + mask |= split_vectors_b.bit_vector[c][i]; + tail_msm_b_mask[i] = mask; + b_popcount += __builtin_popcountll(mask); + } + + if (caught_exception) + return; + + if (l_split_msm = (l_popcount <= points_l.size() / 2)) { + split_vectors_l.tail_msms_resize(l_popcount); + tail_msm_l_bases.resize(l_popcount); + } + + if (a_split_msm = (a_popcount <= points_a.size() / 2)) { + split_vectors_a.tail_msms_resize(a_popcount); + tail_msm_a_bases.resize(a_popcount); + } else { + split_vectors_a.tail_msms_resize(points_a.size()); + } + + if (b_split_msm = (b_popcount <= points_b_g1.size() / 2)) { + split_vectors_b.tail_msms_resize(b_popcount); + tail_msm_b_g1_bases.resize(b_popcount); + tail_msm_b_g2_bases.resize(b_popcount); + } else { + split_vectors_b.tail_msms_resize(points_b_g1.size()); + } + + // populate bitmaps for batch additions, bases and scalars for tail msms + groth16_pool.par_map(num_circuits, [&](size_t c) { + auto& prover = provers[c]; + auto& l_bit_vector = split_vectors_l.bit_vector[c]; + auto& a_bit_vector = split_vectors_a.bit_vector[c]; + auto& b_bit_vector = split_vectors_b.bit_vector[c]; + auto& tail_msm_l_scalars = split_vectors_l.tail_msm_scalars[c]; + auto& tail_msm_a_scalars = split_vectors_a.tail_msm_scalars[c]; + auto& tail_msm_b_scalars = split_vectors_b.tail_msm_scalars[c]; + + size_t a_cursor = 0, b_cursor = 0; + + uint32_t a_bit_off = 0, b_bit_off = 0; + size_t a_bits_cursor = 0, b_bits_cursor = 0; + + auto* inp_assignment = prover.inp_assignment_data; + size_t inp_size = prover.inp_assignment_size; + + for (size_t i = 0; i < inp_size; i += CHUNK_BITS) { + uint64_t b_map = prover.b_inp_density[i / CHUNK_BITS]; + size_t chunk_bits = std::min(CHUNK_BITS, inp_size - i); + + for (size_t j = 0; j < chunk_bits; j++, b_map >>= 1) { + const fr_t& 
scalar = inp_assignment[i + j]; + + if (b_map & 1) { + if (c == 0 && b_split_msm) { + tail_msm_b_g1_bases[b_cursor] = points_b_g1[b_cursor]; + tail_msm_b_g2_bases[b_cursor] = points_b_g2[b_cursor]; + } + tail_msm_b_scalars[b_cursor] = scalar; + b_cursor++; + + if (++b_bit_off == CHUNK_BITS) { + b_bit_off = 0; + b_bit_vector[b_bits_cursor++] = 0; + } + } + + if (c == 0 && a_split_msm) + tail_msm_a_bases[a_cursor] = points_a[a_cursor]; + tail_msm_a_scalars[a_cursor] = scalar; + a_cursor++; + } + + a_bit_vector[i / CHUNK_BITS] = 0; + } + + assert(b_cursor == prover.b_inp_popcount); + + a_bits_cursor = inp_size / CHUNK_BITS; + a_bit_off = inp_size % CHUNK_BITS; + + uint64_t a_mask = tail_msm_a_mask[a_bits_cursor], a_bits = 0; + uint64_t b_mask = tail_msm_b_mask[b_bits_cursor], b_bits = 0; + + size_t points_a_cursor = a_cursor, + points_b_cursor = b_cursor, + l_cursor = 0; + + auto* aux_assignment = prover.aux_assignment_data; + size_t aux_size = prover.aux_assignment_size; + + for (size_t i = 0; i < aux_size; i += CHUNK_BITS) { + uint64_t a_map = prover.a_aux_density[i / CHUNK_BITS]; + uint64_t b_map = prover.b_aux_density[i / CHUNK_BITS]; + uint64_t l_map = tail_msm_l_mask[i / CHUNK_BITS], l_bits = 0; + uint64_t map_mask = 1; + + size_t chunk_bits = std::min(CHUNK_BITS, aux_size - i); + for (size_t j = 0; j < chunk_bits; j++, map_mask <<= 1) { + const fr_t& scalar = aux_assignment[i + j]; + bool is_one = scalar.is_one(); + + if (l_split_msm) { + if (is_one) + l_bits |= map_mask; + + if (l_map & map_mask) { + if (c == 0) + tail_msm_l_bases[l_cursor] = points_l[i+j]; + tail_msm_l_scalars[l_cursor] = czero(scalar, is_one); + l_cursor++; + } + } + + if (a_split_msm) { + if (a_map & map_mask) { + uint64_t mask = (uint64_t)1 << a_bit_off; + + if (a_mask & mask) { + if (c == 0) + tail_msm_a_bases[a_cursor] = points_a[points_a_cursor]; + tail_msm_a_scalars[a_cursor] = czero(scalar, is_one); + a_cursor++; + } + + points_a_cursor++; + + if (is_one) + a_bits |= mask; + + if (++a_bit_off == CHUNK_BITS) { + a_bit_off = 0; + a_bit_vector[a_bits_cursor++] = a_bits; + a_bits = 0; + a_mask = tail_msm_a_mask[a_bits_cursor]; + } + } + } else { + if (a_map & map_mask) { + tail_msm_a_scalars[a_cursor] = scalar; + a_cursor++; + } + } + + if (b_split_msm) { + if (b_map & map_mask) { + uint64_t mask = (uint64_t)1 << b_bit_off; + + if (b_mask & mask) { + if (c == 0) { + tail_msm_b_g1_bases[b_cursor] = + points_b_g1[points_b_cursor]; + tail_msm_b_g2_bases[b_cursor] = + points_b_g2[points_b_cursor]; + } + tail_msm_b_scalars[b_cursor] = czero(scalar, + is_one); + b_cursor++; + } + + points_b_cursor++; + + if (is_one) + b_bits |= mask; + + if (++b_bit_off == CHUNK_BITS) { + b_bit_off = 0; + b_bit_vector[b_bits_cursor++] = b_bits; + b_bits = 0; + b_mask = tail_msm_b_mask[b_bits_cursor]; + } + } + } else { + if (b_map & map_mask) { + tail_msm_b_scalars[b_cursor] = scalar; + b_cursor++; + } + } + } + + l_bit_vector[i / CHUNK_BITS] = l_bits; + } + + if (a_bit_off) + a_bit_vector[a_bits_cursor] = a_bits; + + if (b_bit_off) + b_bit_vector[b_bits_cursor] = b_bits; + + if (l_split_msm) + assert(l_cursor == l_popcount); + + if (a_split_msm) { + assert(points_a_cursor == points_a.size()); + assert(a_cursor == a_popcount); + } else { + assert(a_cursor == points_a.size()); + } + + if (b_split_msm) { + assert(points_b_cursor == points_b_g1.size()); + assert(b_cursor == b_popcount); + } else { + assert(b_cursor == points_b_g1.size()); + } + + }); + // end of pre-processing step + + for (size_t i = 0; i < n_gpus; i++) + 
barrier.notify(); + + if (caught_exception) + return; + + // tail MSM b_g2 - on CPU + for (size_t c = 0; c < num_circuits; c++) { +#ifndef __CUDA_ARCH__ + mult_pippenger(results.b_g2[c], + b_split_msm ? tail_msm_b_g2_bases.data() : + points_b_g2.data(), + split_vectors_b.tail_msm_scalars[c].size(), + split_vectors_b.tail_msm_scalars[c].data(), + true, &groth16_pool); +#endif + + if (caught_exception) + return; + } + }); + + batch_add_results batch_add_res{num_circuits}; + std::vector per_gpu; + RustError ret{cudaSuccess}; + + for (size_t tid = 0; tid < n_gpus; tid++) { + per_gpu.emplace_back(std::thread([&, tid, n_gpus](size_t num_circuits) + { + const gpu_t& gpu = select_gpu(tid); + + size_t rem = num_circuits % n_gpus; + num_circuits /= n_gpus; + num_circuits += tid < rem; + size_t circuit0 = tid * num_circuits; + if (tid >= rem) + circuit0 += rem; + + try { + { + size_t d_a_sz = sizeof(fr_t) << (lg2(points_h.size() - 1) + 1); + gpu_ptr_t d_a{(scalar_t*)gpu.Dmalloc(d_a_sz)}; + + for (size_t c = circuit0; c < circuit0 + num_circuits; c++) { +#ifndef __CUDA_ARCH__ + ntt_msm_h::execute_ntt_msm_h(gpu, d_a, provers[c], + points_h, + results.h[c]); +#endif + if (caught_exception) + return; + } + } + + barrier.wait(); + + if (caught_exception) + return; + + if (l_split_msm) { + // batch addition L - on GPU + execute_batch_addition(gpu, circuit0, num_circuits, + points_l, split_vectors_l, + &batch_add_res.l[circuit0]); + + if (caught_exception) + return; + } + + if (a_split_msm) { + // batch addition a - on GPU + execute_batch_addition(gpu, circuit0, num_circuits, + points_a, split_vectors_a, + &batch_add_res.a[circuit0]); + + if (caught_exception) + return; + } + + if (b_split_msm) { + // batch addition b_g1 - on GPU + execute_batch_addition(gpu, circuit0, num_circuits, + points_b_g1, split_vectors_b, + &batch_add_res.b_g1[circuit0]); + + if (caught_exception) + return; + + // batch addition b_g2 - on GPU + execute_batch_addition(gpu, circuit0, + num_circuits, points_b_g2, + split_vectors_b, &batch_add_res.b_g2[circuit0]); + + if (caught_exception) + return; + } + + { + msm_t msm{nullptr, + (l_popcount + a_popcount + b_popcount) / 3}; + + for (size_t c = circuit0; c < circuit0+num_circuits; c++) { + // tail MSM l - on GPU + if (l_split_msm) + msm.invoke(results.l[c], tail_msm_l_bases, + split_vectors_l.tail_msm_scalars[c], true); + else + msm.invoke(results.l[c], points_l, + provers[c].aux_assignment_data, true); + + if (caught_exception) + return; + + // tail MSM a - on GPU + if (a_split_msm) + msm.invoke(results.a[c], tail_msm_a_bases, + split_vectors_a.tail_msm_scalars[c], true); + else + msm.invoke(results.a[c], points_a, + split_vectors_a.tail_msm_scalars[c], true); + + if (caught_exception) + return; + + // tail MSM b_g1 - on GPU + if (b_split_msm) + msm.invoke(results.b_g1[c], tail_msm_b_g1_bases, + split_vectors_b.tail_msm_scalars[c], true); + else + msm.invoke(results.b_g1[c], points_b_g1, + split_vectors_b.tail_msm_scalars[c], true); + + if (caught_exception) + return; + } + } + } catch (const cuda_error& e) { + bool already = caught_exception.exchange(true); + if (!already) { + for (size_t i = 1; i < n_gpus; i++) + barrier.notify(); +#ifdef TAKE_RESPONSIBILITY_FOR_ERROR_MESSAGE + ret = RustError{e.code(), e.what()}; +#else + ret = RustError{e.code()}; +#endif + } + gpu.sync(); + } + }, num_circuits)); + } + + prep_msm_thread.join(); + for (auto& tid : per_gpu) + tid.join(); + + if (caught_exception) + return ret; + + for (size_t circuit = 0; circuit < num_circuits; circuit++) { + if 
(l_split_msm) + results.l[circuit].add(batch_add_res.l[circuit]); + if (a_split_msm) + results.a[circuit].add(batch_add_res.a[circuit]); + if (b_split_msm) { + results.b_g1[circuit].add(batch_add_res.b_g1[circuit]); + results.b_g2[circuit].add(batch_add_res.b_g2[circuit]); + } + + fr_t r = r_s[circuit], s = s_s[circuit]; + fr_t rs = r * s; + // we want the scalars to be in Montomery form when passing them to + // "mult" routine + + point_t g_a, g_c, a_answer, b1_answer, vk_delta_g1_rs, vk_alpha_g1_s, + vk_beta_g1_r; + point_fp2_t g_b; + + mult(vk_delta_g1_rs, vk->delta_g1, rs); + mult(vk_alpha_g1_s, vk->alpha_g1, s); + mult(vk_beta_g1_r, vk->beta_g1, r); + + mult(b1_answer, results.b_g1[circuit], r); + + // A + mult(g_a, vk->delta_g1, r); + g_a.add(vk->alpha_g1); + g_a.add(results.a[circuit]); + + // B + mult(g_b, vk->delta_g2, s); + g_b.add(vk->beta_g2); + g_b.add(results.b_g2[circuit]); + + // C + mult(g_c, results.a[circuit], s); + g_c.add(b1_answer); + g_c.add(vk_delta_g1_rs); + g_c.add(vk_alpha_g1_s); + g_c.add(vk_beta_g1_r); + g_c.add(results.h[circuit]); + g_c.add(results.l[circuit]); + + // to affine + proofs[circuit].a = g_a; + proofs[circuit].b = g_b; + proofs[circuit].c = g_c; + } + + return ret; +} diff --git a/extern/supraseal/c2/cuda/groth16_ntt_h.cu b/extern/supraseal/c2/cuda/groth16_ntt_h.cu new file mode 100644 index 000000000..6d072d89d --- /dev/null +++ b/extern/supraseal/c2/cuda/groth16_ntt_h.cu @@ -0,0 +1,127 @@ +// Copyright Supranational LLC + +#include + +__launch_bounds__(1024) +__global__ void coeff_wise_mult(fr_t* a, const fr_t* b, uint32_t lg_domain_size) +{ + uint32_t idx0 = threadIdx.x + blockIdx.x * blockDim.x; + size_t limit = (size_t)1 << lg_domain_size; + + for (size_t idx = idx0; idx < limit; idx += blockDim.x * gridDim.x) + a[idx] *= b[idx]; +} + +__launch_bounds__(1024) +__global__ void sub_mult_with_constant(fr_t* a, const fr_t* c, fr_t z, + uint32_t lg_domain_size) +{ + uint32_t idx0 = threadIdx.x + blockIdx.x * blockDim.x; + size_t limit = (size_t)1 << lg_domain_size; + + for (size_t idx = idx0; idx < limit; idx += blockDim.x * gridDim.x) { + fr_t r = a[idx] - c[idx]; + a[idx] = r * z; + } +} + +#ifndef __CUDA_ARCH__ + +const size_t gib = (size_t)1 << 30; + +class ntt_msm_h : public NTT { +private: + static fr_t calculate_z_inv(size_t lg_domain_size) { + fr_t gen_pow = group_gen; + while (lg_domain_size--) + gen_pow ^= 2; + return (gen_pow - fr_t::one()).reciprocal(); + } + + static void execute_ntts_single(fr_t* d_inout, const fr_t* in, + size_t lg_domain_size, size_t actual_size, + stream_t& stream) + { + size_t domain_size = (size_t)1 << lg_domain_size; + + assert(actual_size <= domain_size); + + stream.HtoD(&d_inout[0], in, actual_size); + + if (actual_size < domain_size) { + cudaMemsetAsync(&d_inout[actual_size], 0, + (domain_size - actual_size) * sizeof(fr_t), stream); + } + + NTT_internal(&d_inout[0], lg_domain_size, + NTT::InputOutputOrder::NR, NTT::Direction::inverse, + NTT::Type::standard, stream); + NTT_internal(&d_inout[0], lg_domain_size, + NTT::InputOutputOrder::RN, NTT::Direction::forward, + NTT::Type::coset, stream); + } + + static int lg2(size_t n) + { int ret = 0; while (n >>= 1) ret++; return ret; } + +public: + + // a, b, c = coset_ntt(intt(a, b, c)) + // a *= b + // a -= c + // a[i] /= (multiplicative_gen^domain_size) - 1 + // a = coset_intt(a) + // a is the result vector + static void execute_ntt_msm_h(const gpu_t& gpu, gpu_ptr_t d_a, + const Assignment& input, + slice_t points_h, + point_t& result_h) + { + size_t actual_size = 
input.abc_size; + size_t npoints = points_h.size(); + size_t lg_domain_size = lg2(npoints - 1) + 1; + size_t domain_size = (size_t)1 << lg_domain_size; + + fr_t z_inv = calculate_z_inv(lg_domain_size); + + int sm_count = gpu.props().multiProcessorCount; + + bool lot_of_memory = 3 * domain_size * sizeof(fr_t) < + gpu.props().totalGlobalMem - gib; + { + dev_ptr_t d_b(domain_size * (lot_of_memory + 1)); + fr_t* d_c = &d_b[domain_size * lot_of_memory]; + + event_t sync_event; + + execute_ntts_single(&d_a[0], input.a, lg_domain_size, + actual_size, gpu[0]); + sync_event.record(gpu[0]); + + execute_ntts_single(&d_b[0], input.b, lg_domain_size, + actual_size, gpu[1]); + + sync_event.wait(gpu[1]); + coeff_wise_mult<<>> + (&d_a[0], &d_b[0], (index_t)lg_domain_size); + sync_event.record(gpu[1]); + + execute_ntts_single(&d_c[0], input.c, lg_domain_size, + actual_size, gpu[1 + lot_of_memory]); + + sync_event.wait(gpu[1 + lot_of_memory]); + sub_mult_with_constant<<>> + (&d_a[0], &d_c[0], z_inv, (index_t)lg_domain_size); + } + + NTT_internal(&d_a[0], lg_domain_size, NTT::InputOutputOrder::NN, + NTT::Direction::inverse, NTT::Type::coset, gpu[1 + lot_of_memory]); + + gpu[1 + lot_of_memory].sync(); + + msm_t msm(nullptr, npoints); + msm.invoke(result_h, points_h, d_a, true); + } +}; + +#endif diff --git a/extern/supraseal/c2/cuda/groth16_split_msm.cu b/extern/supraseal/c2/cuda/groth16_split_msm.cu new file mode 100644 index 000000000..a164f3300 --- /dev/null +++ b/extern/supraseal/c2/cuda/groth16_split_msm.cu @@ -0,0 +1,134 @@ +// Copyright Supranational LLC + +#include + +template __global__ +void batch_addition(bucket_t::mem_t ret_[], + const affine_t::mem_t points_[], uint32_t npoints, + const uint32_t bitmap[], bool accumulate, + uint32_t sid); + +template __global__ +void batch_addition(bucket_fp2_t::mem_t ret_[], + const affine_fp2_t::mem_t points_[], + uint32_t npoints, const uint32_t bitmap[], + bool accumulate, uint32_t sid); + +struct batch_add_results { + std::vector l; + std::vector a; + std::vector b_g1; + std::vector b_g2; + + batch_add_results(size_t num_circuits) : l(num_circuits), + a(num_circuits), + b_g1(num_circuits), + b_g2(num_circuits) { } +}; + +template class uninit { + T val; +public: + uninit() { } // don't zero std::vector> + uninit(T v) { val = v; } + operator T() const { return val; } +}; + +using mask_t = uninit; + +const size_t CHUNK_BITS = sizeof(mask_t) * 8; // 64 bits + +#define NUM_BATCHES 8 +#define GPU_DIV (32*WARP_SZ) + +class split_vectors { +public: + std::vector> bit_vector; + std::vector> tail_msm_scalars; + size_t batch_size, bit_vector_size; + + split_vectors(size_t num_circuits, size_t num_points) + : bit_vector{num_circuits}, + tail_msm_scalars{num_circuits} + { + batch_size = (num_points + GPU_DIV - 1) / GPU_DIV; + batch_size = (batch_size + NUM_BATCHES - 1) / NUM_BATCHES; + batch_size *= GPU_DIV; + + bit_vector_size = (num_points + CHUNK_BITS - 1) / CHUNK_BITS; + + for (size_t c = 0; c < num_circuits; c++) { + bit_vector[c].resize(bit_vector_size); + } + } + + void tail_msms_resize(size_t num_sig_scalars) { + size_t num_circuits = tail_msm_scalars.size(); + for (size_t c = 0; c < num_circuits; c++) { + tail_msm_scalars[c].resize(num_sig_scalars); + } + } +}; + +template +void execute_batch_addition(const gpu_t& gpu, + size_t circuit0, size_t num_circuits, + slice_t points, + const split_vectors& split_vector, + point_t batch_add_res[]) +{ + int sm_count = gpu.sm_count(); + + uint32_t nbuckets = sm_count * BATCH_ADD_BLOCK_SIZE / WARP_SZ; + + uint32_t 
bit_vector_size = (split_vector.bit_vector_size + WARP_SZ - 1) & (0u - WARP_SZ); + size_t batch_size = split_vector.batch_size; + + assert(batch_size == (uint32_t)batch_size); + + size_t d_points_size = batch_size * 2 * sizeof(affine_h); + size_t d_buckets_size = num_circuits * nbuckets * sizeof(bucket_h); + + dev_ptr_t d_temp{d_points_size + d_buckets_size + + num_circuits * bit_vector_size * sizeof(mask_t)}; + + vec2d_t d_points{&d_temp[0], (uint32_t)batch_size}; + vec2d_t d_buckets{&d_temp[d_points_size], nbuckets}; + vec2d_t d_bit_vectors{&d_temp[d_points_size + d_buckets_size], + bit_vector_size}; + + uint32_t sid = 0; + + for (size_t c = 0; c < num_circuits; c++) + gpu[sid].HtoD(d_bit_vectors[c], split_vector.bit_vector[circuit0 + c]); + + size_t npoints = points.size(); + for (uint32_t batch = 0; npoints > 0; batch++, sid ^= 1) { + uint32_t amount = std::min(npoints, batch_size); + size_t cursor = batch * batch_size; + + gpu[sid].HtoD(d_points[sid], &points[cursor], amount); + + for (size_t c = 0; c < num_circuits; c++) + gpu[sid].launch_coop(batch_addition, + {sm_count, BATCH_ADD_BLOCK_SIZE}, + d_buckets[c], (const affine_h*)d_points[sid], amount, + (const uint32_t*)&d_bit_vectors[c][cursor / CHUNK_BITS], + batch > 0, sid); + + npoints -= amount; + } + sid ^= 1; + + vec2d_t buckets{nbuckets, num_circuits}; + gpu[sid].DtoH(buckets[0], d_buckets[0], num_circuits * nbuckets); + gpu[sid].sync(); + + gpu.par_map(num_circuits, 1, [&, batch_add_res, nbuckets](size_t c) { + batch_add_res[c] = sum_up(buckets[c], nbuckets); + }); +} diff --git a/extern/supraseal/c2/cuda/groth16_srs.cuh b/extern/supraseal/c2/cuda/groth16_srs.cuh new file mode 100644 index 000000000..2040684ad --- /dev/null +++ b/extern/supraseal/c2/cuda/groth16_srs.cuh @@ -0,0 +1,471 @@ +// Copyright Supranational LLC + +#include +#include +#include +#include + +#include + +struct verifying_key { + affine_t alpha_g1; + affine_t beta_g1; + affine_fp2_t beta_g2; + affine_fp2_t gamma_g2; + affine_t delta_g1; + affine_fp2_t delta_g2; +}; + +#ifdef __CUDA_ARCH__ +typedef uint8_t byte; +#endif + +extern "C" { + int blst_p1_deserialize(affine_t*, const byte[96]); + int blst_p2_deserialize(affine_fp2_t*, const byte[192]); +} + +class SRS { +private: + // This class assumes that the SRS files used by filecoin have a specific file + // layout and assumes some properties of data types that are present in the file + // + // There are 3 data types in the file: + // 4-byte big-endian unsigned integer, + // 92-byte BLS12-381 P1 affine point, + // 192-byte BLS12-381 P2 affine point + // + // The layout of the file is as such, in order, without any padding: + // + // alpha_g1: g1 affine + // beta_g1 : g1 affine + // beta_g2 : g2 affine + // gamma_g2: g2 affine + // delta_g1: g1 affine + // delta_g2: g2 affine + // number of ic points: 4-byte big-endian unsigned integer + // ic points: g1 affines + // number of h points: 4-byte big-endian unsigned integer + // h points: g1 affines + // number of l points: 4-byte big-endian unsigned integer + // l points: g1 affines + // number of a points: 4-byte big-endian unsigned integer + // a points: g1 affines + // number of b_g1 points: 4-byte big-endian unsigned integer + // b_g1 points: g1 affines + // number of b_g2 points: 4-byte big-endian unsigned integer + // b_g2 points: g2 affines + class SRS_internal { + friend class SRS; + + private: + static const int max_num_circuits = 10; + + static size_t get_num_threads() { + int total_threads = groth16_pool.size(); + + // Assume that the CPU supports 
hyperthreading to be on the safe + // side and ensure that there are at least max_num_circuits number + // of physical cores left available if the SRS is going to be read + // concurrently with synthesis + // If there are not enough physical cores, just use all of them + // and read it. + return (total_threads / 2 - max_num_circuits) < max_num_circuits ? + (size_t)total_threads / 2 : + (size_t)total_threads / 2 - max_num_circuits; + } + + // size of p1 affine and p2 affine points in the SRS file in bytes + static const size_t p1_affine_size = 96; + static const size_t p2_affine_size = 192; + + // 3 p1 affine and 3 p2 affine points are in the verification key. 864 bytes + static const size_t vk_offset = p1_affine_size * 3 + p2_affine_size * 3; + + template + static T from_big_endian(const unsigned char* ptr) { + T res = ptr[0]; + for (size_t i = 1; i < sizeof(T); i++) { + res <<= 8; + res |= ptr[i]; + } + + return res; + } + + static size_t get_batch_size(size_t num_points, size_t num_threads) { + size_t batch_size = (num_points + num_threads - 1) / num_threads; + batch_size = (batch_size + 64 - 1) / 64; + return batch_size; + } + + static inline size_t read_g1_point(affine_t* point, const byte* srs_ptr) + { + blst_p1_deserialize(point, srs_ptr); + return p1_affine_size; + } + + static inline size_t read_g2_point(affine_fp2_t* point, const byte* srs_ptr) + { + blst_p2_deserialize(point, srs_ptr); + return p2_affine_size; + } + + static void read_g1_points(slice_t points, const byte* srs_ptr) + { + size_t num_points = points.size(); + size_t batch_size = get_batch_size(num_points, get_num_threads()); + + const byte (*srs)[p1_affine_size] = + reinterpret_cast(srs_ptr); + + groth16_pool.par_map(num_points, batch_size, [&](size_t i) { + (void)read_g1_point(const_cast(&points[i]), srs[i]); + }, get_num_threads()); + } + + static void read_g2_points(slice_t points, const byte* srs_ptr) + { + size_t num_points = points.size(); + size_t batch_size = get_batch_size(num_points, get_num_threads()); + + const byte (*srs)[p2_affine_size] = + reinterpret_cast(srs_ptr); + + groth16_pool.par_map(num_points, batch_size, [&](size_t i) { + (void)read_g2_point(const_cast(&points[i]), srs[i]); + }, get_num_threads()); + } + + std::thread read_th; + mutable std::mutex mtx; + + std::string path; + verifying_key vk; + +#if 0 +#define H_IS_STD__VECTOR + std::vector h; +#else + slice_t h; +#endif + slice_t l, a, b_g1; + slice_t b_g2; + void* pinned; + + SRS_internal(SRS_internal const&) = delete; + void operator=(SRS_internal const&) = delete; + + inline static size_t round_up(size_t sz) + { return (sz + 4095) & ((size_t)0 - 4096); } + + public: + SRS_internal(const char* srs_path) : path(srs_path), pinned(nullptr) { + struct { + struct { + uint32_t size; + size_t off; // in bytes + } h, l, a, b_g1, b_g2; + } data; + + if (!ngpus()) { + throw sppark_error{ENODEV, std::string("No CUDA devices available")}; + } + + int srs_file = open(srs_path, O_RDONLY); + + if (srs_file < 0) { + throw sppark_error{errno, "open(\"%s\") failed: ", srs_path}; + } + + struct stat st; + fstat(srs_file, &st); + size_t file_size = st.st_size; + + const byte* srs_ptr = (const byte*)mmap(NULL, file_size, PROT_READ, + MAP_PRIVATE, srs_file, 0); + + { + int err = errno; + close(srs_file); + if (srs_ptr == MAP_FAILED) { + throw sppark_error{err, "mmap(srs_file) failed: "}; + } + } + + size_t cursor = 0; + cursor += read_g1_point(&vk.alpha_g1, srs_ptr + cursor); + cursor += read_g1_point(&vk.beta_g1, srs_ptr + cursor); + cursor += 
read_g2_point(&vk.beta_g2, srs_ptr + cursor); + cursor += read_g2_point(&vk.gamma_g2, srs_ptr + cursor); + cursor += read_g1_point(&vk.delta_g1, srs_ptr + cursor); + cursor += read_g2_point(&vk.delta_g2, srs_ptr + cursor); + + if (file_size <= cursor + sizeof(uint32_t)) { + munmap(const_cast(srs_ptr), file_size); + throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")}; + } + uint32_t vk_ic_size = from_big_endian(srs_ptr + cursor); + cursor += sizeof(uint32_t); + + cursor += vk_ic_size * p1_affine_size; + if (file_size <= cursor + sizeof(uint32_t)) { + munmap(const_cast(srs_ptr), file_size); + throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")}; + } + data.h.size = from_big_endian(srs_ptr + cursor); + data.h.off = cursor += sizeof(uint32_t); + + cursor += data.h.size * p1_affine_size; + if (file_size <= cursor + sizeof(uint32_t)) { + munmap(const_cast(srs_ptr), file_size); + throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")}; + } + data.l.size = from_big_endian(srs_ptr + cursor); + data.l.off = cursor += sizeof(uint32_t); + + cursor += data.l.size * p1_affine_size; + if (file_size <= cursor + sizeof(uint32_t)) { + munmap(const_cast(srs_ptr), file_size); + throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")}; + } + data.a.size = from_big_endian(srs_ptr + cursor); + data.a.off = cursor += sizeof(uint32_t); + + cursor += data.a.size * p1_affine_size; + if (file_size <= cursor + sizeof(uint32_t)) { + munmap(const_cast(srs_ptr), file_size); + throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")}; + } + data.b_g1.size = from_big_endian(srs_ptr + cursor); + data.b_g1.off = cursor += sizeof(uint32_t); + + cursor += data.b_g1.size * p1_affine_size; + if (file_size <= cursor + sizeof(uint32_t)) { + munmap(const_cast(srs_ptr), file_size); + throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")}; + } + data.b_g2.size = from_big_endian(srs_ptr + cursor); + data.b_g2.off = cursor += sizeof(uint32_t); + + cursor += data.b_g2.size * p1_affine_size; + if (file_size < cursor) { + munmap(const_cast(srs_ptr), file_size); + throw sppark_error{EINVAL, std::string("SRS file size/layout mismatch")}; + } + + size_t l_size = round_up(data.l.size * sizeof(affine_t)), + a_size = round_up(data.a.size * sizeof(affine_t)), + b1_size = round_up(data.b_g1.size * sizeof(affine_t)), + b2_size = round_up(data.b_g2.size * sizeof(affine_fp2_t)), + total = l_size + a_size + b1_size + b2_size; +#ifndef H_IS_STD__VECTOR + total += round_up(data.h.size * sizeof(affine_t)); +#endif + + cudaError_t cuda_err = cudaHostAlloc(&pinned, total, cudaHostAllocPortable); + if (cuda_err != cudaSuccess) { + munmap(const_cast(srs_ptr), file_size); + CUDA_OK(cuda_err); + } + byte *ptr = reinterpret_cast(pinned); + + l = slice_t{ptr, data.l.size}; ptr += l_size; + a = slice_t{ptr, data.a.size}; ptr += a_size; + b_g1 = slice_t{ptr, data.b_g1.size}; ptr += b1_size; + b_g2 = slice_t{ptr, data.b_g2.size}; ptr += b2_size; + +#ifdef H_IS_STD__VECTOR + h.resize(data.h.size); +#else + h = slice_t{ptr, data.h.size}; +#endif + + semaphore_t barrier; + read_th = std::thread([&, srs_ptr, file_size, data] { + std::lock_guard guard(mtx); + barrier.notify(); + + read_g1_points(h, srs_ptr + data.h.off); + read_g1_points(l, srs_ptr + data.l.off); + read_g1_points(a, srs_ptr + data.a.off); + read_g1_points(b_g1, srs_ptr + data.b_g1.off); + read_g2_points(b_g2, srs_ptr + data.b_g2.off); + + munmap(const_cast(srs_ptr), file_size); + }); + 
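// The reader thread holds mtx for the entire load; waiting on the + // semaphore here guarantees it has acquired the lock before the + // constructor returns, so the getters below block until all points + // are deserialized. + 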
barrier.wait(); + } + ~SRS_internal() { + if (read_th.joinable()) + read_th.join(); + if (pinned) + cudaFreeHost(pinned); + } + }; + + struct inner { + const SRS_internal srs; + std::atomic ref_cnt; + inline inner(const char* srs_path) : srs(srs_path), ref_cnt(1) {} + }; + inner* ptr; + +public: + SRS(const char* srs_path) { ptr = new inner(srs_path); } + SRS(const SRS& r) { *this = r; } + ~SRS() { + if (ptr && ptr->ref_cnt.fetch_sub(1, std::memory_order_seq_cst) == 1) { + delete ptr; + } + } + + SRS& operator=(const SRS& r) { + if (this != &r) + (ptr = r.ptr)->ref_cnt.fetch_add(1, std::memory_order_relaxed); + return *this; + } + + SRS& operator=(SRS&& r) noexcept { + if (this != &r) { + ptr = r.ptr; + r.ptr = nullptr; + } + return *this; + } + + const verifying_key& get_vk() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.vk; + } + + const affine_t* get_h() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.h.data(); + } + + const affine_t* get_l() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.l.data(); + } + + const affine_t* get_a() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.a.data(); + } + + const affine_t* get_b_g1() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.b_g1.data(); + } + + const affine_fp2_t* get_b_g2() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.b_g2.data(); + } + + const slice_t get_h_slice() const { + std::lock_guard guard(ptr->srs.mtx); + return {ptr->srs.h.data(), ptr->srs.h.size()}; + } + + const slice_t& get_l_slice() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.l; + } + + const slice_t& get_a_slice() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.a; + } + + const slice_t& get_b_g1_slice() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.b_g1; + } + + const slice_t& get_b_g2_slice() const { + std::lock_guard guard(ptr->srs.mtx); + return ptr->srs.b_g2; + } + + const std::string& get_path() const { + return ptr->srs.path; + } + + // facilitate return by value through FFI, as SRS::by_value. + struct by_value { inner *ptr; }; + operator by_value() const { + ptr->ref_cnt.fetch_add(1, std::memory_order_relaxed); + return {ptr}; + } + SRS(by_value v) { ptr = v.ptr; } + + class SRS_cache { + std::list> list; + std::mutex mtx; + + public: + SRS lookup(const char *key) + { + std::lock_guard lock(mtx); + + for (auto it = list.begin(); it != list.end(); ++it) { + if (it->first == key) { + if (it != list.begin()) { + // move to the beginning of the list + list.splice(list.begin(), list, it); + } + return it->second; + } + } + + if (list.size() > 3) + list.pop_back(); // least recently used + + list.emplace_front(std::make_pair(key, SRS{key})); + + return list.begin()->second; + } + + void evict(const char *key) + { + std::lock_guard lock(mtx); + + list.remove_if([=](decltype(list)::value_type& elem) { + return elem.first == key; + }); + } + }; + + static SRS_cache& cache() + { + static SRS_cache da_cache; + return da_cache; + } + + void evict() const { SRS::cache().evict(ptr->srs.path.c_str()); } +}; + +extern "C" RustError::by_value create_SRS(SRS& ret, const char* srs_path, bool cache) +{ + try { + ret = cache ? 
SRS::cache().lookup(srs_path) : SRS{srs_path}; + return RustError{cudaSuccess}; + } catch (const sppark_error& e) { +#ifdef TAKE_RESPONSIBILITY_FOR_ERROR_MESSAGE + return RustError{e.code(), e.what()}; +#else + return RustError{e.code()}; +#endif + } +} + +extern "C" void evict_SRS(const SRS& ref) +{ ref.evict(); } + +extern "C" void drop_SRS(SRS& ref) +{ ref.~SRS(); } + +extern "C" SRS::by_value clone_SRS(const SRS& rhs) +{ return rhs; } diff --git a/extern/supraseal/c2/src/lib.rs b/extern/supraseal/c2/src/lib.rs new file mode 100644 index 000000000..e48733add --- /dev/null +++ b/extern/supraseal/c2/src/lib.rs @@ -0,0 +1,222 @@ +// Copyright Supranational LLC + +sppark::cuda_error!(); + +use std::path::PathBuf; + +#[repr(C)] +pub struct SRS { + ptr: *const core::ffi::c_void, +} + +impl Default for SRS { + fn default() -> Self { + Self { + ptr: core::ptr::null(), + } + } +} + +impl SRS { + pub fn try_new(srs_path: PathBuf, cache: bool) -> Result { + extern "C" { + fn create_SRS( + ret: &mut SRS, + srs_path: *const std::os::raw::c_char, + cache: bool, + ) -> cuda::Error; + } + let c_srs_path = std::ffi::CString::new(srs_path.to_str().unwrap()).unwrap(); + + let mut ret = SRS::default(); + let err = unsafe { create_SRS(&mut ret, c_srs_path.as_ptr(), cache) }; + if err.code != 0 { + Err(err) + } else { + Ok(ret) + } + } + + pub fn evict(&self) { + extern "C" { + fn evict_SRS(by_ref: &SRS); + } + unsafe { evict_SRS(self) }; + } +} + +impl Drop for SRS { + fn drop(&mut self) { + extern "C" { + fn drop_SRS(by_ref: &SRS); + } + unsafe { drop_SRS(self) }; + self.ptr = core::ptr::null(); + } +} + +impl Clone for SRS { + fn clone(&self) -> Self { + extern "C" { + fn clone_SRS(by_ref: &SRS) -> SRS; + } + unsafe { clone_SRS(self) } + } +} + +unsafe impl Sync for SRS {} +unsafe impl Send for SRS {} + +pub fn generate_groth16_proof( + ntt_a_scalars: &[*const S], + ntt_b_scalars: &[*const S], + ntt_c_scalars: &[*const S], + ntt_scalars_actual_size: usize, + input_assignments: &[*const S], + aux_assignments: &[*const S], + input_assignments_size: usize, + aux_assignments_size: usize, + a_aux_density_bv: &[D], + b_g1_input_density_bv: &[D], + b_g1_aux_density_bv: &[D], + a_aux_total_density: usize, + b_g1_input_total_density: usize, + b_g1_aux_total_density: usize, + num_circuits: usize, + r_s: &[S], + s_s: &[S], + proofs: &mut [PR], + srs: &SRS, +) { + assert_eq!(ntt_a_scalars.len(), num_circuits); + assert_eq!(ntt_b_scalars.len(), num_circuits); + assert_eq!(ntt_c_scalars.len(), num_circuits); + assert_eq!(input_assignments.len(), num_circuits); + assert_eq!(aux_assignments.len(), num_circuits); + assert_eq!(r_s.len(), num_circuits); + assert_eq!(s_s.len(), num_circuits); + assert_eq!(proofs.len(), num_circuits); + + let bv_element_size: usize = std::mem::size_of::() * 8; // length of D in bits + assert!( + bv_element_size == 64, + "only 64-bit elements in bit vectors are supported" + ); + + assert!(a_aux_density_bv.len() * bv_element_size >= aux_assignments_size); + assert!(b_g1_aux_density_bv.len() * bv_element_size >= aux_assignments_size); + + let provers: Vec<_> = (0..num_circuits) + .map(|c| Assignment:: { + // Density of queries + a_aux_density: a_aux_density_bv.as_ptr() as *const _, + a_aux_bit_len: aux_assignments_size, + a_aux_popcount: a_aux_total_density, + + b_inp_density: b_g1_input_density_bv.as_ptr() as *const _, + b_inp_bit_len: input_assignments_size, + b_inp_popcount: b_g1_input_total_density, + + b_aux_density: b_g1_aux_density_bv.as_ptr() as *const _, + b_aux_bit_len: 
aux_assignments_size, + b_aux_popcount: b_g1_aux_total_density, + + // Evaluations of A, B, C polynomials + a: ntt_a_scalars[c], + b: ntt_b_scalars[c], + c: ntt_c_scalars[c], + abc_size: ntt_scalars_actual_size, + + // Assignments of variables + inp_assignment_data: input_assignments[c], + inp_assignment_size: input_assignments_size, + + aux_assignment_data: aux_assignments[c], + aux_assignment_size: aux_assignments_size, + }) + .collect(); + + let err = unsafe { + generate_groth16_proofs_c( + provers.as_ptr() as *const _, + num_circuits, + r_s.as_ptr() as *const _, + s_s.as_ptr() as *const _, + proofs.as_mut_ptr() as *mut _, + srs, + ) + }; + + if err.code != 0 { + panic!("{}", String::from(err)); + } +} + +#[repr(C)] +pub struct Assignment { + // Density of queries + pub a_aux_density: *const usize, + pub a_aux_bit_len: usize, + pub a_aux_popcount: usize, + + pub b_inp_density: *const usize, + pub b_inp_bit_len: usize, + pub b_inp_popcount: usize, + + pub b_aux_density: *const usize, + pub b_aux_bit_len: usize, + pub b_aux_popcount: usize, + + // Evaluations of A, B, C polynomials + pub a: *const Scalar, + pub b: *const Scalar, + pub c: *const Scalar, + pub abc_size: usize, + + // Assignments of variables + pub inp_assignment_data: *const Scalar, + pub inp_assignment_size: usize, + + pub aux_assignment_data: *const Scalar, + pub aux_assignment_size: usize, +} + +extern "C" { + fn generate_groth16_proofs_c( + provers: *const core::ffi::c_void, + num_circuits: usize, + r_s: *const core::ffi::c_void, + s_s: *const core::ffi::c_void, + proofs: *mut core::ffi::c_void, + srs: &SRS, + ) -> cuda::Error; +} + +pub fn generate_groth16_proofs( + provers: &[Assignment], + r_s: &[S], + s_s: &[S], + proofs: &mut [PR], + srs: &SRS, +) { + let num_circuits = provers.len(); + + assert_eq!(r_s.len(), num_circuits); + assert_eq!(s_s.len(), num_circuits); + assert_eq!(proofs.len(), num_circuits); + + let err = unsafe { + generate_groth16_proofs_c( + provers.as_ptr() as *const _, + num_circuits, + r_s.as_ptr() as *const _, + s_s.as_ptr() as *const _, + proofs.as_mut_ptr() as *mut _, + srs, + ) + }; + + if err.code != 0 { + panic!("{}", String::from(err)); + } +} diff --git a/extern/supraseal/c2/tests/c2.rs b/extern/supraseal/c2/tests/c2.rs new file mode 100644 index 000000000..6b8a8e55d --- /dev/null +++ b/extern/supraseal/c2/tests/c2.rs @@ -0,0 +1,4 @@ +#[test] +fn run_seal() { + assert!(false, "c2 test is moved to /demos/c2-test"); +} diff --git a/extern/supraseal/demos/.cargo/config.toml b/extern/supraseal/demos/.cargo/config.toml new file mode 100644 index 000000000..978eb1f8b --- /dev/null +++ b/extern/supraseal/demos/.cargo/config.toml @@ -0,0 +1,2 @@ +[patch.crates-io] +supraseal-c2 = { path = "../c2" } diff --git a/extern/supraseal/demos/c2-test/Cargo.toml b/extern/supraseal/demos/c2-test/Cargo.toml new file mode 100644 index 000000000..58c4de8f7 --- /dev/null +++ b/extern/supraseal/demos/c2-test/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "supraseal-c2-test" +version = "0.1.0" +edition = "2021" +publish = false + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] + +[dev-dependencies] +supraseal-c2 = "0" +anyhow = "1.0.26" +bincode = "1.1.2" +filecoin-proofs = { version = "16.0.0", default-features = false, features = ["cuda-supraseal"] } +storage-proofs-core = { version = "16.0.0", default-features = false, features = ["cuda-supraseal"] } diff --git a/extern/supraseal/demos/c2-test/resources/test/commit-phase1-output 
b/extern/supraseal/demos/c2-test/resources/test/commit-phase1-output new file mode 100644 index 000000000..11a4c09fe Binary files /dev/null and b/extern/supraseal/demos/c2-test/resources/test/commit-phase1-output differ diff --git a/extern/supraseal/demos/c2-test/src/main.rs b/extern/supraseal/demos/c2-test/src/main.rs new file mode 100644 index 000000000..e7a11a969 --- /dev/null +++ b/extern/supraseal/demos/c2-test/src/main.rs @@ -0,0 +1,3 @@ +fn main() { + println!("Hello, world!"); +} diff --git a/extern/supraseal/demos/c2-test/tests/c2.rs b/extern/supraseal/demos/c2-test/tests/c2.rs new file mode 100644 index 000000000..70104663e --- /dev/null +++ b/extern/supraseal/demos/c2-test/tests/c2.rs @@ -0,0 +1,83 @@ +// Copyright Supranational LLC + +const COMMIT_PHASE1_OUTPUT_FILE: &str = "resources/test/commit-phase1-output"; + +use anyhow::Context; +use bincode::deserialize; +use std::fs::read; +use std::path::PathBuf; +use std::time::Instant; + +use filecoin_proofs::{ + constants::SECTOR_SIZE_32_GIB, seal_commit_phase2, verify_seal, + PoRepConfig, SealCommitPhase1Output, SectorShape32GiB, +}; +use storage_proofs_core::{api_version::ApiVersion, sector::SectorId}; + +#[test] +fn run_seal() { + let commit_phase1_output = { + let mut commit_phase1_output_path = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + commit_phase1_output_path.push(COMMIT_PHASE1_OUTPUT_FILE); + println!("*** Restoring commit phase1 output file"); + let commit_phase1_output_bytes = read(&commit_phase1_output_path) + .with_context(|| { + format!( + "couldn't read file commit_phase1_output_path={:?}", + commit_phase1_output_path + ) + }) + .unwrap(); + println!( + "commit_phase1_output_bytes len {}", + commit_phase1_output_bytes.len() + ); + + let res: SealCommitPhase1Output = + deserialize(&commit_phase1_output_bytes).unwrap(); + res + }; + + let sector_id = SectorId::from(0); + let prover_id: [u8; 32] = [9u8; 32]; + let arbitrary_porep_id = [99; 32]; + + let porep_config = + PoRepConfig::new_groth16(SECTOR_SIZE_32_GIB, arbitrary_porep_id, ApiVersion::V1_1_0); + + let SealCommitPhase1Output { + vanilla_proofs: _, + comm_d, + comm_r, + replica_id: _, + seed, + ticket, + } = commit_phase1_output; + + println!("Starting seal_commit_phase2"); + let now = Instant::now(); + let commit_output = + seal_commit_phase2(&porep_config, commit_phase1_output, prover_id, sector_id).unwrap(); + println!("seal_commit_phase2 took: {:.2?}", now.elapsed()); + + println!("Verifying result"); + let result = verify_seal::( + &porep_config, + comm_r, + comm_d, + prover_id, + sector_id, + ticket, + seed, + &commit_output.proof, + ) + .unwrap(); + + if result == true { + println!("Verification PASSED!"); + } else { + println!("Verification FAILED!"); + } + + assert!(result, "Verification FAILED"); +} diff --git a/extern/supraseal/demos/main.cpp b/extern/supraseal/demos/main.cpp new file mode 100644 index 000000000..0c0e5aab7 --- /dev/null +++ b/extern/supraseal/demos/main.cpp @@ -0,0 +1,257 @@ +// Copyright Supranational LLC + +#include +#include +#include // file read +#include // printing +#include +#include // htonl +#include + +#include "../sealing/supra_seal.hpp" +#include "../util/sector_util.hpp" + +uint8_t replica_id_buf_2K[] = { 24, 108, 245, 122, 161, 8, 61, 88, 51, 81, 141, 176, 97, 225, 25, 135, 218, 165, 249, 113, 195, 10, 255, 24, 6, 140, 145, 244, 253, 107, 8, 39 }; +uint8_t replica_id_buf_4K[] = { 2, 239, 249, 237, 200, 74, 74, 118, 230, 239, 207, 194, 109, 161, 27, 24, 208, 63, 44, 254, 14, 250, 200, 138, 74, 35, 123, 115, 123, 86, 
98, 2 }; +uint8_t replica_id_buf_16K[] = { 240, 26, 25, 20, 201, 110, 242, 173, 62, 74, 255, 96, 37, 143, 120, 69, 91, 52, 81, 243, 134, 37, 112, 41, 27, 213, 208, 145, 107, 149, 76, 52 }; +uint8_t replica_id_buf_32K[] = { 50, 213, 77, 230, 65, 212, 193, 39, 25, 125, 41, 233, 147, 28, 126, 201, 217, 162, 65, 39, 132, 252, 61, 245, 39, 34, 32, 38, 158, 149, 24, 24 }; +uint8_t replica_id_buf_8M[] = { 23, 124, 26, 248, 237, 136, 178, 226, 193, 239, 173, 27, 131, 214, 147, 242, 18, 110, 7, 252, 4, 245, 118, 152, 94, 125, 73, 140, 25, 102, 152, 57 }; +uint8_t replica_id_buf_16M[] = { 0, 104, 11, 183, 198, 151, 180, 179, 187, 46, 233, 221, 244, 44, 204, 221, 108, 14, 17, 49, 254, 229, 229, 252, 200, 102, 16, 240, 84, 175, 220, 52 }; +uint8_t replica_id_buf_512M[] = { 37, 249, 121, 174, 70, 206, 91, 232, 165, 246, 66, 184, 198, 10, 232, 126, 215, 171, 221, 76, 26, 2, 117, 118, 201, 142, 116, 143, 25, 131, 167, 37 }; +uint8_t replica_id_buf_1G[] = { 36, 67, 76, 192, 211, 223, 90, 159, 60, 141, 212, 178, 36, 120, 21, 93, 28, 92, 79, 231, 31, 100, 115, 240, 114, 152, 20, 78, 80, 158, 122, 34 }; +uint8_t replica_id_buf_32G[] = { 121, 145, 135, 251, 187, 117, 51, 109, 88, 99, 80, 105, 79, 235, 85, 240, 147, 153, 120, 231, 144, 247, 244, 201, 42, 10, 149, 142, 203, 151, 188, 43 }; +uint8_t replica_id_buf_64G[] = { 96, 159, 133, 62, 63, 177, 24, 234, 146, 31, 140, 109, 39, 48, 219, 3, 168, 169, 249, 98, 25, 210, 33, 210, 4, 217, 45, 216, 99, 90, 114, 4 }; + +// This ultimately comes from the sealing flows +const char* get_parent_filename(size_t sector_size_lg) { + switch (sector_size_lg) { + case SectorSizeLg::Sector2KB: + // 2KB + return "/var/tmp/filecoin-parents/v28-sdr-parent-652bae61e906c0732e9eb95b1217cfa6afcce221ff92a8aedf62fa778fa765bc.cache"; + case SectorSizeLg::Sector4KB: + // 4KB + return "/var/tmp/filecoin-parents/v28-sdr-parent-56d4865ec3476221fd1412409b5d9439182d71bf5e2078d0ecde76c0f7e33986.cache"; + case SectorSizeLg::Sector16KB: + // 16KB + return "/var/tmp/filecoin-parents/v28-sdr-parent-41059e359f8a8b479f9e29bdf20344fcd43d9c03ce4a7d01daf2c9a77909fd4f.cache"; + case SectorSizeLg::Sector32KB: + // 32KB + return "/var/tmp/filecoin-parents/v28-sdr-parent-81a0489b0dd6c7755cdce0917dd436288b6e82e17d596e5a23836e7a602ab9be.cache"; + case SectorSizeLg::Sector8MB: + // 8MB + return "/var/tmp/filecoin-parents/v28-sdr-parent-1139cb33af3e3c24eb644da64ee8bc43a8df0f29fc96b5337bee369345884cdc.cache"; + case SectorSizeLg::Sector16MB: + // 16MB + return "/var/tmp/filecoin-parents/v28-sdr-parent-7fa3ff8ffb57106211c4be413eb15ea072ebb363fa5a1316fe341ac8d7a03d51.cache"; + case SectorSizeLg::Sector512MB: + // 512MB + return "/var/tmp/filecoin-parents/v28-sdr-parent-7ba215a1d2345774ab90b8cb1158d296e409d6068819d7b8c7baf0b25d63dc34.cache"; + case SectorSizeLg::Sector1GB: + // 1GB + return "/var/tmp/filecoin-parents/v28-sdr-parent-637f021bceb5248f0d1dcf4dbf132fedc025d0b3b55d3e7ac171c02676a96ccb.cache"; + case SectorSizeLg::Sector32GB: + // 32GB + return "/var/tmp/filecoin-parents/v28-sdr-parent-21981246c370f9d76c7a77ab273d94bde0ceb4e938292334960bce05585dc117.cache"; + case SectorSizeLg::Sector64GB: + // 64GB + return "/var/tmp/filecoin-parents/v28-sdr-parent-767ee5400732ee77b8762b9d0dd118e88845d28bfa7aee875dc751269f7d0b87.cache"; + default: + printf("ERROR: unknown sector size lg %ld\n", sector_size_lg); + return nullptr; + } +} + +template +void demo_pipeline(size_t num_sectors, uint8_t* replica_ids) { + size_t slot0 = 0; + size_t slot1 = get_slot_size(num_sectors, P::GetSectorSize()) * 1; + const char* 
parent_filename = get_parent_filename(P::GetSectorSizeLg()); + const char* output_dir0 = "/var/tmp/supra_seal/0"; + const char* output_dir1 = "/var/tmp/supra_seal/1"; + + printf("slot0 %08lx\n", slot0); + printf("slot1 %08lx\n", slot1); + + // Fill slot0 pc1 + printf("Starting slot0 pc1\n"); + pc1(slot0, num_sectors, replica_ids, parent_filename, P::GetSectorSize()); + + // Slot0 PC2 + slot1 pc1 + std::thread j0([&]() { + printf("Starting slot1 pc1\n"); + pc1(slot1, num_sectors, replica_ids, parent_filename, P::GetSectorSize()); + }); + std::thread j1([&]() { + printf("Starting slot0 pc2\n"); + pc2(slot0, num_sectors, output_dir0, nullptr, P::GetSectorSize()); + }); + j0.join(); + j1.join(); + + // slot1 pc2 + printf("Starting slot1 pc2\n"); + pc2(slot1, num_sectors, output_dir1, nullptr, P::GetSectorSize()); +} + +int main(int argc, char** argv) { + uint64_t node_to_read = 0; + uint64_t slot = 0; + size_t num_sectors = 64; + std::string sector_size_string = ""; + const char* output_dir = "/var/tmp/supra_seal/0"; + + enum { SEAL_MODE, READ_MODE, PARENTS_MODE, PIPELINE_MODE } mode = PIPELINE_MODE; + bool perform_pc1 = false; + bool perform_pc2 = false; + bool perform_c1 = false; + + int opt; + while ((opt = getopt(argc, argv, "123r:s:n:po:b:h")) != -1) { + switch (opt) { + case '1': + mode = SEAL_MODE; + perform_pc1 = true; + break; + case '2': + mode = SEAL_MODE; + perform_pc2 = true; + break; + case '3': + mode = SEAL_MODE; + perform_c1 = true; + break; + case 'r': + mode = READ_MODE; + node_to_read = strtol(optarg, NULL, 16); + break; + case 's': + slot = strtol(optarg, NULL, 16); + break; + case 'o': + output_dir = optarg; + break; + case 'n': + num_sectors = strtol(optarg, NULL, 10); + break; + case 'p': + mode = PIPELINE_MODE; + break; + case 'b': + sector_size_string = optarg; + break; + case 'h': + printf("Usage: sudo ./seal [options]\n"); + printf(" -1 - perform pc1\n"); + printf(" -2 - perform pc2\n"); + printf(" -3 - perform c1\n"); + printf(" -p - perform pc1, pc2, and c1 pipeline (default)\n"); + printf(" -n - number of parallel sectors (default 64)\n"); + printf(" -b - sector size e.g 32GiB\n"); + exit(0); + break; + } + } + + if (sector_size_string == "") { + printf("Please specify a sector size\n"); + exit(0); + } + + size_t sector_size = get_sector_size_from_string(sector_size_string); + size_t sector_size_lg; + + SECTOR_PARAMS_TABLE(sector_size_lg = params.GetSectorSizeLg()); + + supra_seal_init(sector_size, "demos/rust/supra_seal.cfg"); + + // // 512mb random data + // uint8_t replica_id_buf_512M[] = { + // 89, 186, 126, 238, 239, 37, 73, 20, + // 148, 180, 147, 227, 154, 153, 224, 173, + // 101, 206, 212, 202, 229, 49, 100, 20, + // 19, 156, 251, 17, 68, 212, 238, 32 + // }; + + uint8_t* replica_id_buf; + switch (sector_size_lg) { + case (size_t)SectorSizeLg::Sector2KB: + replica_id_buf = replica_id_buf_2K; + break; + case (size_t)SectorSizeLg::Sector16KB: + replica_id_buf = replica_id_buf_16K; + break; + case (size_t)SectorSizeLg::Sector8MB: + replica_id_buf = replica_id_buf_8M; + break; + case (size_t)SectorSizeLg::Sector512MB: + replica_id_buf = replica_id_buf_512M; + break; + case (size_t)SectorSizeLg::Sector32GB: + replica_id_buf = replica_id_buf_32G; + break; + case (size_t)SectorSizeLg::Sector64GB: + replica_id_buf = replica_id_buf_64G; + break; + default: + replica_id_buf = replica_id_buf_2K; + break; + } + uint8_t* replica_ids = new uint8_t[num_sectors * sizeof(replica_id_buf_2K)]; + assert (replica_ids != nullptr); + for (size_t i = 0; i < num_sectors; i++) { + 
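// Reuse the same demo replica-id bytes for every sector in the batch. + 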
memcpy(&replica_ids[sizeof(replica_id_buf_2K) * i], + replica_id_buf, sizeof(replica_id_buf_2K)); + } + + if (mode == PIPELINE_MODE) { + SECTOR_PARAMS_TABLE(demo_pipeline(num_sectors, replica_ids)); + exit(0); + } + + printf("mode %d, node_to_read %lx, slot %lx, num_sectors %ld\n", + mode, node_to_read, slot, num_sectors); + + size_t block_offset = get_slot_size(num_sectors, sector_size) * slot; + node_to_read += block_offset; + + // Perform sealing + if (mode == SEAL_MODE) { + if (perform_pc1) { + pc1(block_offset, num_sectors, replica_ids, + get_parent_filename(sector_size_lg), sector_size); + } + + if (perform_pc2) { + pc2(block_offset, num_sectors, output_dir, nullptr, sector_size); + } + + if (perform_c1) { + std::string replica_cache_path = output_dir; + replica_cache_path += "/replicas"; + if (!std::filesystem::exists(replica_cache_path.c_str())) { + replica_cache_path = output_dir; + } + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t i = 0; i < num_sectors; i++) { + const size_t MAX = 256; + char sector_output_dir[MAX]; + snprintf(sector_output_dir, MAX, "%s/%03ld", output_dir, i); + char sector_replica_dir[MAX]; + snprintf(sector_replica_dir, MAX, "%s/%03ld", replica_cache_path.c_str(), i); + + c1(block_offset, num_sectors, i, replica_ids, SEED, + TICKET, sector_output_dir, get_parent_filename(sector_size_lg), + sector_replica_dir, sector_size); + } + auto stop = std::chrono::high_resolution_clock::now(); + uint64_t secs = std::chrono::duration_cast< + std::chrono::seconds>(stop - start).count(); + printf("c1 took %ld seconds\n", secs); + } + } else if (mode == READ_MODE) { + node_read(sector_size, num_sectors, node_to_read); + } + + exit(0); +} diff --git a/extern/supraseal/demos/rust/Cargo.toml b/extern/supraseal/demos/rust/Cargo.toml new file mode 100644 index 000000000..91e09a40b --- /dev/null +++ b/extern/supraseal/demos/rust/Cargo.toml @@ -0,0 +1,27 @@ +[package] +name = "supra-seal-demo" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[[bin]] +name = "c2" +path = "src/c2.rs" + +[features] +default = [] + +[dependencies] +anyhow = "1.0.26" +bincode = "1.1.2" +chrono = "0.4" +filecoin-proofs-api = { version = "16.0.0", default-features = false, features = ["cuda-supraseal"] } +filecoin-proofs-v1 = { package = "filecoin-proofs", version = "16.0.0", default-features = false, features = ["cuda-supraseal"] } +storage-proofs-core = { version = "16.0.0", default-features = false, features = ["cuda-supraseal"] } +supraseal-c2 = "0" + +[target."cfg(target_arch = \"aarch64\")".dependencies] +sha2 = { version = "0.10.2", features = ["compress", "asm"] } +[target."cfg(not(target_arch = \"aarch64\"))".dependencies] +sha2 = { version = "0.10.2", features = ["compress"] } diff --git a/extern/supraseal/demos/rust/README.md b/extern/supraseal/demos/rust/README.md new file mode 100644 index 000000000..7ab3dfa24 --- /dev/null +++ b/extern/supraseal/demos/rust/README.md @@ -0,0 +1,66 @@ +# SupraSeal pipeline demo + +This executable provides a example of building a rust application to work with the SupraSeal primitives. The demo runs a series of sealing operations in parallel to illustrate how a pipeline can be constructed. + +There is concept of a slot, which represents a set of compute resources allocated for PC1, PC2, C1, and C2. The expectation is there will be two slots which allows multiple sealing pipelines to run in parallel. 
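Each pipeline moves through PC1, PC2, WaitSeed, C1, and C2 in order, and the batches are staggered so that CPU-bound PC1 work in one slot overlaps GPU-bound PC2/C2 work in the other. 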
There is coordination between the pipelines to ensure a specific resource is only used by one pipeline at once. For example to avoid GPU contention, PC2 in one pipeline should not run while PC2 or C2 is running in another pipeline. + +This is for illustration purposes. The timing of operations is dependent on available hardware and the number of sectors to seal. + +```mermaid +flowchart TD; + subgraph Pipeline C - Slot 0; + C0(PC1) --> C1(PC2); + C1 --> C2(WaitSeed); + C2 --> C3(C1); + C3 --> C4(C2); + end; + subgraph Pipeline B - Slot 1; + B0(PC1) --> B1(PC2); + B1 --> B2(WaitSeed); + B2 --> B3(C1); + B3 --> B4(C2); + end; + subgraph Pipeline A - Slot 0; + A0(PC1) --> A1(PC2); + A1 --> A2(WaitSeed); + A2 --> A3(C1); + A3 --> A4(C2); + end; + + A0 -->|wait|B0; + A1 -->|wait|B1; + + A3 -->|wait|C0; + B0 -->|wait|C0; + + B1 -->|wait|A4; + A4 -->|wait|B4; + + B4 -->|wait|C1; +``` + +## Running + +Currently configured to run 3 pipelines of 32 sectors. + +First make sure the supraseal-c2 library is setup in ../../c2 + +Also make sure the core library has been built: +``` +./build.sh 512MiB + +or for 32GiB sectors: +./build.sh +``` + +512MB Sectors +``` +cargo build --release --features=512MiB --no-default-features +sudo ./target/release/supra-seal-demo +``` + +32GB Sectors +``` +cargo build --release --features=32GiB --no-default-features +sudo ./target/release/supra-seal-demo +``` diff --git a/extern/supraseal/demos/rust/build.rs b/extern/supraseal/demos/rust/build.rs new file mode 100644 index 000000000..1655943dd --- /dev/null +++ b/extern/supraseal/demos/rust/build.rs @@ -0,0 +1,124 @@ +// Copyright Supranational LLC + +use std::path::PathBuf; + +fn main() { + let cpp_lib_dir = PathBuf::from("../../obj") + .canonicalize() + .expect("cannot canonicalize path"); + + let spdk_lib_dir_buf = PathBuf::from("../../deps/spdk-v22.09/build/lib") + .canonicalize() + .expect("cannot canonicalize path"); + let spdk_lib_dir = spdk_lib_dir_buf.to_str().unwrap(); + + let dpdk_lib_dir_buf = PathBuf::from("../../deps/spdk-v22.09/dpdk/build/lib") + .canonicalize() + .expect("cannot canonicalize path"); + let dpdk_lib_dir = dpdk_lib_dir_buf.to_str().unwrap(); + + let dpdk_env_path = PathBuf::from("../../deps/spdk-v22.09/build/lib/libspdk_env_dpdk.a") + .canonicalize() + .expect("cannot canonicalize path"); + + println!("cargo:rustc-link-search={}", cpp_lib_dir.to_str().unwrap()); + println!("cargo:rustc-link-search={}", spdk_lib_dir); + + println!("cargo:rustc-link-arg=-fno-omit-frame-pointer"); + println!("cargo:rustc-link-arg=-Wl,-z,relro,-z,now"); + println!("cargo:rustc-link-arg=-Wl,-z,noexecstack"); + println!("cargo:rustc-link-arg=-fuse-ld=bfd"); + println!("cargo:rustc-link-arg=-Wl,--whole-archive"); + println!("cargo:rustc-link-arg=-Wl,--no-as-needed"); + println!("cargo:rustc-link-arg=-lspdk_bdev_malloc"); + println!("cargo:rustc-link-arg=-lspdk_bdev_null"); + println!("cargo:rustc-link-arg=-lspdk_bdev_nvme"); + println!("cargo:rustc-link-arg=-lspdk_bdev_passthru"); + println!("cargo:rustc-link-arg=-lspdk_bdev_lvol"); + println!("cargo:rustc-link-arg=-lspdk_bdev_raid"); + println!("cargo:rustc-link-arg=-lspdk_bdev_error"); + println!("cargo:rustc-link-arg=-lspdk_bdev_gpt"); + println!("cargo:rustc-link-arg=-lspdk_bdev_split"); + println!("cargo:rustc-link-arg=-lspdk_bdev_delay"); + println!("cargo:rustc-link-arg=-lspdk_bdev_zone_block"); + println!("cargo:rustc-link-arg=-lspdk_blobfs_bdev"); + println!("cargo:rustc-link-arg=-lspdk_blobfs"); + println!("cargo:rustc-link-arg=-lspdk_blob_bdev"); + 
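// Logical-volume, blobstore and NVMe driver archives follow. + 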
println!("cargo:rustc-link-arg=-lspdk_lvol"); + println!("cargo:rustc-link-arg=-lspdk_blob"); + println!("cargo:rustc-link-arg=-lspdk_nvme"); + println!("cargo:rustc-link-arg=-lspdk_bdev_ftl"); + println!("cargo:rustc-link-arg=-lspdk_ftl"); + println!("cargo:rustc-link-arg=-lspdk_bdev_aio"); + println!("cargo:rustc-link-arg=-lspdk_bdev_virtio"); + println!("cargo:rustc-link-arg=-lspdk_virtio"); + println!("cargo:rustc-link-arg=-lspdk_vfio_user"); + println!("cargo:rustc-link-arg=-lspdk_accel_ioat"); + println!("cargo:rustc-link-arg=-lspdk_ioat"); + println!("cargo:rustc-link-arg=-lspdk_scheduler_dynamic"); + println!("cargo:rustc-link-arg=-lspdk_env_dpdk"); + println!("cargo:rustc-link-arg=-lspdk_scheduler_dpdk_governor"); + println!("cargo:rustc-link-arg=-lspdk_scheduler_gscheduler"); + println!("cargo:rustc-link-arg=-lspdk_sock_posix"); + println!("cargo:rustc-link-arg=-lspdk_event"); + println!("cargo:rustc-link-arg=-lspdk_event_bdev"); + println!("cargo:rustc-link-arg=-lspdk_bdev"); + println!("cargo:rustc-link-arg=-lspdk_notify"); + println!("cargo:rustc-link-arg=-lspdk_dma"); + println!("cargo:rustc-link-arg=-lspdk_event_accel"); + println!("cargo:rustc-link-arg=-lspdk_accel"); + println!("cargo:rustc-link-arg=-lspdk_event_vmd"); + println!("cargo:rustc-link-arg=-lspdk_vmd"); + println!("cargo:rustc-link-arg=-lspdk_event_sock"); + println!("cargo:rustc-link-arg=-lspdk_init"); + println!("cargo:rustc-link-arg=-lspdk_thread"); + println!("cargo:rustc-link-arg=-lspdk_trace"); + println!("cargo:rustc-link-arg=-lspdk_sock"); + println!("cargo:rustc-link-arg=-lspdk_rpc"); + println!("cargo:rustc-link-arg=-lspdk_jsonrpc"); + println!("cargo:rustc-link-arg=-lspdk_json"); + println!("cargo:rustc-link-arg=-lspdk_util"); + println!("cargo:rustc-link-arg=-lspdk_log"); + println!("cargo:rustc-link-arg=-Wl,--no-whole-archive"); + println!("cargo:rustc-link-arg={}", dpdk_env_path.to_str().unwrap()); + println!("cargo:rustc-link-arg=-Wl,--whole-archive"); + println!("cargo:rustc-link-arg={}/librte_bus_pci.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_cryptodev.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_dmadev.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_eal.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_ethdev.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_hash.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_kvargs.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_mbuf.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_mempool.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_mempool_ring.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_net.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_pci.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_power.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_rcu.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_ring.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_telemetry.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg={}/librte_vhost.a", dpdk_lib_dir); + println!("cargo:rustc-link-arg=-Wl,--no-whole-archive"); + println!("cargo:rustc-link-arg=-lnuma"); + println!("cargo:rustc-link-arg=-ldl"); + println!("cargo:rustc-link-arg=-L{}/../../isa-l/.libs", spdk_lib_dir); + println!("cargo:rustc-link-arg=-lisal"); + println!("cargo:rustc-link-arg=-pthread"); + println!("cargo:rustc-link-arg=-lrt"); + println!("cargo:rustc-link-arg=-luuid"); + 
println!("cargo:rustc-link-arg=-lssl"); + println!("cargo:rustc-link-arg=-lcrypto"); + println!("cargo:rustc-link-arg=-lm"); + println!("cargo:rustc-link-arg=-laio"); + println!("cargo:rustc-link-arg=-lc"); + println!("cargo:rustc-link-arg=-lgcc"); + + println!("cargo:rustc-link-lib=supraseal"); + println!("cargo:rustc-link-lib=gmp"); + println!("cargo:rustc-link-lib=config++"); + println!("cargo:rustc-link-lib=static:-bundle=stdc++"); + + println!("cargo:rerun-if-changed={}", cpp_lib_dir.to_str().unwrap()); + +} diff --git a/extern/supraseal/demos/rust/src/c2.rs b/extern/supraseal/demos/rust/src/c2.rs new file mode 100644 index 000000000..f4574ada8 --- /dev/null +++ b/extern/supraseal/demos/rust/src/c2.rs @@ -0,0 +1,188 @@ +// Copyright Supranational LLC + +use anyhow::Context; +use bincode::deserialize; +use filecoin_proofs_api::{RegisteredSealProof, SectorId}; +use filecoin_proofs_v1::{ + PoRepConfig, ProverId, seal_commit_phase2, SealCommitPhase1Output, + verify_seal, with_shape +}; + +use std::fs::read; +use std::path::PathBuf; +use storage_proofs_core::{ + api_version::ApiVersion, + merkle::MerkleTreeTrait, +}; +use std::time::Instant; + +use std::thread; +use std::sync::{Arc, Mutex}; + +fn run_c2( + num_sectors: usize, + c1_dir: &str, + start_sector_id: usize, + porep_config: Arc, +) -> usize { + // Choose some fixed values for demonstration + // All sectors using the same prover id, ticket, and wait seed + let prover_id: ProverId = [9u8; 32]; + let slots_sector_id = Arc::new(Mutex::new(start_sector_id)); + let successes = Arc::new(Mutex::new(0)); + + let mut provers = vec![]; + + let commit_phase1_output_path = + PathBuf::from(c1_dir); + + for _ in 0..2 { + let slots_sector_id = Arc::clone(&slots_sector_id); + let successes = Arc::clone(&successes); + let commit_phase1_output_base_path = commit_phase1_output_path.clone(); + let porep_config = Arc::clone(&porep_config); + + let prover = thread::spawn(move || { + loop { + let mut sector_lock = slots_sector_id.lock().unwrap(); + let cur_sector = *sector_lock; + let sector_slot = cur_sector - start_sector_id; + let sector_id = SectorId::from(cur_sector as u64); + + *sector_lock += 1; + drop(sector_lock); + if sector_slot >= num_sectors { + println!("Exiting, sector_slot {} num_sectors {}", + sector_slot, num_sectors); + break; + } + println!("Starting c2 sector {}", cur_sector); + + let commit_phase1_output = { + let mut commit_phase1_output_path = commit_phase1_output_base_path.clone(); + commit_phase1_output_path.push( + format!("{:03}/commit-phase1-output", sector_slot) + ); + println!("Restoring commit phase1 output file {:?}", commit_phase1_output_path); + let commit_phase1_output_bytes = + read(&commit_phase1_output_path).with_context(|| { + format!( + "couldn't read commit_phase1_output_path={:?}", + commit_phase1_output_path + ) + }).unwrap(); + + let res: SealCommitPhase1Output = + deserialize(&commit_phase1_output_bytes).unwrap(); + res + }; + + let SealCommitPhase1Output { + vanilla_proofs: _, + comm_d, + comm_r, + replica_id: _, + seed, + ticket, + } = commit_phase1_output; + + println!("Starting seal_commit_phase2 sector {}", sector_slot); + let now = Instant::now(); + let commit_output = seal_commit_phase2( + &porep_config, + commit_phase1_output, + prover_id, + sector_id + ) + .unwrap(); + println!("seal_commit_phase2 took: {:.2?}", now.elapsed()); + + let result = verify_seal::( + &porep_config, + comm_r, + comm_d, + prover_id, + sector_id, + ticket, + seed, + &commit_output.proof, + ) + .unwrap(); + + if result == true 
{ + println!("Verification PASSED!"); + *successes.lock().unwrap() += 1; + } else { + println!("Verification FAILED!"); + } + } + }); + provers.push(prover); + } + for prover in provers { + prover.join().unwrap(); + } + let count = *successes.lock().unwrap(); + count +} + +fn c2_caller( + num_sectors: usize, + c1_dir: &str, + start_sector_id: usize, + sector_size: u64, + porep_id: [u8; 32], +) -> usize { + let porep_config = Arc::new(PoRepConfig::new_groth16( + sector_size, + porep_id, + ApiVersion::V1_1_0)); + + with_shape!(sector_size, run_c2, num_sectors, c1_dir, start_sector_id, porep_config) +} + +fn main() { + let num_sectors: usize = 128; + //let num_sectors: usize = 64; + //let num_sectors: usize = 32; + + let args: Vec = std::env::args().collect(); + + if args.len() < 4 { + println!("Usage: c2 e.g 32GiB"); + std::process::exit(-1); + } + println!("path: {:?}", args[1]); + println!("start sector: {:?}", args[2]); + println!("sector size: {:?}", args[3]); + let c1_dir = &args[1]; + let start_sector_id: usize = args[2].trim().parse().expect("Wanted a number"); + let sector_size_string = &args[3]; + + let sector_size = match sector_size_string.as_str() { + "2KiB" => 2048, + "4KiB" => 4096, + "16KiB" => 16384, + "32KiB" => 32768, + "8MiB" => 8388608, + "16MiB" => 16777216, + "512MiB" => 536870912, + "1GiB" => 1073741824, + "32GiB" => 34359738368, + "64GiB" => 68719476736, + _ => panic!("Invalid sector size"), + }; + + let porep_id: [u8; 32] = match sector_size_string.as_str() { + "2KiB" => RegisteredSealProof::StackedDrg2KiBV1_1.as_v1_config().porep_id, + "8MiB" => RegisteredSealProof::StackedDrg8MiBV1_1.as_v1_config().porep_id, + "512MiB" => RegisteredSealProof::StackedDrg512MiBV1_1.as_v1_config().porep_id, + "32GiB" => RegisteredSealProof::StackedDrg32GiBV1_1.as_v1_config().porep_id, + "64GiB" => RegisteredSealProof::StackedDrg64GiBV1_1.as_v1_config().porep_id, + _ => [99u8; 32], // use an arbitrary porep_id for other sizes + }; + + let successes = c2_caller(num_sectors, &c1_dir, start_sector_id, sector_size, porep_id); + + std::process::exit((num_sectors - successes) as i32); +} diff --git a/extern/supraseal/demos/rust/src/main.rs b/extern/supraseal/demos/rust/src/main.rs new file mode 100644 index 000000000..4014ee574 --- /dev/null +++ b/extern/supraseal/demos/rust/src/main.rs @@ -0,0 +1,581 @@ +// Copyright Supranational LLC + +// This is a basic demonstration of the sealing pipeline, the bindings +// interface and order of operations from a rust perspective + +#![feature(vec_into_raw_parts)] + +use filecoin_proofs_api::RegisteredSealProof; +use filecoin_proofs_v1::{ + ProverId, + Ticket, + with_shape, +}; + +use sha2::{Digest, Sha256}; +use std::ffi::CString; +use std::os::raw::c_char; +use std::os::unix::ffi::OsStrExt; +use std::path::{Path, PathBuf}; +use std::sync::{Arc, Mutex}; +use std::time::{Duration, Instant}; +use std::thread; +use storage_proofs_core::{ + merkle::MerkleTreeTrait, +}; + +extern crate chrono; + +pub type ReplicaId = Vec; + +// C Bindings +extern "C" { + // Optional init function. 
Default config file is supra_config.cfg + fn supra_seal_init(sector_size: usize, config_filename: *const c_char); + + fn get_max_block_offset(sector_size: usize) -> usize; + + fn get_slot_size(num_sectors: usize, sector_size: usize) -> usize; + + fn pc1(block_offset: usize, + num_sectors: usize, + replica_ids: *const u8, + parents_filename: *const c_char, + sector_size: usize) -> u32; + + fn pc2(block_offset: usize, + num_sectors: usize, + output_dir: *const c_char, + data_filenames: *const *const c_char, + sector_size: usize) -> u32; + + fn pc2_cleanup(num_sectors: usize, + output_dir: *const c_char, + sector_size: usize) -> u32; + + fn c1(block_offset: usize, + num_sectors: usize, + sector_slot: usize, + replica_id: *const u8, + seed: *const u8, + ticket: *const u8, + cache_path: *const c_char, + parents_filename: *const c_char, + replica_path: *const c_char, + sector_size: usize) -> u32; +} + +pub fn init_wrapper>(sector_size: usize, config: T) { + let config_c = CString::new(config.as_ref().as_os_str().as_bytes()).unwrap(); + unsafe { + supra_seal_init(sector_size, config_c.as_ptr()); + }; +} + +// Rust wrappers around unsafe C calls +pub fn get_max_block_offset_wrapper(sector_size: usize) -> usize { + let max_offset = unsafe { get_max_block_offset(sector_size) }; + println!("Max Offset returned {:x}", max_offset); + return max_offset; +} + +pub fn get_slot_size_wrapper(num_sectors: usize, sector_size: usize) -> usize { + let slot_size = unsafe { get_slot_size(num_sectors, sector_size) }; + println!("Slot size returned {:x} for {} sectors sized {}", slot_size, num_sectors, sector_size); + return slot_size; +} + +pub fn pc1_wrapper>( + block_offset: usize, + num_sectors: usize, + replica_ids: Vec, + path: T, + sector_size: usize) -> u32 { + + let f = replica_ids.into_iter().flatten().collect::>(); + let path_c = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); + let pc1_status = unsafe { + pc1(block_offset, num_sectors, f.as_ptr(), path_c.as_ptr(), sector_size) + }; + println!("PC1 returned {}", pc1_status); + return pc1_status; +} + +pub fn pc2_wrapper>( + block_offset: usize, + num_sectors: usize, + path: T, + sector_size: usize) -> u32 { + + let path_c = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); + let pc2_status = unsafe { pc2(block_offset, num_sectors, + path_c.as_ptr(), std::ptr::null(), + sector_size) }; + println!("PC2 returned {}", pc2_status); + return pc2_status; +} + +pub fn pc2_cleanup_wrapper>( + num_sectors: usize, + path: T, + sector_size: usize) -> u32 { + + let path_c = CString::new(path.as_ref().as_os_str().as_bytes()).unwrap(); + let pc2_status = unsafe { pc2_cleanup(num_sectors, path_c.as_ptr(), sector_size) }; + println!("PC2 cleanup returned {}", pc2_status); + return pc2_status; +} + +pub fn c1_wrapper>( + block_offset: usize, + num_sectors: usize, + sector_id: usize, + replica_id: *const u8, + seed: *const u8, + ticket: *const u8, + cache_path: T, + parents_filename: T, + replica_path: T, + sector_size: usize) -> u32 { + + let cache_path_c = + CString::new(cache_path.as_ref().as_os_str().as_bytes()).unwrap(); + let parents_c = + CString::new(parents_filename.as_ref().as_os_str().as_bytes()).unwrap(); + let replica_path_c = + CString::new(replica_path.as_ref().as_os_str().as_bytes()).unwrap(); + + let c1_status = unsafe { + c1(block_offset, + num_sectors, + sector_id, + replica_id, + seed, + ticket, + cache_path_c.as_ptr(), + parents_c.as_ptr(), + replica_path_c.as_ptr(), + sector_size) + }; + println!("C1 returned {}", c1_status); + 
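// Hand the library's status code straight back to the caller. + 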
return c1_status; +} + +// Helper function to create replica ids +fn create_replica_id( + prover_id: ProverId, + sector_size_string: &str, + sector_id: u64, + comm_d: &[u8; 32], + ticket: Ticket, +) -> ReplicaId { + let porep_id: [u8; 32] = match sector_size_string { + "2KiB" => RegisteredSealProof::StackedDrg2KiBV1_1.as_v1_config().porep_id, + "8MiB" => RegisteredSealProof::StackedDrg8MiBV1_1.as_v1_config().porep_id, + "512MiB" => RegisteredSealProof::StackedDrg512MiBV1_1.as_v1_config().porep_id, + "32GiB" => RegisteredSealProof::StackedDrg32GiBV1_1.as_v1_config().porep_id, + "64GiB" => RegisteredSealProof::StackedDrg64GiBV1_1.as_v1_config().porep_id, + _ => [99u8; 32], // use an arbitrary porep_id for other sizes + }; + + // Print porep_id + println!("PoRep ID for sector size {}: {:?}", sector_size_string, porep_id); + + let hash = Sha256::new() + .chain_update(&prover_id) + .chain_update(sector_id.to_be_bytes()) + .chain_update(&ticket) + .chain_update(comm_d) + .chain_update(&porep_id) + .finalize(); + + let mut id = [0u8; 32]; + id.copy_from_slice(&hash); + id[31] &= 0b0011_1111; + + // Print id as hex + println!("Replica ID (hex): {:x?}", id); + + id.to_vec() +} + +fn run_pipeline( + num_sectors: usize, + c2_cores: &str, + parents_cache_filename: &str, + comm_d: [u8; 32], + sector_size: usize, + sector_size_str: &str, +) { + let wait_seed_time = match sector_size_str { + "32GiB" => Duration::from_secs(60 * 75), // 75 min + "64GiB" => Duration::from_secs(60 * 75), + _ => Duration::from_secs(30), // 30 sec + }; + + let mut parents_cache_file = PathBuf::new(); + parents_cache_file.push(parents_cache_filename); + + // This is optional but if present must be the first call into the library + //init_wrapper("supra_seal_zen.cfg"); + + let max_offset = get_max_block_offset_wrapper(sector_size); + let slot_size = get_slot_size_wrapper(num_sectors, sector_size); + + println!("max_offset {} and slot_size {}", max_offset, slot_size); + + // Choose some fixed values for demonstration + // All sectors using the same prover id, ticket, and wait seed + let prover_id: ProverId = [ 9u8; 32]; + let ticket: Ticket = [ 1u8; 32]; + let seed: Ticket = [ 1u8; 32]; + + // Example showing operations running in parallel + let pc1_counter = Arc::new(Mutex::new((0, 0))); + let pc2_counter = Arc::new(Mutex::new(0)); + let c1_counter = Arc::new(Mutex::new(0)); + let c2_counter = Arc::new(Mutex::new(0)); + + //let passed_counter = Arc::new(Mutex::new(0)); + let failed_counter = Arc::new(Mutex::new(0)); + let gpu_counter = Arc::new(Mutex::new(0)); + + let pipeline_start = Arc::new(Mutex::new(Instant::now())); + let gpu_lock = Arc::new(Mutex::new(false)); + + // Specify the number of slots that can be run at a time + // This will come down to resources on the machine and number of sectors + // being sealed in parallel. 
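+ // Each batch claims slot (batch_num % num_slots), so consecutive batches alternate between the two slots. 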
+ let num_slots = 2; // This matches mutex array below + let slot_counter = Arc::new([Mutex::new(0), Mutex::new(0)]); + let mut pipelines = vec![]; + + // Demonstrate three passes through the pipeline + // Batch 0 PC1 PC2 C1 C2 + // Batch 1 PC1 PC2 C1 C2 + // Batch 2 PC1 PC2 C1 C2 + let num_batches = 3; + for batch_num in 0..num_batches { + let pc1_counter = Arc::clone(&pc1_counter); + let pc2_counter = Arc::clone(&pc2_counter); + let c1_counter = Arc::clone(&c1_counter); + let c2_counter = Arc::clone(&c2_counter); + let c2_cores = c2_cores.to_string(); + + let failed_counter = Arc::clone(&failed_counter); + let gpu_counter = Arc::clone(&gpu_counter); + let slot_counter = Arc::clone(&slot_counter); + let parents_cache_file = parents_cache_file.clone(); + + let pipeline_start = Arc::clone(&pipeline_start); + let gpu_lock = Arc::clone(&gpu_lock); + + let sector_size_string = sector_size_str.to_string(); + + let pipeline = thread::spawn(move || { + let pipe_dir = "/var/tmp/supra_seal/".to_owned() + &batch_num.to_string(); + let output_dir = Path::new(&pipe_dir); + + // Grab unique sector ids for each sector in the batch + let batch_sector_start = batch_num * num_sectors; + + // Create replica_ids + let mut cur_sector_id = batch_sector_start; + let mut replica_ids: Vec = Vec::new(); + for _ in 0..num_sectors { + let replica_id = create_replica_id( + prover_id, + §or_size_string, + cur_sector_id as u64, + &comm_d, + ticket); + + cur_sector_id += 1; // Increment sector id for each sector + replica_ids.push(replica_id); + } + + // Indent based on batch number + let mut indent: String = "".to_owned(); + for _x in 0..batch_num { + indent += " "; + } + + //if batch_num > 0 { // TODO SNP: testing only, remove + // Wait until it's time for this batch's pc1 to start + { + let mut pc1_counter_lock = pc1_counter.lock().unwrap(); + while (*pc1_counter_lock).0 != batch_num { + drop(pc1_counter_lock); + thread::sleep(Duration::from_millis(100)); + pc1_counter_lock = pc1_counter.lock().unwrap(); + } + } + + // Lock the slot + let cur_slot = batch_num % num_slots; + let mut slot_count = slot_counter[cur_slot].lock().unwrap(); + println!("{}**** {} Batch {} locked slot {}", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num, cur_slot); + *slot_count += 1; + + // Lock PC1 + let cur_offset; + let cp_replica_ids = replica_ids.clone(); + { + let mut pc1_count = pc1_counter.lock().unwrap(); + println!("{}**** {} Batch {} start PC1", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num); + cur_offset = (*pc1_count).1; + (*pc1_count).1 += slot_size; + + // Check if pc1 will overflow available disk space + if ((*pc1_count).1 + slot_size) > max_offset { + (*pc1_count).1 = 0; + } + + // Do PC1 + //if batch_num > 0 { // TODO SNP: testing only, remove + pc1_wrapper(cur_offset, num_sectors, + replica_ids, parents_cache_file.clone(), + sector_size); + //} + (*pc1_count).0 += 1; + println!("{}**** {} Batch {} done with PC1", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num); + } + + // PC2 + { + let mut gpu_lock = gpu_lock.lock().unwrap(); + *gpu_lock = true; + + if batch_num == 1 { + let mut pipeline_start_lock = pipeline_start.lock().unwrap(); + *pipeline_start_lock = Instant::now(); + drop(pipeline_start_lock); + println!("\n**** {} Pipeline start\n", + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s")); + } + + let mut pc2_count = pc2_counter.lock().unwrap(); + let mut gpu_count = gpu_counter.lock().unwrap(); + println!("{}**** {} Batch {} start 
PC2", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num); + *gpu_count += 1; + + pc2_wrapper(cur_offset, num_sectors, output_dir, sector_size); + *pc2_count += 1; + *gpu_lock = false; + println!("{}**** {} Batch {} done with PC2", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num); + } + + println!("{}**** {} Batch {} Wait Seed sleeping {:?}", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num, wait_seed_time); + thread::sleep(wait_seed_time); + println!("{}**** {} Batch {} Wait Seed done sleeping", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num); + + // C1 + // Each of these can be parallelized, however the function only + // operates on a single sector at a time as opposed to PC1/PC2 + println!("{}**** {} Batch {} start C1", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num); + for sector_slot in 0..num_sectors { + let mut cur_cache_path = PathBuf::from(output_dir); + cur_cache_path.push(format!("{:03}", sector_slot)); + let mut cur_replica_dir = PathBuf::from(output_dir); + cur_replica_dir.push(format!("{:03}", sector_slot)); + + c1_wrapper(cur_offset, + num_sectors, + sector_slot, + cp_replica_ids[sector_slot].as_ptr(), + seed.as_ptr(), + ticket.as_ptr(), + cur_cache_path, + //parents_cache_file.to_path_buf(), + parents_cache_file.clone(), + cur_replica_dir, + sector_size); + } + *c1_counter.lock().unwrap() += 1; + println!("{}**** {} Batch {} done with C1", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num); + + // At this point the layers on NVME for this batch can be reused + println!("{}**** {} Batch {} dropping lock on slot {}", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num, cur_slot); + drop(slot_count); + + // Delete pc2 content + println!("{}**** {} Batch {} cleanup pc2 on slot {}", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num, cur_slot); + pc2_cleanup_wrapper(num_sectors, output_dir, sector_size); + + // Wait until it's time for this batch's c2 to start, which is after + // the next batch's pc2 + if batch_num < num_batches - 1 { + let mut pc2_counter_lock = pc2_counter.lock().unwrap(); + while *pc2_counter_lock != batch_num + 2 { + drop(pc2_counter_lock); + thread::sleep(Duration::from_millis(100)); + pc2_counter_lock = pc2_counter.lock().unwrap(); + } + } + + // TODO SNP: testing only, remove + // } else { + // let cur_slot = batch_num % num_slots; + // *slot_counter[cur_slot].lock().unwrap() += 1; + // let mut pc1_count = pc1_counter.lock().unwrap(); + // (*pc1_count).1 += slot_size; + // // Check if pc1 will overflow available disk space + // if ((*pc1_count).1 + slot_size) > max_offset { + // (*pc1_count).1 = 0; + // } + // (*pc1_count).0 += 1; + + // *pc2_counter.lock().unwrap() += 1; + // *c1_counter.lock().unwrap() += 1; + // } + + // C2 + let mut gpu_lock = gpu_lock.lock().unwrap(); + *gpu_lock = true; + println!("{}**** {} Batch {} start C2", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num); + let mut c2_count = c2_counter.lock().unwrap(); + *gpu_counter.lock().unwrap() += 1; + + let now = Instant::now(); + let status = std::process::Command::new("/usr/bin/taskset") + .arg("-c").arg(c2_cores).arg("./target/release/c2") + .arg(output_dir).arg(batch_sector_start.to_string()) + .arg(sector_size_string) + .status().expect("failed to execute process"); + println!("status: {}", status); + println!("{}**** {} Batch {} C2 done took {:.2?}", 
indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num, now.elapsed()); + *failed_counter.lock().unwrap() += status.code().unwrap(); + + if batch_num == 0 { + let pipeline_start_lock = pipeline_start.lock().unwrap(); + println!("**** {} Pipeline took {:?}\n", + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + (*pipeline_start_lock).elapsed()); + } + println!("{}**** {} Batch {} done with C2", indent, + chrono::Local::now().format("%Y-%m-%d %H:%M:%S %s"), + batch_num); + *c2_count += 1; + drop(c2_count); + *gpu_lock = false; + drop(gpu_lock); + }); + pipelines.push(pipeline); + } + + for pipeline in pipelines { + pipeline.join().unwrap(); + } + + let pc1_count = pc1_counter.lock().unwrap(); + println!("PC1 counter: {} {}", (*pc1_count).0, (*pc1_count).1); + println!("PC2 counter: {}", *pc2_counter.lock().unwrap()); + println!("C1 counter: {}", *c1_counter.lock().unwrap()); + println!("C2 counter: {}", *c2_counter.lock().unwrap()); + println!("GPU counter: {}", *gpu_counter.lock().unwrap()); + println!("Failed counter: {}", *failed_counter.lock().unwrap()); + for i in 0..num_slots { + println!("Slot counter[{}]: {}", i, *slot_counter[i].lock().unwrap()); + } +} + +fn pipeline_caller(num_sectors: usize, c2_cores: &str, sector_size_string: &str) { + let parents_cache_filename = match sector_size_string { + "2KiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-652bae61e906c0732e9eb95b1217cfa6afcce221ff92a8aedf62fa778fa765bc.cache", + "4KiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-56d4865ec3476221fd1412409b5d9439182d71bf5e2078d0ecde76c0f7e33986.cache", + "16KiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-cd17f936869de64be8cb1ae4496e788f6af982bc65f78bec83e33c42c7210a41.cache", + "32KiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-81a0489b0dd6c7755cdce0917dd436288b6e82e17d596e5a23836e7a602ab9be.cache", + "8MiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-1139cb33af3e3c24eb644da64ee8bc43a8df0f29fc96b5337bee369345884cdc.cache", + "16MiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-7fa3ff8ffb57106211c4be413eb15ea072ebb363fa5a1316fe341ac8d7a03d51.cache", + "512MiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-7ba215a1d2345774ab90b8cb1158d296e409d6068819d7b8c7baf0b25d63dc34.cache", + "1GiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-637f021bceb5248f0d1dcf4dbf132fedc025d0b3b55d3e7ac171c02676a96ccb.cache", + "32GiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-21981246c370f9d76c7a77ab273d94bde0ceb4e938292334960bce05585dc117.cache", + "64GiB" => "/var/tmp/filecoin-parents/v28-sdr-parent-767ee5400732ee77b8762b9d0dd118e88845d28bfa7aee875dc751269f7d0b87.cache", + _ => panic!("Invalid sector size"), + }; + + let comm_d: [u8; 32] = match sector_size_string { + "2KiB" => [252, 126, 146, 130, 150, 229, 22, 250, 173, 233, 134, 178, 143, 146, 212, 74, 79, 36, 185, 53, 72, 82, 35, 55, 106, 121, 144, 39, 188, 24, 248, 51], + "4KiB" => [8, 196, 123, 56, 238, 19, 188, 67, 244, 27, 145, 92, 14, 237, 153, 17, 162, 96, 134, 179, 237, 98, 64, 27, 249, 213, 139, 141, 25, 223, 246, 36], + "16KiB" => [249, 34, 97, 96, 200, 249, 39, 191, 220, 196, 24, 205, 242, 3, 73, 49, 70, 0, 142, 174, 251, 125, 2, 25, 77, 94, 84, 129, 137, 0, 81, 8], + "32KiB" => [44, 26, 150, 75, 185, 11, 89, 235, 254, 15, 109, 162, 154, 214, 90, 227, 228, 23, 114, 74, 143, 124, 17, 116, 90, 64, 202, 193, 229, 231, 64, 17], + "8MiB" => [101, 242, 158, 93, 152, 210, 70, 195, 139, 56, 140, 252, 6, 219, 31, 107, 2, 19, 3, 197, 162, 137, 0, 11, 220, 232, 50, 169, 195, 236, 66, 28], + "16MiB" => [162, 36, 117, 8, 40, 88, 
80, 150, 91, 126, 51, 75, 49, 39, 176, 192, 66, 177, 208, 70, 220, 84, 64, 33, 55, 98, 124, 216, 121, 156, 225, 58],
+        "512MiB" => [57, 86, 14, 123, 19, 169, 59, 7, 162, 67, 253, 39, 32, 255, 167, 203, 62, 29, 46, 80, 90, 179, 98, 158, 121, 244, 99, 19, 81, 44, 218, 6],
+        "1GiB" => [204, 195, 192, 18, 245, 176, 94, 129, 26, 43, 191, 221, 15, 104, 51, 184, 66, 117, 180, 123, 242, 41, 192, 5, 42, 130, 72, 79, 60, 26, 91, 61],
+        "32GiB" => [7, 126, 95, 222, 53, 197, 10, 147, 3, 165, 80, 9, 227, 73, 138, 78, 190, 223, 243, 156, 66, 183, 16, 183, 48, 216, 236, 122, 199, 175, 166, 62],
+        "64GiB" => [230, 64, 5, 166, 191, 227, 119, 121, 83, 184, 173, 110, 249, 63, 15, 202, 16, 73, 178, 4, 22, 84, 242, 164, 17, 247, 112, 39, 153, 206, 206, 2],
+        _ => panic!("Invalid sector size"),
+    };
+
+    let sector_size: usize = match sector_size_string {
+        "2KiB" => 2048,
+        "4KiB" => 4096,
+        "16KiB" => 16384,
+        "32KiB" => 32768,
+        "8MiB" => 8388608,
+        "16MiB" => 16777216,
+        "512MiB" => 536870912,
+        "1GiB" => 1073741824,
+        "32GiB" => 34359738368,
+        "64GiB" => 68719476736,
+        _ => panic!("Invalid sector size"),
+    };
+
+    with_shape!(
+        sector_size as u64,
+        run_pipeline,
+        num_sectors,
+        c2_cores,
+        parents_cache_filename,
+        comm_d,
+        sector_size,
+        sector_size_string
+    );
+}
+
+fn main() {
+    let num_sectors: usize = 128;
+    //let num_sectors: usize = 64;
+    //let num_sectors: usize = 32;
+
+    // Cores for C2 use, in a string that can be passed to taskset
+    let c2_cores = "4-7,45-47,48-63";
+
+    let args: Vec<String> = std::env::args().collect();
+
+    if args.len() < 2 {
+        println!("Usage: supra-seal-demo <sector_size>, e.g. 32GiB");
+        std::process::exit(-1);
+    }
+    println!("sector size: {:?}", args[1]);
+    let sector_size_string = &args[1];
+
+    pipeline_caller(num_sectors, c2_cores, &sector_size_string.as_str());
+}
diff --git a/extern/supraseal/demos/rust/supra_seal.cfg b/extern/supraseal/demos/rust/supra_seal.cfg
new file mode 100644
index 000000000..250d0f32f
--- /dev/null
+++ b/extern/supraseal/demos/rust/supra_seal.cfg
@@ -0,0 +1,160 @@
+# Configuration for supra_seal
+spdk: {
+  # PCIe identifiers of NVMe drives to use to store layers
+  nvme = [ "0000:44:00.0",
+           "0000:43:00.0",
+           "0000:2c:00.0",
+           "0000:62:00.0",
+           "0000:61:00.0",
+           "0000:63:00.0",
+           "0000:2a:00.0",
+           "0000:41:00.0",
+           "0000:64:00.0",
+           "0000:2b:00.0",
+           "0000:29:00.0",
+           "0000:42:00.0",
+           "0000:04:00.0" ];
+}
+
+# CPU topology for various parallel sector counts
+topology:
+{
+  pc1: {
+    # Core for writing hashed nodes to disk
+    writer       = 1;
+    # Core for reading parent nodes
+    reader       = 2;
+    # Core for coordinating buffers
+    orchestrator = 3;
+    # SPDK qpair for reading
+    qpair_reader = 0;
+    # SPDK qpair for writing
+    qpair_writer = 1;
+
+    # Sleep time in usec when the nvme reader is idle
+    reader_sleep_time = 250;
+    # Sleep time in usec when the nvme writer is idle
+    writer_sleep_time = 500;
+
+    # Number of hashers to instantiate per physical core
+    hashers_per_core = 2;
+
+    # Configuration for coordinators and hashers for various parallel sector counts.
+    # Each entry has the following fields:
+    #   sectors      - the number of parallel sectors supported
+    #   coordinators - a list of one or more coordinator nodes, each containing
+    #     core    - which core the coordinator runs on
+    #     hashers - the number of hashing threads associated with the coordinator
+    # Each hashing thread processes two sectors. As a result the sum of "hashers"
+    # times two should equal "sectors".
+    #
+    # It's important to take into account the topology of the system when arranging
+    # threads. 
The purpose of the coordinator is to load data into the L3 cache so + # that the associated hashing threads have low latency access to the data. For + # this to be effective they must share the L3 cache. The system topology can be + # conveniently visualized using the `lstopo` command. + # + # The typical configuration would be one coordinator per core complex (CCX) on + # an AMD based machine to maximize cache data locality between the coordinator and + # hashing threads. + # + # To illustrate consider the configuration for 64 parallel sectors. Cores 0, 1 and 2 + # are used by the writer, reader, and orchestrator threads, so the first coordinator + # is placed on core 3. There are 4 more physical cores availabe in the CCX, so 8 + # hashers are assigned to utilize both the physical and hyperthread cores. This covers + # the first 16 sectors (2 sectors per hasher). + # + # The next coordinator is assigned to core 8. There are then 7 physical cores + # remaining, so 14 hashing threads are assigned, bring the sector count to 44. + # Finally core 16 gets the last coordinator with 10 hashers to cover the remaining + # 20 sectors. + sector_configs: ( + { + sectors = 2; + coordinators = ( + { core = 8; + hashers = 1; } + ) + }, + { + sectors = 4; + coordinators = ( + { core = 8; + hashers = 2; } + ) + }, + { + sectors = 8; + coordinators = ( + { core = 8; + hashers = 4; } + ) + }, + { + sectors = 16; + coordinators = ( + { core = 8; + hashers = 8; } + ) + }, + { + sectors = 32; + coordinators = ( + { core = 8; + hashers = 14; }, + { core = 16; + hashers = 2; } + ) + }, + { + sectors = 64; + coordinators = ( + { core = 8; + hashers = 14; }, + { core = 16; + hashers = 14; }, + { core = 24; + hashers = 4; } + ) + }, + { + sectors = 128; + coordinators = ( + { core = 8; + hashers = 14; }, + { core = 16; + hashers = 14; }, + { core = 24; + hashers = 14; }, + { core = 32; + hashers = 14; }, + { core = 40; + hashers = 8; } + ) + } + ) + }, + pc2: { + # Core for reading columns from NVMe + reader = 48; + # Core for initiating layer reading and managing Poseidon hashing on GPU(s) + hasher = 49; + # Core for performing the final CPU portion of hashing + hasher_cpu = 50; + # Core for writing hashed data to tree-r and tree-c files + writer = 52; + writer_cores = 8; + # Sleep time in usec when the nvme reader is idle + sleep_time = 200; + # SPDK qpair for reading + qpair = 2; + }, + c1: { + # Core for reading nodes from NVMe + reader = 4; + # Sleep time in usec when the nvme reader is idle + sleep_time = 200; + # SPDK qpair for reading + qpair = 3; + } +} diff --git a/extern/supraseal/demos/rust/supra_seal_zen2.cfg b/extern/supraseal/demos/rust/supra_seal_zen2.cfg new file mode 100644 index 000000000..d538b52c0 --- /dev/null +++ b/extern/supraseal/demos/rust/supra_seal_zen2.cfg @@ -0,0 +1,120 @@ +# Configuration for supra_seal +spdk: { + # PCIe identifiers of NVMe drives to use to store layers + nvme = [ "0000:01:00.0", + "0000:02:00.0", + "0000:03:00.0", + "0000:29:00.0", + "0000:2a:00.0", + "0000:2b:00.0", + "0000:2c:00.0", + "0000:41:00.0", + "0000:44:00.0", + "0000:62:00.0", + "0000:63:00.0", + "0000:64:00.0", + "0000:65:00.0" ]; +} + +# CPU topology for various parallel sector counts +topology: +{ + pc1: { + # Core for writing hashed nodes to disk + writer = 0; + # Core for reading parent nodes + reader = 1; + # Core for coordinating buffers + orchestrator = 2; + + # Number of hashers to instantiate per physical core + hashers_per_core = 1; + + # Configuration for coordinators and hashers for various 
parallel sector counts. + # Each entry has the following fields: + # sectors - the number of parallel sectors supported + # coordinators - a list of one or more coordinator nodes, each containing + # core - which core the coordinator runs on + # hashers - the number of hashing threads associated with the coordinator + # Each hashing thread processes two sectors. As a result the sum of "hashers" + # times two should equal "sectors". + # + # It's important to take into account the topology of the system when arranging + # threads. The purpose of the coordinator is to load data into the L3 cache so + # that the associated hashing threads have low latency access to the data. For + # this to be effective they must share the L3 cache. The system topology can be + # conveniently visualized using the `lstopo` command. + # + # The typical configuration would be one coordinator per core complex (CCX) on + # an AMD based machine to maximize cache data locality between the coordinator and + # hashing threads. + # + # To illustrate consider the configuration for 64 parallel sectors. Cores 0, 1 and 2 + # are used by the writer, reader, and orchestrator threads, so the first coordinator + # is placed on core 3. There are 4 more physical cores availabe in the CCX, so 8 + # hashers are assigned to utilize both the physical and hyperthread cores. This covers + # the first 16 sectors (2 sectors per hasher). + # + # The next coordinator is assigned to core 8. There are then 7 physical cores + # remaining, so 14 hashing threads are assigned, bring the sector count to 44. + # Finally core 16 gets the last coordinator with 10 hashers to cover the remaining + # 20 sectors. + sector_configs: ( + { + sectors = 2; + coordinators = ( + { core = 3; + hashers = 1; } + ) + }, + { + sectors = 4; + coordinators = ( + { core = 3; + hashers = 2; } + ) + }, + { + sectors = 8; + coordinators = ( + { core = 3; + hashers = 4; } + ) + }, + { + sectors = 16; + coordinators = ( + { core = 3; + hashers = 4; }, + { core = 8; + hashers = 4; } + ) + }, + { + sectors = 32; + coordinators = ( + { core = 3; + hashers = 4; }, + { core = 8; + hashers = 7; }, + { core = 16; + hashers = 5; } + ) + } + ) + }, + # TODO: This conflicts with 128 sectors, but our current processor runs + # out of cores. + pc2: { + # Core for reading columns from NVMe + reader = 24; + # Core for initiating layer reading and managing Poseidon hashing on GPU(s) + hasher = 25; + # Core for writing hashed data to tree-r and tree-c files + writer = 26; + }, + c1: { + # Core for reading nodes from NVMe + reader = 27; + } +} \ No newline at end of file diff --git a/extern/supraseal/exec.sh b/extern/supraseal/exec.sh new file mode 100755 index 000000000..365bdce06 --- /dev/null +++ b/extern/supraseal/exec.sh @@ -0,0 +1,33 @@ +#!/bin/bash -e + +# Copyright Supranational LLC + +set -x + +RUNTIME="" # Compile for all sector sizes +SECTOR_SIZE="" +while getopts 'b:' flag +do + case "${flag}" in + b) SECTOR_SIZE="${OPTARG}";; + esac +done + +if [[ -z $SECTOR_SIZE ]]; then + echo "Please specify a sector size. e.g exec.sh -b 32GiB" + exit 1 +fi + +if [[ "$SECTOR_SIZE" != "32GiB" && "$SECTOR_SIZE" != "512MiB" ]]; then + RUNTIME="-r" +fi + +./build.sh $RUNTIME + +cd demos/rust +#touch build.rs +env RUSTFLAGS="-C target-cpu=native" \ + cargo +nightly build --release +sudo ./target/release/supra-seal-demo $SECTOR_SIZE + +cd ../.. 
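Across all of these `sector_configs` entries the invariant called out in the comments is that each hashing thread covers two sectors, so twice the sum of the `hashers` fields must equal `sectors`. A minimal illustrative check (values hard-coded from the zen2 `sectors = 32` entry above; not part of the build):

```cpp
#include <cassert>
#include <cstdio>
#include <vector>

int main() {
  // Hashers per coordinator for the zen2 "sectors = 32" entry
  std::vector<int> hashers = {4, 7, 5};
  const int sectors = 32;

  int sum = 0;
  for (int h : hashers) sum += h; // 16 hashing threads in total
  assert(sum * 2 == sectors);     // each hasher processes two sectors
  printf("ok: %d hashers cover %d sectors\n", sum, sectors);
  return 0;
}
```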
diff --git a/extern/supraseal/nvme/nvme.hpp b/extern/supraseal/nvme/nvme.hpp
new file mode 100644
index 000000000..ff1bf6c76
--- /dev/null
+++ b/extern/supraseal/nvme/nvme.hpp
@@ -0,0 +1,59 @@
+// Copyright Supranational LLC
+
+#ifndef __NVME_HPP__
+#define __NVME_HPP__
+
+extern "C" {
+#include "spdk/stdinc.h"
+#include "spdk/nvme.h"
+#include "spdk/vmd.h"
+#include "spdk/nvme_zns.h"
+#include "spdk/env.h"
+#include "spdk/string.h"
+#include "spdk/log.h"
+}
+#include <vector>
+#include <string>
+#include <set>
+#include <mutex>
+#include <chrono>
+#include "ring_t.hpp"
+
+//using namespace std;
+
+const static size_t BLOCK_SIZE = PAGE_SIZE;
+
+extern int g_spdk_error;
+extern std::mutex print_mtx;
+
+typedef std::chrono::high_resolution_clock::time_point timestamp_t;
+
+#define SPDK_ERROR(op) \
+  { int rc; \
+    if ((rc = (op)) != 0) { \
+      g_spdk_error = rc; \
+      printf("SPDK error encountered: %d at %s:%d\n", rc, __FILE__, __LINE__); \
+      return rc; \
+    } \
+  }
+#define SPDK_ASSERT(op) \
+  { int rc; \
+    if ((rc = (op)) != 0) { \
+      printf("SPDK error encountered: %d at %s:%d\n", rc, __FILE__, __LINE__); \
+      assert (rc == 0); \
+    } \
+  }
+
+class nvme_controllers_t;
+class nvme_controller_t;
+class nvme_namespace_t;
+class nvme_qpair_t;
+
+#include "nvme_namespace_t.hpp"
+#include "nvme_qpair_t.hpp"
+#include "nvme_io_tracker_t.hpp"
+#include "nvme_controller_t.hpp"
+#include "sequential_io_t.hpp"
+#include "spdk_ptr_t.hpp"
+
+#endif
diff --git a/extern/supraseal/nvme/nvme_controller_t.hpp b/extern/supraseal/nvme/nvme_controller_t.hpp
new file mode 100644
index 000000000..9b65c1cda
--- /dev/null
+++ b/extern/supraseal/nvme/nvme_controller_t.hpp
@@ -0,0 +1,418 @@
+// Copyright Supranational LLC
+
+#include <iostream>
+
+#ifndef __NVME_CONTROLLER_T_HPP__
+#define __NVME_CONTROLLER_T_HPP__
+
+struct nvme_health_info {
+  uint8_t  critical_warning;
+  int16_t  temperature; // Converted to Celsius in the conversion function
+  uint8_t  available_spare;
+  uint8_t  available_spare_threshold;
+  uint8_t  percentage_used;
+  uint64_t data_units_read;
+  uint64_t data_units_written;
+  uint64_t host_read_commands;
+  uint64_t host_write_commands;
+  uint64_t controller_busy_time;
+  uint64_t power_cycles;
+  uint64_t power_on_hours;
+  uint64_t unsafe_shutdowns;
+  uint64_t media_errors;
+  uint64_t num_error_info_log_entries;
+  uint32_t warning_temp_time;
+  uint32_t critical_temp_time;
+  int16_t  temp_sensors[8]; // Converted to Celsius in the conversion function
+};
+
+class nvme_controller_t {
+  friend nvme_controllers_t;
+  friend nvme_namespace_t;
+  friend nvme_qpair_t;
+
+public:
+  static const size_t queue_size = 1024;
+
+private:
+  std::string name;
+  struct spdk_nvme_ctrlr* ctrlr;
+  std::vector<nvme_namespace_t> namespaces;
+  std::vector<nvme_qpair_t*> qpairs;
+
+  static void get_log_page_completion(void *cb_arg, const struct spdk_nvme_cpl *cpl) {
+    if (spdk_nvme_cpl_is_error(cpl)) {
+      printf("WARNING: SPDK get log page failed\n");
+    }
+    std::mutex* mtx = (std::mutex*)cb_arg;
+    mtx->unlock();
+  }
+
+public:
+  nvme_controller_t(const char* _name,
+                    struct spdk_nvme_ctrlr* _ctrlr) {
+    name = _name;
+    ctrlr = _ctrlr;
+  }
+
+  std::string get_name() {
+    return name;
+  }
+
+  size_t get_sector_count(size_t ns_id) {
+    return namespaces[ns_id].get_sector_count();
+  }
+
+  size_t get_page_count(size_t ns_id) {
+    return namespaces[ns_id].get_page_count();
+  }
+
+  // Get controller temp in degrees C
+  int get_temp() {
+    std::mutex mtx;
+    mtx.lock();
+    static struct spdk_nvme_health_information_page health_page;
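+    // The admin command completes asynchronously: the mutex is taken here,
+    // get_log_page_completion() releases it from the completion callback, and
+    // the loop below polls admin completions until try_lock() succeeds.
+    int rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION,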
SPDK_NVME_GLOBAL_NS_TAG, &health_page, + sizeof(health_page), 0, + get_log_page_completion, &mtx); + if (rc != 0) { + printf("WARNING: could not read controller temperature\n"); + return 0; + } + while (!mtx.try_lock()) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + usleep(100); + } + return (int)health_page.temperature - 273; + } + + // Get controller health information page + struct spdk_nvme_health_information_page get_health_page() { + std::mutex mtx; + mtx.lock(); + static struct spdk_nvme_health_information_page health_page; + int rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr, SPDK_NVME_LOG_HEALTH_INFORMATION, + SPDK_NVME_GLOBAL_NS_TAG, &health_page, + sizeof(health_page), 0, + get_log_page_completion, &mtx); + if (rc != 0) { + printf("WARNING: could not read controller health page\n"); + return health_page; + } + while (!mtx.try_lock()) { + spdk_nvme_ctrlr_process_admin_completions(ctrlr); + usleep(100); + } + return health_page; + } + + void cleanup() { + for (auto it: qpairs) { + it->cleanup(); + delete(it); + } + } + + int register_namespaces() { + // Each controller has one or more namespaces. An NVMe namespace is + // basically equivalent to a SCSI LUN. The controller's IDENTIFY data + // tells us how many namespaces exist on the controller. For Intel(R) + // P3X00 controllers, it will just be one namespace. + // Note that in NVMe, namespace IDs start at 1, not 0. + + for (int nsid = spdk_nvme_ctrlr_get_first_active_ns(ctrlr); nsid != 0; + nsid = spdk_nvme_ctrlr_get_next_active_ns(ctrlr, nsid)) { + struct spdk_nvme_ns* ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid); + if (ns == NULL) { + continue; + } + if (!spdk_nvme_ns_is_active(ns)) { + continue; + } + namespaces.emplace(namespaces.end(), ns); + //auto it = namespaces.emplace(namespaces.end(), ns); + //it->print(); + } + return 0; + } + + int alloc_qpairs(size_t count) { + // Allocate an I/O qpair that we can use to submit read/write requests + // to namespaces on the controller. NVMe controllers typically support + // many qpairs per controller. Any I/O qpair allocated for a controller + // can submit I/O to any namespace on that controller. + + // The SPDK NVMe driver provides no synchronization for qpair accesses - + // the application must ensure only a single thread submits I/O to a + // qpair, and that same thread must also check for completions on that + // qpair. This enables extremely efficient I/O processing by making all + // I/O operations completely lockless. + struct spdk_nvme_io_qpair_opts opts; + spdk_nvme_ctrlr_get_default_io_qpair_opts(ctrlr, &opts, sizeof(opts)); + opts.io_queue_requests = queue_size; + //printf("Allocating %d io_queue_requests\n", opts.io_queue_requests); + opts.delay_cmd_submit = true; + + for (size_t i = 0; i < count; i++) { + struct spdk_nvme_qpair* qpair = + spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, &opts, sizeof(opts)); + if (qpair == NULL) { + return 1; + } + qpairs.push_back(new nvme_qpair_t(qpair)); + } + return 0; + } + + static void io_complete(void *arg, const struct spdk_nvme_cpl *completion) { + nvme_io_tracker_t* io = (nvme_io_tracker_t*)arg; + + // See if an error occurred. If so, display information + // about it, and set completion value so that I/O + // caller is aware that an error occurred. 
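+    // On success, chain into the caller-supplied callback (if any); e.g.
+    // sequential_io_t uses this hook to return the tracker to its pool.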
+    if (spdk_nvme_cpl_is_error(completion)) {
+      spdk_nvme_qpair_print_completion(io->qpair->get_qpair(),
+                                       (struct spdk_nvme_cpl *)completion);
+      fprintf(stderr, "I/O error status: %s\n",
+              spdk_nvme_cpl_get_status_string(&completion->status));
+      fprintf(stderr, "I/O failed, aborting run\n");
+      exit(1);
+    }
+    if (io->completion_cb) {
+      if (io->completion_cb(io->completion_arg) != 0) {
+        fprintf(stderr, "I/O callback failed, aborting run\n");
+        exit(1);
+      }
+    }
+  }
+
+  // Note: user needs to call process_completions to poll for completed
+  // io. The buffer must be reserved in advance using reserve_buf. The
+  // buffer will be returned to the pool when the IO completes.
+  // buf_gid is a global id.
+  int write(nvme_io_tracker_t* io, size_t ns_id, size_t qpair_id, size_t offset,
+            completion_cb_t cb = nullptr, void *cb_arg = nullptr) {
+    io->ns = &namespaces[ns_id];
+    io->qpair = qpairs[qpair_id];
+    io->completion_cb = cb;
+    io->completion_arg = cb_arg;
+
+    uint32_t sector_size = io->ns->get_sector_size();
+    uint32_t sectors_per_block = BLOCK_SIZE / sector_size;
+
+    SPDK_ERROR(spdk_nvme_ns_cmd_write(io->ns->get_ns(),
+                                      io->qpair->get_qpair(),
+                                      io->buf,
+                                      offset * sectors_per_block, // LBA start
+                                      io->len() / sector_size,
+                                      io_complete,
+                                      io, 0));
+    io->qpair->incr_ops();
+    return 0;
+  }
+
+  // Note: user needs to call process_completions to poll for completed
+  // io. The buffer must be reserved in advance using reserve_buf. The
+  // buffer will be returned to the pool when the IO completes.
+  // buf_id is a global id.
+  int read(nvme_io_tracker_t* io, size_t ns_id, size_t qpair_id, size_t offset,
+           completion_cb_t cb = nullptr, void *cb_arg = nullptr) {
+    io->ns = &namespaces[ns_id];
+    io->qpair = qpairs[qpair_id];
+    io->completion_cb = cb;
+    io->completion_arg = cb_arg;
+
+    uint32_t sector_size = io->ns->get_sector_size();
+    uint32_t sectors_per_block = BLOCK_SIZE / sector_size;
+
+    SPDK_ERROR(spdk_nvme_ns_cmd_read(io->ns->get_ns(),
+                                     io->qpair->get_qpair(),
+                                     io->buf,
+                                     offset * sectors_per_block, // LBA start
+                                     io->len() / sector_size,
+                                     io_complete,
+                                     io, 0));
+    io->qpair->incr_ops();
+    return 0;
+  }
+
+  size_t get_outstanding_io_ops(size_t qpair) {
+    return qpairs[qpair]->get_outstanding_io_ops();
+  }
+
+  int process_completions(size_t qpair) {
+    return qpairs[qpair]->process_completions();
+  }
+
+  int process_all_completions(size_t qpair) {
+    int completions = 0;
+    while (qpairs[qpair]->get_outstanding_io_ops() > 0) {
+      completions += qpairs[qpair]->process_completions();
+    }
+    return completions;
+  }
+};
+
+class nvme_controllers_t {
+  struct spdk_nvme_transport_id trid = {};
+
+  std::set<std::string> allowed_nvme;
+  std::vector<nvme_controller_t*> controllers;
+  //size_t total_buffer_count;
+
+  static bool probe_cb(void* cb_ctx,
+                       const struct spdk_nvme_transport_id* trid,
+                       struct spdk_nvme_ctrlr_opts* opts) {
+    nvme_controllers_t* me = (nvme_controllers_t*)cb_ctx;
+    if (me->allowed_nvme.find(trid->traddr) != me->allowed_nvme.end()) {
+      printf("Attaching to %s\n", trid->traddr);
+      return true;
+    } else {
+      printf("NOT Attaching to %s\n", trid->traddr);
+      return false;
+    }
+  }
+
+  static void attach_cb(void* cb_ctx,
+                        const struct spdk_nvme_transport_id* trid,
+                        struct spdk_nvme_ctrlr* ctrlr,
+                        const struct spdk_nvme_ctrlr_opts* opts) {
+    nvme_controllers_t* me = (nvme_controllers_t*)cb_ctx;
+
+    //printf("Attached to %s\n", trid->traddr);
+
+    // spdk_nvme_ctrlr is the logical abstraction in SPDK for an NVMe
+    // controller. During initialization, the IDENTIFY data for the
+    // controller is read using an NVMe admin command, and that data
+    // can be retrieved using spdk_nvme_ctrlr_get_data() to get
+    // detailed information on the controller. Refer to the NVMe
+    // specification for more details on IDENTIFY for NVMe controllers.
+    nvme_controller_t* controller = new nvme_controller_t(trid->traddr, ctrlr);
+    controller->register_namespaces();
+    me->controllers.push_back(controller);
+  }
+
+public:
+  nvme_controllers_t(std::set<std::string> _allowed_nvme) {
+    allowed_nvme = _allowed_nvme;
+  }
+
+  ~nvme_controllers_t() {
+    for (auto it: controllers) {
+      it->cleanup();
+      delete it;
+    }
+  }
+
+  size_t size() {
+    return controllers.size();
+  }
+
+  std::vector<struct spdk_nvme_health_information_page> get_health_pages() {
+    std::vector<struct spdk_nvme_health_information_page> health_pages;
+    for (auto it: controllers) {
+      health_pages.push_back(it->get_health_page());
+    }
+    return health_pages;
+  }
+
+  std::vector<nvme_health_info> get_health_info() {
+    std::vector<nvme_health_info> health_infos;
+    auto health_pages = get_health_pages();
+
+    for (const auto& page : health_pages) {
+      nvme_health_info info = {};
+
+      // Convert the health page to our simplified format
+      info.critical_warning = page.critical_warning.raw;
+      info.temperature = page.temperature - 273; // Convert Kelvin to Celsius
+      info.available_spare = page.available_spare;
+      info.available_spare_threshold = page.available_spare_threshold;
+      info.percentage_used = page.percentage_used;
+
+      // Take first value from pairs for simplified interface
+      info.data_units_read = page.data_units_read[0];
+      info.data_units_written = page.data_units_written[0];
+      info.host_read_commands = page.host_read_commands[0];
+      info.host_write_commands = page.host_write_commands[0];
+      info.controller_busy_time = page.controller_busy_time[0];
+      info.power_cycles = page.power_cycles[0];
+      info.power_on_hours = page.power_on_hours[0];
+      info.unsafe_shutdowns = page.unsafe_shutdowns[0];
+      info.media_errors = page.media_errors[0];
+      info.num_error_info_log_entries = page.num_error_info_log_entries[0];
+
+      info.warning_temp_time = page.warning_temp_time;
+      info.critical_temp_time = page.critical_temp_time;
+
+      // Convert temperature sensors from Kelvin to Celsius
+      for (int i = 0; i < 8; i++) {
+        info.temp_sensors[i] = page.temp_sensor[i] - 273;
+      }
+
+      health_infos.push_back(info);
+    }
+
+    return health_infos;
+  }
+
+  void print_temperatures() {
+    auto now = std::chrono::system_clock::now();
+    std::time_t time = std::chrono::system_clock::to_time_t(now);
+
+    std::cout << "NVME Controller temperatures (C) " << std::ctime(&time);
+    for (auto it: controllers) {
+      std::cout << "  " << it->get_name() << ": " << it->get_temp() << std::endl;
+    }
+  }
+
+  nvme_controller_t &operator[](size_t i) {
+    return *controllers[i];
+  }
+
+  // Remove controller from the list
+  void remove(size_t i) {
+    controllers.erase(controllers.begin() + i, controllers.begin() + i + 1);
+  }
+
+  int init(size_t qpair_count) {
+    SPDK_ERROR(probe());
+    SPDK_ERROR(alloc_qpairs(qpair_count));
+    return 0;
+  }
+
+  int probe() {
+    spdk_nvme_trid_populate_transport(&trid, SPDK_NVME_TRANSPORT_PCIE);
+    snprintf(trid.subnqn, sizeof(trid.subnqn), "%s", SPDK_NVMF_DISCOVERY_NQN);
+
+    int rc = spdk_nvme_probe(&trid, this, probe_cb, attach_cb, NULL);
+    if (rc != 0) {
+      fprintf(stderr, "spdk_nvme_probe() failed\n");
+      return 1;
+    }
+    return 0;
+  }
+
+  int alloc_qpairs(size_t count) {
+    int rc;
+    for (auto it: controllers) {
+      if ((rc = it->alloc_qpairs(count)) != 0) {
+        return rc;
+      }
+    }
+    return 0;
+  }
+
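+  // Typical usage (illustrative sketch, assuming the SPDK env has been
+  // initialized by the caller):
+  //   nvme_controllers_t controllers({"0000:44:00.0", "0000:43:00.0"});
+  //   controllers.init(4);  // probe + attach + 4 qpairs per controller
+  //   controllers.sort();   // stable device ordering by PCIe address
+
+  static bool sort_function(nvme_controller_t *i,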
nvme_controller_t *j) { + return i->get_name() < j->get_name(); + } + void sort() { + std::sort(controllers.begin(), controllers.end(), sort_function); + } +}; + +#endif diff --git a/extern/supraseal/nvme/nvme_io_tracker_t.hpp b/extern/supraseal/nvme/nvme_io_tracker_t.hpp new file mode 100644 index 000000000..c4a642658 --- /dev/null +++ b/extern/supraseal/nvme/nvme_io_tracker_t.hpp @@ -0,0 +1,43 @@ +// Copyright Supranational LLC + +#ifndef __NVME_IO_TRACKER_T_HPP__ +#define __NVME_IO_TRACKER_T_HPP__ + +// Track NVME IO operations +class nvme_io_tracker_t; +typedef int (*completion_cb_t)(void *arg); + +class nvme_controller_t; +class nvme_namespace_t; +class nvme_qpair_t; + +class nvme_io_tracker_t { +public: + friend class nvme_controller_t; + + // Must be set by caller of read/write + uint8_t* buf; + + // Will bet set internally + nvme_controller_t* controller; + nvme_namespace_t* ns; + nvme_qpair_t* qpair; + + // Completion callback + void* completion_arg; + completion_cb_t completion_cb; + + nvme_io_tracker_t() { + controller = nullptr; + ns = nullptr; + qpair = nullptr; + buf = nullptr; + completion_cb = nullptr; + } + + size_t len() { + return PAGE_SIZE; + } +}; + +#endif diff --git a/extern/supraseal/nvme/nvme_namespace_t.hpp b/extern/supraseal/nvme/nvme_namespace_t.hpp new file mode 100644 index 000000000..f04ba88a5 --- /dev/null +++ b/extern/supraseal/nvme/nvme_namespace_t.hpp @@ -0,0 +1,39 @@ +// Copyright Supranational LLC + +#ifndef __NVME_NAMESPACE_T__ +#define __NVME_NAMESPACE_T__ + +class nvme_namespace_t { + struct spdk_nvme_ns* ns; + uint32_t sector_size; + +public: + nvme_namespace_t(struct spdk_nvme_ns* _ns) { + ns = _ns; + sector_size = spdk_nvme_ns_get_sector_size(ns); + } + + struct spdk_nvme_ns* get_ns() { + return ns; + } + + size_t get_page_count() { + return spdk_nvme_ns_get_size(ns) / PAGE_SIZE; + } + + size_t get_sector_count() { + return spdk_nvme_ns_get_size(ns) / get_sector_size(); + } + + uint32_t get_sector_size() { + return sector_size; + } + + void print() { + printf(" Namespace ID: %d size: %juGB\n", + spdk_nvme_ns_get_id(ns), + spdk_nvme_ns_get_size(ns) / 1000000000); + } +}; + +#endif diff --git a/extern/supraseal/nvme/nvme_qpair_t.hpp b/extern/supraseal/nvme/nvme_qpair_t.hpp new file mode 100644 index 000000000..4597ae89d --- /dev/null +++ b/extern/supraseal/nvme/nvme_qpair_t.hpp @@ -0,0 +1,58 @@ +// Copyright Supranational LLC + +#ifndef __NVME_QPAIR_T_HPP__ +#define __NVME_QPAIR_T_HPP__ + +class nvme_qpair_t { + struct spdk_nvme_qpair* qpair; + size_t outstanding_io_ops; + +public: + nvme_qpair_t(struct spdk_nvme_qpair* _qpair) { + qpair = _qpair; + outstanding_io_ops = 0; + } + ~nvme_qpair_t() {} + + struct spdk_nvme_qpair* get_qpair() { + return qpair; + } + void incr_ops() { + outstanding_io_ops++; + } + size_t get_outstanding_io_ops() { + return outstanding_io_ops; + } + + void cleanup() { + // Free the I/O qpair. This typically is done when an application exits. + // But SPDK does support freeing and then reallocating qpairs during + // operation. It is the responsibility of the caller to ensure all + // pending I/O are completed before trying to free the qpair. + spdk_nvme_ctrlr_free_io_qpair(qpair); + } + + int process_completions() { + // Poll for completions. 0 here means process all available completions. + // In certain usage models, the caller may specify a positive integer + // instead of 0 to signify the maximum number of completions it should + // process. 
This function will never block - if there are no
+    // completions pending on the specified qpair, it will return immediately.
+    if (outstanding_io_ops > 0) {
+      // Returns the number of completions processed, or -ERRNO on error
+      int completions = spdk_nvme_qpair_process_completions(qpair, 0);
+      assert(completions >= 0);
+      outstanding_io_ops -= completions;
+      return completions;
+    }
+    return 0;
+  }
+};
+
+#endif
diff --git a/extern/supraseal/nvme/ring_t.cpp b/extern/supraseal/nvme/ring_t.cpp
new file mode 100644
index 000000000..fea8e88ea
--- /dev/null
+++ b/extern/supraseal/nvme/ring_t.cpp
@@ -0,0 +1,12 @@
+// Copyright Supranational LLC
+
+#include "ring_t.hpp"
+#include "nvme.hpp"
+
+void ring_spdk_free(void *ptr) {
+  spdk_free(ptr);
+}
+
+void* ring_spdk_alloc(size_t bytes) {
+  return spdk_dma_zmalloc(bytes, PAGE_SIZE, NULL);
+}
diff --git a/extern/supraseal/nvme/ring_t.hpp b/extern/supraseal/nvme/ring_t.hpp
new file mode 100644
index 000000000..c658298eb
--- /dev/null
+++ b/extern/supraseal/nvme/ring_t.hpp
@@ -0,0 +1,574 @@
+// Copyright Supranational LLC
+
+#ifndef __RING_T_HPP__
+#define __RING_T_HPP__
+
+#include <atomic>
+#include <cassert>
+#include <cstddef>
+#include <cstdio>
+#include <mutex>
+#include <vector>
+
+void ring_spdk_free(void *);
+void* ring_spdk_alloc(size_t bytes);
+
+// Single producer / Single consumer lock free fifo
+template <class T>
+class mt_fifo_t {
+  std::vector<T*> store;
+
+  // If we allocated the contents store the pointer
+  T *contents;
+
+  // 'head' tracks the next unused element.
+  std::atomic<size_t> head;
+  // 'tail' tracks the last used element
+  std::atomic<size_t> tail;
+
+public:
+  mt_fifo_t() : head(0), tail(0) {
+    contents = nullptr;
+  }
+  ~mt_fifo_t() {
+#ifndef NO_SPDK
+    ring_spdk_free(contents);
+#endif
+  }
+
+  int create(const char *name, size_t count) {
+    return create(count);
+  }
+
+  int create(size_t _count) {
+    // Create a pool to hold the desired size
+    store.resize(_count + 1);
+    return 0;
+  }
+
+  size_t capacity() {
+    return store.size() - 1;
+  }
+
+  int enqueue_nocheck(T *obj) {
+    //assert (!is_full());
+    size_t h = head;
+    store[h] = obj;
+    if (h == store.size() - 1) {
+      head = 0;
+    } else {
+      head = h + 1;
+    }
+    return 0;
+  }
+
+  int enqueue(T *obj) {
+    assert (!is_full());
+    size_t h = head;
+    store[h] = obj;
+    if (h == store.size() - 1) {
+      head = 0;
+    } else {
+      head = h + 1;
+    }
+    return 0;
+  }
+
+  T* dequeue() {
+    if (size() != 0) {
+      size_t t = tail;
+      T *obj = store[t];
+      if (t == store.size() - 1) {
+        tail = 0;
+      } else {
+        tail = t + 1;
+      }
+      return obj;
+    }
+    return nullptr;
+  }
+
+  // Fill the pool
+  // One element of store is left empty since the pool can hold
+  // size() - 1 of usable data.
+#ifndef NO_SPDK
+  int fill() {
+    contents = (T*)ring_spdk_alloc(sizeof(T) * (store.size() - 1));
+    if (contents == nullptr) {
+      return 1;
+    }
+    for (size_t i = 0; i < store.size() - 1; i++) {
+      store[i] = &contents[i];
+    }
+    head = store.size() - 1;
+
+    return 0;
+  }
+#endif
+
+  // Number of used entries in the ring
+  size_t size() {
+    // Load values so we can perform a consistent calculation
+    size_t h = head;
+    size_t t = tail;
+    if (h >= t) {
+      return h - t;
+    } else {
+      return (store.size() + h) - t;
+    }
+  }
+
+  // Get entry at index
+  T& operator[](size_t i) {
+    return *store[i];
+  }
+
+  bool is_full() {
+    return size() == store.size() - 1;
+  }
+
+  inline size_t free_count() {
+    return capacity() - size();
+  }
+
+  void print() {
+    size_t h = head;
+    size_t t = tail;
+    printf("mt_ring_t: tail %ld, head %ld\n", t, h);
+  }
+};
+
+// Non-multithread safe pool using a ring buffer
+template <class T>
+class pool_t {
+  std::vector<T*> store;
+
+  // If we allocated the contents store the pointer
+  T *contents;
+
+  // 'head' tracks the next unused element.
+  size_t head;
+  // 'tail' tracks the last used element
+  size_t tail;
+
+public:
+  pool_t() {
+    contents = nullptr;
+    head = 0;
+    tail = 0;
+  }
+  ~pool_t() {
+#ifndef NO_SPDK
+    ring_spdk_free(contents);
+#endif
+  }
+
+  int create(size_t _count) {
+    // Create a pool to hold the desired size
+    store.resize(_count + 1);
+    return 0;
+  }
+
+  int enqueue(T *obj) {
+    assert (!is_full());
+    store[head++] = obj;
+    if (head >= store.size()) {
+      head = 0;
+    }
+    return 0;
+  }
+
+  T* dequeue() {
+    if (size() != 0) {
+      T *obj = store[tail++];
+      if (tail >= store.size()) {
+        tail = 0;
+      }
+      return obj;
+    }
+    return nullptr;
+  }
+
+  // Dequeue a block of contiguous elements
+  // Returns null if not enough elements or elements are non-contiguous
+  T** dequeue_bulk(size_t count) {
+    if (size() >= count && tail + count <= store.size()) {
+      T **obj = &store[tail];
+      tail += count;
+      if (tail >= store.size()) {
+        tail -= store.size();
+      }
+      return obj;
+    }
+    return nullptr;
+  }
+
+  // Fill the pool
+  // One element of store is left empty since the pool can hold
+  // size() - 1 of usable data.
+#ifndef NO_SPDK
+  int fill() {
+    contents = (T*)ring_spdk_alloc(sizeof(T) * (store.size() - 1));
+    if (contents == nullptr) {
+      return 1;
+    }
+    for (size_t i = 0; i < store.size() - 1; i++) {
+      store[i] = &contents[i];
+    }
+    head = store.size() - 1;
+
+    return 0;
+  }
+#endif
+
+  // Number of used entries in the ring
+  size_t size() {
+    if (head >= tail) {
+      return head - tail;
+    } else {
+      return (store.size() + head) - tail;
+    }
+  }
+
+  // Get entry at index
+  T& operator[](size_t i) {
+    return *store[i];
+  }
+
+  bool is_full() {
+    return size() == store.size() - 1;
+  }
+
+  size_t capacity() {
+    return store.size() - 1;
+  }
+
+  inline size_t free_count() {
+    return capacity() - size();
+  }
+};
+
+template <class T>
+class mtx_fifo_t {
+  pool_t<T> pool;
+  std::mutex mtx;
+
+public:
+  int create(size_t _count) {
+    std::unique_lock<std::mutex> lock(mtx);
+    return pool.create(_count);
+  }
+
+  int enqueue(T *obj) {
+    std::unique_lock<std::mutex> lock(mtx);
+    return pool.enqueue(obj);
+  }
+
+  T* dequeue() {
+    std::unique_lock<std::mutex> lock(mtx);
+    return pool.dequeue();
+  }
+
+  // Number of used entries in the ring
+  size_t size() {
+    std::unique_lock<std::mutex> lock(mtx);
+    return pool.size();
+  }
+
+  // Get entry at index
+  T& operator[](size_t i) {
+    std::unique_lock<std::mutex> lock(mtx);
+    return pool[i];
+  }
+
+  bool is_full() {
+    std::unique_lock<std::mutex> lock(mtx);
+    return pool.is_full();
+  }
+
+  size_t capacity() {
+    std::unique_lock<std::mutex> lock(mtx);
+    return pool.capacity();
+  }
+
+  inline size_t free_count() {
+    std::unique_lock<std::mutex> lock(mtx);
+    return pool.free_count();
+  }
+};
+
+// Ring buffer for data from disk
+// Safe for single producer / single consumer.
+// On the producer side, the flow is:
+// - Advance the head to reserve an element. The element is marked as invalid.
+// - Initiate a disk read for the data
+// - Sometime later the disk DMA completes and we are notified. At this point
+//   the data is expected to be in memory.
+// - Mark the entry as valid, indicating it may be read
+// On the consumer side
+// - Data is consumed from the tail. Once consumed tail is advanced
+// - Data is only consumed when marked as valid.
+typedef std::atomic<uint64_t> ring_buffer_valid_t;
+
+template <class T, unsigned int _VALID_THRESHOLD, size_t SIZE>
+class ring_buffer_t {
+public:
+  typedef T* T_ptr;
+
+  const unsigned int VALID_THRESHOLD = _VALID_THRESHOLD;
+  // Number of entries
+  static const size_t count = SIZE;
+
+private:
+  // Array of entries
+  T_ptr entries; // 4k pages - 1536 pages to saturate drives
+  // Store which entries are valid
+  ring_buffer_valid_t* valid;
+
+  // pointers move right ----->
+  //      tail           head_valid           head
+  //       |                  |                 |
+  // -----------------------------------------------------------------------
+  // |    |    |    |    |    |    |    |    |    |    |    |    |    |    |
+  // -----------------------------------------------------------------------
+  //   0    1    1    1    1    1    1    1    1    1    0    1    0    0
+  //                              valid
+
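+  // Producer/consumer flow (illustrative sketch of the protocol above):
+  //   size_t idx;
+  //   T* e = ring.reserve(idx);  // producer: claim entry, starts invalid
+  //   ...issue an NVMe read into e...
+  //   ring.incr_valid(idx);      // DMA complete: mark entry readable
+  //   ring.release_valid();      // consumer: drain in-order valid entries
+
+  // 'head' tracks the next unused element.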
+ size_t head; + // 'head_valid' tracks the point at which all previous elements are valid + size_t head_valid; + // 'tail' tracks the last used element + size_t tail; + // track size + size_t cur_size; + +public: + ring_buffer_t() { + entries = nullptr; + valid = nullptr; + head = 0; + head_valid = 0; + tail = 0; + cur_size = 0; + } + + ~ring_buffer_t() { + delete [] valid; + valid = nullptr; + } + + // Usable size will be count - 1 + // 'entries' is an array of count entries + int create(T *_entries) { + entries = _entries; + + valid = new ring_buffer_valid_t[count]; + for (size_t i = 0; i < count; i++) { + valid[i] = 0; + } + return 0; + } + + // Number of elements of storage + inline size_t storage() { + return count; + } + + inline size_t capacity() { + return count - 1; + } + +public: + inline size_t size() { + //return sub(head, tail); + return cur_size; + } + + inline size_t free_count() { + //return capacity() - size(); + return capacity() - cur_size; + } + + inline bool is_full() { + return cur_size == capacity(); + } + + inline T *get_entry(size_t idx) { + return &entries[idx]; + } + + // Reserve the next free element for use, advance head + // Do not perform the safety check for a full buffer + inline T *reserve_nocheck(size_t &idx) { + // Store index + idx = head; + // Advance head + size_t next_head = incr(head); + valid[next_head] = 0; + head = next_head; + cur_size++; + return &entries[idx]; + } + + // Reserve the next free element for use, advance head + inline T *reserve(size_t &idx) { + if (is_full()) { + return nullptr; + } + // Store index + idx = head; + // Advance head + size_t next_head = incr(head); + valid[next_head] = 0; + head = next_head; + cur_size++; + return &entries[idx]; + } + + // Reserve the next free element for use, advance head + // Do not perform the safety check for a full buffer + inline void reserve_batch_nocheck(size_t count, size_t &idx, T** batch) { + // Store index + idx = head; + for (size_t i = 0; i < count; i++) { + batch[i] = &entries[head]; + // Advance head + size_t next_head = incr(head); + valid[next_head] = 0; + head = next_head; + } + cur_size += count; + } + + // Mark element as valid + inline void incr_valid(size_t idx, uint64_t amount = 1) { + valid[idx].fetch_add(amount); + } + + inline ring_buffer_valid_t get_valid(size_t idx) { + return valid[idx].load(); + } + inline bool is_valid(size_t idx) { + return valid[idx].load() >= VALID_THRESHOLD; + } + + inline ring_buffer_valid_t* get_valid_ptr(size_t idx) { + return &valid[idx]; + } + + inline size_t get_head() { + return head; + } + + inline size_t get_tail() { + return tail; + } + + inline bool is_tail_valid() { + return valid[tail].load() >= VALID_THRESHOLD; + } + + inline size_t incr(size_t idx) { + idx = (idx == count - 1) ? 0 : idx + 1; + return idx; + } + + inline size_t decr(size_t idx) { + return (idx == 0) ? 
count - 1 : idx - 1; + } + + // Returns a - b, taking into account wraparound + inline size_t sub(size_t a, size_t b) { + if (a >= b) { + return a - b; + } + return (count + a) - b; + } + + // Returns a + b, taking into account wraparound + inline size_t add(size_t a, size_t b) { + size_t res = a + b; + if (res >= count) { + res -= count; + } + return res; + } + + // Advance the head_valid pointer + inline size_t advance_valid() { + size_t cnt = 0; + while (valid[head_valid].load() >= VALID_THRESHOLD) { + cnt++; + head_valid = incr(head_valid); + } + return cnt; + } + + // Release the tail element to unused state + inline void release() { + // Advance tail + //assert (size() > 0); + tail = incr(tail); + cur_size--; + } + + // Release valid tail elements to unused state + inline size_t release_valid() { + size_t count = 0; + // Advance tail + while (valid[tail].load() >= VALID_THRESHOLD) { + valid[tail] = 0; + count++; + tail = incr(tail); + cur_size--; + } + return count; + } + + // Print debug information + void print() { + printf("count %ld, tail %ld, head_valid %ld, head %ld, size %ld, full %d, free_count %ld\n", + count, tail, head_valid, head, size(), is_full(), free_count()); + } +}; + +template +class ring_counter_t { +private: + ABS_TYPE _abs; + size_t _idx; + +public: + ring_counter_t(ABS_TYPE& abs) : _abs(abs) { + _idx = 0; + } + + ABS_TYPE& abs() { + return _abs; + } + size_t idx() { + return _idx / RING_DIVISOR; + } + size_t offset() { + return _idx % RING_DIVISOR; + } + void operator ++(int) { + _abs++; + _idx++; + if (_idx == RING_SIZE) { + _idx = 0; + } + } + ring_counter_t& operator+=(const size_t rhs) { + _abs += rhs; + _idx += rhs; + if (_idx >= RING_SIZE) { + _idx -= RING_SIZE; + } + return *this; + } +}; + +#endif + diff --git a/extern/supraseal/nvme/sequential_io_t.hpp b/extern/supraseal/nvme/sequential_io_t.hpp new file mode 100644 index 000000000..5997ce3d9 --- /dev/null +++ b/extern/supraseal/nvme/sequential_io_t.hpp @@ -0,0 +1,62 @@ +// Copyright Supranational LLC + +#ifndef __SEQUENTIAL_IO_T_HPP__ +#define __SEQUENTIAL_IO_T_HPP__ + +// Simple interface to read sequential data from an NVME drive + +class sequential_io_t { + nvme_controller_t& controller; + + struct cb_t { + nvme_io_tracker_t io; + sequential_io_t* me; + }; + pool_t cb_pool; + + static int completion_cb(void *arg) { + cb_t* cb = (cb_t *)arg; + SPDK_ERROR(cb->me->cb_pool.enqueue(cb)); + return 0; + } + +public: + sequential_io_t(nvme_controller_t& _controller): + controller(_controller) { + // Allocate io trackers + SPDK_ASSERT(cb_pool.create(nvme_controller_t::queue_size)); + SPDK_ASSERT(cb_pool.fill()); + for (size_t i = 0; i < cb_pool.size(); i++) { + cb_pool[i].me = this; + } + } + + // buf must be pages sized and pinned + int rw(bool read, size_t pages, uint8_t *buf, size_t offset = 0) { + size_t page = 0; + while (page < pages) { + // Initiate ops + while (page < pages) { + cb_t *cb = cb_pool.dequeue(); + if (cb == nullptr) { + break; + } + cb->io.buf = &(buf[page * PAGE_SIZE]); + if (read){ + SPDK_ERROR(controller.read(&cb->io, 0, 0, page + offset, + completion_cb, cb)); + } else { + SPDK_ERROR(controller.write(&cb->io, 0, 0, page + offset, + completion_cb, cb)); + } + page++; + } + // Complete reads + controller.process_completions(0); + } + controller.process_all_completions(0); + return 0; + } +}; + +#endif diff --git a/extern/supraseal/nvme/spdk_ptr_t.hpp b/extern/supraseal/nvme/spdk_ptr_t.hpp new file mode 100644 index 000000000..ef552bc5e --- /dev/null +++ 
b/extern/supraseal/nvme/spdk_ptr_t.hpp @@ -0,0 +1,51 @@ +// Copyright Supranational LLC + +#ifndef __SPDK_PTR_T_HPP__ +#define __SPDK_PTR_T_HPP__ + +// Allocator for spdk data +template +class spdk_ptr_t { + T* ptr; + size_t count; + +public: + spdk_ptr_t() : ptr(nullptr), count(0) {} + + spdk_ptr_t(size_t nelems) : ptr(nullptr) { + alloc(nelems); + } + ~spdk_ptr_t() { + if (ptr) { + spdk_free(ptr); + } + } + + void alloc(size_t nelems) { + free(); + if (nelems) { + size_t bytes = nelems * sizeof(T); + ptr = (T*)spdk_dma_zmalloc(bytes, PAGE_SIZE, NULL); + assert (ptr != nullptr); + count = nelems; + } + } + + void free() { + if (ptr) { + spdk_free(ptr); + ptr = nullptr; + count = 0; + } + } + + size_t size() { return count; } + + inline operator const T*() const { return ptr; } + inline operator T*() const { return ptr; } + inline operator void*() const { return (void*)ptr; } + inline const T& operator[](size_t i) const { return ptr[i]; } + inline T& operator[](size_t i) { return ptr[i]; } +}; + +#endif diff --git a/extern/supraseal/nvme/streaming_node_reader_nvme.cpp b/extern/supraseal/nvme/streaming_node_reader_nvme.cpp new file mode 100644 index 000000000..31d775909 --- /dev/null +++ b/extern/supraseal/nvme/streaming_node_reader_nvme.cpp @@ -0,0 +1,252 @@ +// Copyright Supranational LLC + +#include +#include +#include "../sealing/constants.hpp" +#include "../nvme/nvme.hpp" +#include "../sealing/data_structures.hpp" +#include "../util/stats.hpp" +#include "../util/util.hpp" +#include "../pc1/node_rw_t.hpp" +#include "streaming_node_reader_nvme.hpp" + +typedef batch_t node_io_batch_t; +int g_spdk_error = 0; + +template +struct streaming_node_reader_opaque_t { + // Fixed size FIFOs for requests to the parent reader + mt_fifo_t node_read_fifo; + node_rw_t* node_reader; + + spdk_ptr_t> local_buffer; + std::vector node_ios; +}; + +template streaming_node_reader_t:: +streaming_node_reader_t(nvme_controllers_t* _controllers, size_t qpair, + size_t block_offset, int core_num, size_t idle_sleep) + : controllers(_controllers), terminator(false) + { + num_slots = 0; + opaque = new streaming_node_reader_opaque_t(); + + // Streaming reads + SPDK_ASSERT(opaque->node_read_fifo.create("node_read_fifo", 4 * nvme_controller_t::queue_size)); + opaque->node_reader = new node_rw_t + (terminator, *controllers, opaque->node_read_fifo, + qpair, block_offset); + + reader_thread = std::thread([&, core_num, idle_sleep]() { + set_core_affinity(core_num); + assert(opaque->node_reader->process(idle_sleep) == 0); + }); + } + +template streaming_node_reader_t:: +~streaming_node_reader_t() { + terminator = true; + reader_thread.join(); + delete opaque->node_reader; + delete opaque; +} + +template void streaming_node_reader_t:: +alloc_slots(size_t _num_slots, size_t slot_node_count, bool _packed) { + packed = _packed; + // Round up to an even number of pages + if (packed) { + pages_per_slot = (slot_node_count + C::NODES_PER_PAGE - 1) / C::NODES_PER_PAGE; + num_slots = _num_slots; + } else { + pages_per_slot = slot_node_count; + num_slots = _num_slots; + } + + // Allocate storage + opaque->local_buffer.alloc(num_slots * pages_per_slot); + + // Allocate one node_io per page + opaque->node_ios.resize(num_slots * pages_per_slot); +} + +template void streaming_node_reader_t:: +free_slots() { + opaque->local_buffer.free(); + opaque->node_ios.clear(); +} + +template uint8_t* streaming_node_reader_t:: +get_full_buffer(size_t &bytes) { + bytes = num_slots * pages_per_slot * sizeof(page_t); + return 
(uint8_t*)&opaque->local_buffer[0]; +} + +template uint8_t* streaming_node_reader_t:: +get_slot(size_t slot) { + return (uint8_t*)&opaque->local_buffer[slot * pages_per_slot]; +} + +template uint8_t* streaming_node_reader_t:: +load_layers(size_t slot, uint32_t layer, uint64_t node, + size_t node_count, size_t num_layers, + std::atomic* valid, size_t* valid_count) { + assert (packed); + assert (slot < num_slots); + node_io_batch_t* node_ios = &opaque->node_ios[slot * pages_per_slot]; + page_t* pages = &opaque->local_buffer[slot * pages_per_slot]; + + size_t total_pages = num_layers * node_count / C::NODES_PER_PAGE; + assert (total_pages <= pages_per_slot); + + // Valid counter + valid->store(0); + + node_id_t node_to_read(layer, node); + + size_t idx = 0; + uint32_t cur_layer = layer; + for (size_t i = 0; i < num_layers; i++) { + while (opaque->node_read_fifo.free_count() < node_count) { + usleep(100); + } + for (size_t j = 0; j < node_count; j += C::NODES_PER_PAGE) { + node_io_t& io = node_ios[idx].batch[0]; + io.type = node_io_t::type_e::READ; + io.node = node_to_read; + io.valid = valid; + io.tracker.buf = (uint8_t*)&pages[idx]; + + SPDK_ASSERT(opaque->node_read_fifo.enqueue(&node_ios[idx])); + + node_to_read += C::NODES_PER_PAGE; + idx++; + } + // Increment the layer + cur_layer++; + node_to_read = node_id_t(cur_layer, node); + } + *valid_count = total_pages; + + return (uint8_t*)pages; +} + +template int streaming_node_reader_t:: +load_nodes(size_t slot, std::vector>& nodes) { + assert (!packed); + page_t* pages = &opaque->local_buffer[slot * pages_per_slot]; + node_io_batch_t* node_ios = &opaque->node_ios[slot * pages_per_slot]; + + assert (nodes.size() <= pages_per_slot); + std::atomic valid(0); + for (size_t i = 0; i < nodes.size(); i++) { + if (!opaque->node_read_fifo.is_full()) { + node_io_t& io = node_ios[i].batch[0]; + io.type = node_io_t::type_e::READ; + io.node = node_id_t(nodes[i].first, nodes[i].second); + io.valid = &valid; + io.tracker.buf = (uint8_t*)&pages[i]; + + SPDK_ERROR(opaque->node_read_fifo.enqueue(&node_ios[i])); + } + } + while (valid < nodes.size()) {} + return 0; +} + +template node_t& streaming_node_reader_t:: +get_node(size_t slot, std::vector>& nodes, + size_t idx, size_t sector_slot) { + assert (!packed); + page_t* pages = &opaque->local_buffer[slot * pages_per_slot]; + size_t node = nodes[idx].second; + node_t& n = pages[idx]. 
+ parallel_nodes[node % C::NODES_PER_PAGE] + .sectors[sector_slot]; + // From NVMe the node needs to still be byte reversed + n.reverse_l(); + return n; +} + +#ifdef RUNTIME_SECTOR_SIZE +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +#endif +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; +template class streaming_node_reader_t; diff --git a/extern/supraseal/nvme/streaming_node_reader_nvme.hpp 
b/extern/supraseal/nvme/streaming_node_reader_nvme.hpp
new file mode 100644
index 000000000..75a493900
--- /dev/null
+++ b/extern/supraseal/nvme/streaming_node_reader_nvme.hpp
@@ -0,0 +1,74 @@
+// Copyright Supranational LLC
+
+#ifndef __STREAMING_LAYER_READER_NVME_HPP__
+#define __STREAMING_LAYER_READER_NVME_HPP__
+
+#include
+#include "../sealing/data_structures.hpp"
+
+class nvme_controllers_t;
+template<class C> struct streaming_node_reader_opaque_t;
+
+// Encapsulate the SPDK portion of reading layers from NVMe
+template<class C>
+class streaming_node_reader_t {
+  streaming_node_reader_opaque_t<C>* opaque;
+
+  nvme_controllers_t* controllers;
+
+  std::atomic<bool> terminator;
+  std::thread reader_thread;
+
+  // Packed indicates nodes within a single layer will be contiguous
+  bool packed;
+  size_t num_slots;
+  size_t pages_per_slot;
+
+public:
+  streaming_node_reader_t(nvme_controllers_t* _controllers, size_t qpair,
+                          size_t block_offset, int core_num, size_t idle_sleep);
+
+  ~streaming_node_reader_t();
+
+  bool data_is_big_endian() {
+    return false;
+  }
+
+  // Allocate resources to perform N reads, each of size slot_node_count. These
+  // will be indexed by slot_id
+  // packed - indicates whether allocation should assume packed or unpacked node reads
+  void alloc_slots(size_t N, size_t slot_node_count, bool _packed);
+
+  uint8_t* get_slot(size_t slot);
+
+  uint8_t* get_full_buffer(size_t &bytes);
+
+  void free_slots();
+
+  ////////////////////////////////////////
+  // Used for PC2
+  ////////////////////////////////////////
+
+  uint8_t* load_layers(size_t slot, uint32_t layer, uint64_t node,
+                       size_t node_count, size_t num_layers,
+                       std::atomic<uint64_t>* valid, size_t* valid_count);
+
+  ////////////////////////////////////////
+  // Used for C1
+  ////////////////////////////////////////
+
+  // Load a vector of node IDs into the local buffer
+  // The nodes are a vector of layer, node_id pairs
+  // Since the nodes may be non-consecutive each node will use
+  // an entire page in the buffer.
+  int load_nodes(size_t slot, std::vector<std::pair<size_t, size_t>>& nodes);
+
+  // Retrieve a sector and node from the local buffer
+  // nodes - the vector of nodes originally read into the local buffer
+  // idx - the index of the node to retrieve
+  // sector_slot - the slot to retrieve
+  node_t& get_node(size_t slot, std::vector<std::pair<size_t, size_t>>& nodes,
+                   size_t idx, size_t sector_slot);
+};
+
+#endif
diff --git a/extern/supraseal/pc1/README.md b/extern/supraseal/pc1/README.md
new file mode 100644
index 000000000..d7771d90d
--- /dev/null
+++ b/extern/supraseal/pc1/README.md
@@ -0,0 +1,155 @@
+# Filecoin Optimized Sealing
+
+This directory contains pc1 optimized for throughput per dollar. Improved throughput per dollar is accomplished in several ways:
+- **Replace memory with NVMe** - This greatly reduces memory requirements, thereby reducing cost. The layers being hashed are stored in a relatively small in-memory cache, with the rest of the nodes stored on NVMe.
+- **User space IO** - Using SPDK (https://spdk.io/) reduces overhead and latency to NVMe drives.
+- **Increase core utilization** - In this implementation we are able to hash 4 sectors per core instead of 4 cores per sector with minimal latency impact. This dramatically reduces the number of cores required in the system.
+
+# Performance
+
+This software supports sealing up to 128 sectors simultaneously and takes approximately 3.5 hours, depending on the host. On a Threadripper PRO 5975WX (32 cores) with 12 NVMe drives attached it can perform PC1 for 64 sectors in 3 hours 30 minutes while requiring only 256 GB of memory.
Note this latency is within a few percent of the theoretical best case of 3 hours 26 minutes when operating at the all-core boost frequency.
+
+# Architecture and Design Considerations
+
+The documentation that follows is not meant to be complete, but instead provides a high level overview of the various system components and how they interact.
+
+## Stacked DRG Background
+Each layer of the graph consists of *N* 32 byte nodes that are created through a sequential hashing process. For a 32GB sector there are 1 billion nodes *per layer* and the hashing process is repeated 10 times for a total of 11 layers. Each node in the graph references previous nodes from the current layer as well as the layer before. This creates a large IO requirement, as approximately 13 reads are required to perform the hashing for each node.
+
+## Design Considerations
+
+In order to maximize throughput per dollar we need to ensure the platform resources are as utilized as possible. The PC1 operation requires serial computing and large amounts of data, therefore we need fast, capable cores, lots of storage, and bandwidth to move that data around.
+
+PC1 is dominated by SHA-256 hashing of an enormous amount of data from random locations. The way it is done in rust-fil-proofs is to store the graph's current working layer and the previous layer in RAM, use a couple of cores to prefetch data to a local shared cache, and have a single core perform the hashing. Storage Providers (SP) typically use many-core servers with 1-2 TB of RAM, which enables them to perform PC1 on 15-30 sectors in parallel. There are two main issues with this approach: the cores are underutilized and the RAM requirement is very high. In a server, cores and RAM are the most expensive components.
+
+Starting with the hashing first, we need a system that can perform SHA-256 with low latency. Fortunately, the last few generations of both AMD and Intel cores feature the SHA Extensions, which are a set of instructions that accelerate SHA-256 hashing. Diving further into SHA-256, each block takes 64 rounds, and each round is fed data from the message schedule. The SHA Extensions were designed with the intention that the rounds are the bottleneck and the message schedule instructions stay off the critical path. Now considering only the performance of the sha256rnds2 instruction, we can accurately model the compute requirements.
+
+Each node in the graph requires 20 blocks of SHA-256 to determine its value. With 20 blocks * 64 rounds per block / 2 rounds per instruction, that requires 640 serial executions of sha256rnds2. The latest AMD processor microarchitectures (Zen3, Zen4) implement the instruction with a latency of 4 cycles and a throughput of 1. This means for each node it will take 640 * 4 cycles = 2560 cycles to complete. If we expand that out for an entire 32GB graph, we have 11 layers * 2^30 nodes * 2560 cycles = 30,236,569,763,840 cycles (30 trillion!). To put this in time perspective, we divide the cycles by the core frequency in Hz to arrive at the number of seconds. For example a typical server in the 3.5 GHz range would take 8639 seconds (144 minutes), approximately two and a half hours. If we use a high end client part we can hit turbo frequencies closer to 6GHz, and at that speed it would only take 84 minutes. We now have a lower bound on the latency to seal a 32GB sector.
+
+Latency analysis is important to understand what the platform is capable of, although we are more concerned about optimizing for throughput in this exercise.
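+The cycle estimate above is easy to sanity check. The short standalone program below (illustrative only, not part of this codebase) reproduces the numbers from this paragraph:
+
+```
+// Back-of-the-envelope PC1 latency bound, using the constants from the
+// text: 4-cycle sha256rnds2 (2 rounds/instruction), 20 SHA-256 blocks
+// per node, 11 layers, 2^30 nodes per layer for a 32GB sector.
+#include <cstdio>
+#include <cstdint>
+
+int main() {
+  const uint64_t cycles_per_node = 20 * 64 / 2 * 4;  // = 2560
+  const uint64_t nodes_per_layer = 1ULL << 30;       // 32GB / 32B
+  const uint64_t layers          = 11;
+  uint64_t total = layers * nodes_per_layer * cycles_per_node;
+  printf("total cycles: %llu\n", (unsigned long long)total);
+  printf("at 3.5 GHz: %.0f s\n", total / 3.5e9);     // ~8639 s
+  printf("at 6.0 GHz: %.0f s\n", total / 6.0e9);     // ~5039 s
+  return 0;
+}
+```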
Observe again the 4:1 latency:throughput ratio for the SHA rounds instruction. Rather than let the pipelined acceleration circuits sit idle for 3 of the 4 serial cycles, we can fill them with different data streams. This is called a "multi-buffer" approach, where multiple independent data buffers are processed in parallel. This approach however makes things more nuanced, since register pressure becomes an issue. Even though there are plenty of physical registers in the core, architecturally the sha256rnds2 instruction can only address the 16 xmm registers. Assembly code for two buffers consumes all 16 registers in its most performant form, so this limits our scaling to x2. An interesting experiment is to compare the latency of a x2 assembly implementation vs. a single buffer. Empirically we find that there is almost no latency degradation, and we have the benefit of doubling the amount of data hashed in a given time without using any more resources. Since the optimal point for a 4 cycle latency instruction is a x4 implementation, we have a problem with the architectural register limit. However, this limitation can be overcome with hyper-threading. Each core is capable of running another thread in parallel to the current thread, with its own set of resources (registers). Empirically we do not find the perfect scaling we saw with x2. This makes sense because the CPU has limits on how many instructions can be issued per cycle, how many can be retired per cycle, and which ports an instruction can execute on. The good news is that throughput still scales super-linearly, and we find that running two x2 threads on a core provides the best result (4200 cycles for 4 buffers vs. 2560 for one).
+
+Having determined from a SHA-256 perspective that we are optimal at running 4 sectors at once per core, we now need to figure out how to feed the data to those cores and how many we can run in parallel. This requires going back to the graph to determine what data is needed. In the common case there are 6 parents from the current layer (one of which is the previous node) and 8 expander parents from the previous layer. Looking deeper into the graph we can analyze the expected distance of a base parent from the current node. If we create buckets for each distance using a logarithmic scale, we find a relatively equal percentage for each bucket (2-9, 10-99, 100-999, 1000-9999, etc). This means that if we keep a small cache of recent nodes, say 1M, there is a greater than 50% chance of a base parent being resident. Of the fourteen parents, we can expect to have 3 or 4 in the cache and the rest will need to be fetched.
+
+Our overall approach of replacing RAM with NVMe disks to lower cost means we are getting approximately 4 nodes from RAM and the other 10 from NVMe. If we are doing 4 buffers per core in 4200 cycles, and we assume that the cores are running at 4 GHz, then it will take 1.05 microseconds. Looking at it another way, the cores can process 1M nodes per second per buffer. Assuming 10 random reads per node, that means we need 10M IOPS per buffer. The current state of the art NVMe drives top out around 1M random read IOPS with careful tuning. Feeding a single core's four buffers would therefore take around 40 NVMe drives. Most motherboards max out at supporting 10-25 drives. Clearly a straightforward approach will not work here.
+
+The solution we have is to interleave buffers stored on NVMe and to spread the nodes across many drives. To do this we work with a fundamental 4KB block of data.
If each 4KB block of data serves many buffers, we can limit the number of random reads required. Now instead of needing 40M IOPS per core, we need only 10M IOPS total for all cores. With about 12 NVMe drives we are able to reach that 10M IOPS requirement. Doing this requires eliminating the filesystem and bypassing the operating system to directly control the drives. It also requires careful coordination across the processor, and the majority of this code is dedicated to that.
+
+## Stacked DRG - Efficient Data Layout For High Throughput PC1
+
+This software supports sealing 1-32, 64, and 128 32GB CC sectors in parallel. In order to support a high level of parallelism without increasing the cost of the system we ensure that the resources of the system are well utilized. One way to improve resource utilization is to lay out the data in a way that is efficient for IO operations like disk reads and writes. The current PC1 software writes each layer of a sector sequentially to disk. When performing PC1 for 64 sectors in parallel with this data layout, each node requires 13 reads (there are 13 parents to read) to perform the hash. However, disk reads are natively 4KB in size and only 32B from each of those reads is used, so much of the data from the read is wasted.
+
+To address this inefficiency we lay the data out in a more usable way. Since all sectors will use the same parent graph, we know that they will require the same pattern of reads. This opens the door to sharing each read across all sectors we're hashing, which effectively amortizes the cost of the read across all of the sectors being sealed in parallel.
+
+To achieve this we organize the data as shown below.
+
+![layout in page](doc/layout_within_a_page.png "Sector layout within a page")
+
+If *n* = 64, then we put all the node 1 data together, then all of the node 2 data, etc. So 32 bytes of sector 0 node 1, then 32 bytes of sector 1 node 1, up to sector 63. In the case of 64 sectors, node 1 and node 2 fit in the first 4KB page, then node 3 and node 4 in the next, etc.
+
+With this layout, reading the data for a specific node (i.e. a single 4KB read) gets us the data for all of the sectors we are sealing (up to 128 sectors) in a single read, exactly what we want for improving page read utilization.
+
+## Multiple Disks
+
+However, it turns out that a single disk is not sufficient for our storage and IO needs here, either in terms of storage capacity or performance.
+
+From a capacity point of view we need to store 32GB * 11 layers for each sector we want to seal. For 64 sectors this adds up to about 22 TB.
+
+We also need to support the random read IOPS necessary for good performance. Each node that gets sealed needs to consume 14 parents. One of those parents is the previous node, which we can reuse since it was just hashed. That leaves 13 parents, of which about 3 are close enough in the layer that we can use an in-memory cache to service them. That leaves about 10 random reads per node. Fortunately we get all 64 sectors in that read, so our IOPS needs don't change when performing PC1 for more sectors in parallel.
+
+In order to perform PC1 in 3.5 hours we need approximately 10 million IOPS.
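+Putting the within-page layout together with the disk striping described next, locating a node reduces to a little integer arithmetic. A minimal sketch (hypothetical helper names, not this codebase's API; the production addressing lives in `pc1/node_rw_t.hpp`):
+
+```
+#include <cstddef>
+
+struct node_loc_t {
+  size_t ctrl;           // which NVMe controller holds the page
+  size_t block_on_ctrl;  // 4KB block index on that controller
+  size_t byte_in_block;  // offset of this sector's 32B node
+};
+
+// n_sectors 32B nodes are interleaved per node index; 4KB pages are
+// striped round-robin across num_ctrls controllers.
+node_loc_t locate(size_t node, size_t sector,
+                  size_t n_sectors, size_t num_ctrls) {
+  const size_t NODE_SIZE = 32;
+  size_t nodes_per_page = 4096 / (n_sectors * NODE_SIZE); // 2 when n = 64
+  size_t page = node / nodes_per_page;
+  return node_loc_t{
+    page % num_ctrls,
+    page / num_ctrls,
+    (node % nodes_per_page) * n_sectors * NODE_SIZE + sector * NODE_SIZE
+  };
+}
+```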
+The image below shows an example layout with 4 disks. The data is arranged by placing consecutive pages on consecutive disks, so page 0 is on disk 0, page 1 on disk 1, page 2 on disk 2, page 3 on disk 3, then page 4 back on disk 0, etc. This gives us random read IOPS scaling that is approximately linear with the number of disks.
+
+![layout_across_controllers](doc/layout_across_controllers.png "Layout across disks")
+
+## Software Architecture
+
+Finally, we show the high level software architecture below. The basic challenge we need to overcome is providing data to the CPU cores performing SHA hashing at a rate that keeps them fully utilized. This is a big challenge since the latency of reading from NVMe is, relatively speaking, very long. However, we know what needs to be read well in advance, so we solve the latency problem by reading well ahead of time and buffering the data.
+
+In the end the majority of the software is dedicated to delivering data to the hashing cores on time, with extremely low overhead. At 10 million IOPS a single core has fewer than 500 cycles to dedicate to each page!
+
+![architecture](doc/architecture.png "Software architecture")
+
+The drawing shows a number of structures:
+- **Threads** - depicted by the wavy vertical boxes
+- **Buffers** - depicted by a series of small horizontal boxes
+- **Disks** - depicted by cylinders
+- **Data flow** - depicted with arrows
+
+### Buffers
+
+There are a few major buffers to be aware of.
+- **parents_buffer** - Holds parent node data read from disk. Data is DMA'd into the buffer by the disk. A valid bit indicates when the data is ready for consumption.
+- **parent_pointers** - Holds pointers to the parent data, either to the parents buffer or to the node buffer, which acts as an in-memory cache.
+- **node_buffer** - Stores hashed nodes that are ready to be written to disk. Also acts as an in-memory cache for parent nodes, reducing the disk IO requirements.
+- **sync buffers** - Various buffers that store synchronization data to ensure data in the buffers is not deallocated until all consumers have used it.
+- **local data buffers** - Small buffers between coordinators and hashers that store parent data in a cache friendly contiguous buffer.
+
+All of the major buffers are allocated in pinned memory using huge pages. Pinning enables data to be DMA'd over PCIe, freeing up cores to do other work. Huge pages reduce the number of TLB entries needed, dramatically reducing the TLB misses that would normally occur with a large data set.
+
+### Disk IO
+
+The foundation of the storage portion is SPDK. SPDK allows for very low level user space access to disks, eliminating system call and kernel overhead. Within those disks there are no files, simply raw blocks (pages). All sector/node data is stored this way, reading and writing in 4KB blocks at a time. This is shown on the right side of the drawing, where the `Parent Reader` and `Node Writer` threads handle the raw disk IO. Read and write requests are sent to these threads, which then service them using the attached drives.
+
+Also on the right is the cached `Parent graph`. Unlike sector/node data, its throughput needs are low enough that it is simply read from the Linux filesystem.
+
+Each disk IO thread is affinitized to a dedicated core.
+
+### Orchestrator
+
+The orchestrator manages the overall data flow. It reads the parent graph, initiates node reads and writes, and manages the parent and node buffers. To keep the buffers coherent it tracks coordinator and hasher progress to make sure data in `parents_buffer` is kept around until it is no longer needed, nodes are written to disk once they are hashed, and data in the `node_buffer` can be used as an in-memory cache.
+
+The orchestrator thread is affinitized to a dedicated core.
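+All of this thread placement relies on pinning each thread to a specific core. A rough sketch of what such a pinning helper can look like on Linux (illustrative only; this codebase's own `set_core_affinity()` helper may be implemented differently):
+
+```
+// Build with -pthread. Pins the calling thread to a single core so the
+// scheduler cannot migrate it away from its private L1/L2 (and its
+// CCX-local L3).
+#include <pthread.h>
+#include <sched.h>
+
+static int pin_to_core(int core) {
+  cpu_set_t set;
+  CPU_ZERO(&set);
+  CPU_SET(core, &set);
+  return pthread_setaffinity_np(pthread_self(), sizeof(set), &set);
+}
+```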
+
+### Coordinators
+
+Coordinators run per core complex (CCX) and place node data in the L3 cache so that it is ready for hashers to consume. Since we use the `node_buffer` as a cache, the node data is fairly scattered in memory, i.e. it is not easily predictable by the hardware prefetcher. As a result cache misses significantly impact performance if not managed carefully.
+
+The coordinators gather the node parent data from the `parents_buffer` and `parent_pointers` into a small local contiguous ring buffer that is used by the hashers.
+
+Each coordinator thread is affinitized to a dedicated core.
+
+### Hashers
+
+The hashers consume the data provided by the coordinators and perform the SHA hashing required to compute the node label. They write the data directly into the `node_buffer`, which then flows back out to disk for storage.
+
+This software hashes 2 sectors per thread, with 2 threads locked to each core using hyperthreading, resulting in 4 sectors being hashed per physical core.
+
+### Topology
+
+The system needs to be aware of the CPU caching structure to operate optimally. The image below is from `lstopo` and shows the CPU core and caching structure.
+
+![cpu_topology](doc/cpu_topology.png "CPU Topology")
+
+This CPU has 4 core complexes, each with 8 cores. Each core complex has a shared L3 cache, and each core has a private L1 and L2 cache.
+
+For 64 sectors we assign threads as follows (keeping in mind that each `hasher` hashes two sectors):
+```
+CCX Core thr0          thr1 (Hyperthread)
+  0    0 node_writer
+  0    1 parent_reader
+  0    2 orchestrator
+  0    3 coordinator 0
+  0    4 hasher 0      hasher 1
+  0    5 hasher 2      hasher 3
+  0    6 hasher 4      hasher 5
+  0    7 hasher 6      hasher 7
+  1    8 coordinator 1
+  1    9 hasher 8      hasher 9
+  1   10 hasher 10     hasher 11
+  1   11 hasher 12     hasher 13
+  1   12 hasher 14     hasher 15
+  1   13 hasher 16     hasher 17
+  1   14 hasher 18     hasher 19
+  1   15 hasher 20     hasher 21
+  2   16 coordinator 2
+  2   17 hasher 22     hasher 23
+  2   18 hasher 24     hasher 25
+  2   19 hasher 26     hasher 27
+  2   20 hasher 28     hasher 29
+  2   21 hasher 30     hasher 31
+
+```
+
+With this configuration each core complex has a coordinator that pulls node data from main memory into the L3 where the hasher threads use it for node labeling.
+
diff --git a/extern/supraseal/pc1/coordinator_t.hpp b/extern/supraseal/pc1/coordinator_t.hpp
new file mode 100644
index 000000000..f9bb27aae
--- /dev/null
+++ b/extern/supraseal/pc1/coordinator_t.hpp
@@ -0,0 +1,636 @@
+// Copyright Supranational LLC
+
+#ifndef __COORDINATOR_T_HPP__
+#define __COORDINATOR_T_HPP__
+
+// Use a mutex to synchronize when data is not ready for hashing (as
+// opposed to a spin lock).
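+//
+// Sketch of the scheme used below: every coord_ready_mtx slot starts
+// out locked. A hasher that finds its next batch not yet valid does a
+// lock()/unlock() pair on that slot's mutex, blocking until the
+// coordinator publishes the batch by unlocking it; the coordinator
+// re-locks the mutex when the slot is recycled. Without
+// MUTEX_THREAD_SYNC the hashers simply spin on coord_next_valid.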
+#define MUTEX_THREAD_SYNC
+
+#include "../sha/sha_functions.hpp"
+#include "../util/util.hpp"
+
+template<class C>
+class coordinator_t {
+  typedef typename system_buffers_t<C>::parent_ptr_batch_t parent_ptr_batch_t;
+  typedef typename system_buffers_t<C>::parent_ptr_sync_batch_t parent_ptr_sync_batch_t;
+
+  std::atomic<bool> &terminator;
+  system_buffers_t<C>& system;
+
+  // Coordinator id
+  size_t id;
+  // First sector for this coordinator
+  size_t start_sector;
+
+  // Node range to hash
+  node_id_t node_start;
+  node_id_t node_stop;
+
+  queue_stat_t num_available_nodes_stats;
+  counter_stat_t hasher_data_not_ready_stats;
+#ifdef HASHER_TSC
+  uint64_t cycles_hashing;
+  uint64_t cycles_other;
+  uint64_t tsc_start_cycle;
+#endif
+
+  // Replica IDs per hashing thread
+  struct per_thread_t {
+    replica_id_buffer_t replica_id_buffer;
+    uint32_t* replica_id_ptrs[2];
+    uint32_t* pad_0_ptr;
+  };
+  std::vector<per_thread_t> thr_data;
+
+  // Members for the coordinator function
+  thread_pool_t* pool; // Contains the pool of threads
+  size_t num_threads;
+  std::atomic<uint64_t> coord_next_valid; // Next node ID to hash
+
+  // Local node buffer to store the data copied from the parent
+  // pointers. Each batch includes all 14 parents.
+  struct coord_node_t {
+    uint32_t sectors[MAX_HASHERS_PER_COORD * NODES_PER_HASHER][NODE_WORDS];
+  };
+  struct coord_batch_t {
+    coord_node_t parents[COORD_BATCH_SIZE * PARENT_COUNT];
+  };
+  typedef ring_buffer_t coord_ring_t;
+  coord_ring_t coord_ring;
+  spdk_ptr_t coord_ring_storage;
+  // Used to count threads that have consumed each batch
+  std::atomic<size_t> coord_done_count[COORD_BATCH_COUNT];
+#ifdef MUTEX_THREAD_SYNC
+  std::mutex coord_ready_mtx[COORD_BATCH_COUNT];
+#endif
+
+  // For near parents in the current layer the data will not be ready yet
+  // for the coordinator to copy it. In this case coord_ring_offsets will
+  // store the offset from the node to be hashed for where to get the data
+  // in the node buffer. Otherwise it will contain zero.
+  parallel_node_t* coord_ring_ptrs[COORD_BATCH_COUNT][COORD_BATCH_SIZE * PARENT_COUNT];
+
+public:
+  coordinator_t(std::atomic<bool>& _terminator,
+                system_buffers_t<C>& _system,
+                size_t _id, // Coordinator number
+                size_t _start_sector,
+                topology_t::coordinator_t topology,
+                node_id_t _node_start,
+                node_id_t _node_stop,
+                replica_id_buffer_t* replica_id_buffers // One per sector
+                ) :
+    terminator(_terminator),
+    system(_system),
+    coord_next_valid(_node_start)
+  {
+    id = _id;
+    start_sector = _start_sector;
+    node_start = _node_start;
+    node_stop = _node_stop;
+
+    // printf("Constructing coordinator %ld, sector %ld, num_hashers %ld\n",
+    //        id, start_sector, topology.num_hashers);
+
+    thr_data.resize(topology.num_hashers);
+    for (size_t i = 0; i < topology.num_hashers; i++) {
+      thr_data[i].replica_id_buffer = replica_id_buffers[i];
+      thr_data[i].replica_id_ptrs[0] = thr_data[i].replica_id_buffer.ids[0];
+      thr_data[i].replica_id_ptrs[1] = thr_data[i].replica_id_buffer.cur_loc[0];
+      thr_data[i].pad_0_ptr = thr_data[i].replica_id_buffer.pad_0[0];
+    }
+
+#ifdef HASHER_TSC
+    cycles_hashing = 0;
+    cycles_other = 0;
+#endif
+
+    // Set up the ring buffer for local storage
+    coord_ring_storage.alloc(COORD_BATCH_COUNT);
+    SPDK_ASSERT(coord_ring.create(coord_ring_storage));
+    for (size_t i = 0; i < COORD_BATCH_COUNT; i++) {
+      coord_done_count[i] = 0;
+    }
+
+#ifdef MUTEX_THREAD_SYNC
+    for (size_t i = 0; i < COORD_BATCH_COUNT; i++) {
+      coord_ready_mtx[i].lock();
+    }
+#endif
+
+    // Set up hashing threads.
+ num_threads = topology.num_hashers; + // printf("Coord %ld start_sector %ld sectors %ld num_threads %ld\n", + // id, start_sector, topology.num_sectors(), num_threads); + + pool = new thread_pool_t(num_threads); + size_t sector = start_sector; + for (size_t i = 0; i < num_threads; i++) { + int thr_core = topology.get_hasher_core(i); + pool->spawn([&, i, thr_core, sector]() { + // printf("Setting affinity for hasher %ld thread %ld to core %d. sector %ld\n", + // id, i, thr_core, sector); + set_core_affinity(thr_core); + run_thread(i, sector); + }); + sector += NODES_PER_HASHER; + } + + num_available_nodes_stats.init("hasher available", 0); + hasher_data_not_ready_stats.init("hasher_data_not_ready"); + } + + ~coordinator_t() {} + + int init() { + return 0; + } + + void clear_stats() { + num_available_nodes_stats.clear(); + hasher_data_not_ready_stats.clear(); + } + void snapshot() { + num_available_nodes_stats.snapshot(); + hasher_data_not_ready_stats.snapshot(); + } + void print() { + num_available_nodes_stats.print(); + hasher_data_not_ready_stats.print(); + } + + // Hash one node + void hash(node_id_t node, coord_node_t* parent_nodes, + parallel_node_t** local_ptrs, parallel_node_t* hash_out, + size_t offset_id, size_t thread_id, per_thread_t* replicaId +#ifdef VERIFY_HASH_RESULT + , uint32_t* sealed_data +#endif + ) { + // Update the location in replica ID + uint32_t cur_layer = node.layer() + 1; + replicaId->replica_id_buffer.cur_loc[0][0] = cur_layer; + replicaId->replica_id_buffer.cur_loc[0 + 1][0] = cur_layer; + replicaId->replica_id_buffer.cur_loc[0][2] = node.node(); + replicaId->replica_id_buffer.cur_loc[0 + 1][2] = node.node(); + + // Create parent pointers for the hasher + parent_ptr_batch_t ptr_batch; + assert (PARENT_PTR_BATCH_SIZE == PARENT_COUNT); + for (size_t i = 0; i < PARENT_COUNT; i++) { + if (local_ptrs[i] != nullptr) { + ptr_batch.batch[i].ptr = (parallel_node_t*)&local_ptrs[i]->sectors[offset_id]; + } else { + ptr_batch.batch[i].ptr = (parallel_node_t*)&parent_nodes[i].sectors[thread_id * NODES_PER_HASHER]; + } + } + + parallel_node_t** cur_parent_ptr = &ptr_batch.batch[0].ptr; + uint32_t** cur_data_buf; + size_t blocks; + size_t repeat; + if (node.node() == 0) { + cur_data_buf = &replicaId->pad_0_ptr; + blocks = NODE_0_BLOCKS; + repeat = NODE_0_REPEAT; + } else { + cur_data_buf = (uint32_t**)cur_parent_ptr; + blocks = NODE_GT_0_BLOCKS; + if (cur_layer == 1) + repeat = LAYER_1_REPEAT; + else + repeat = LAYERS_GT_1_REPEAT; + } + + // // { + // if (node.node() == 0) { + // unique_lock lck(print_mtx); + // printf("blocks %ld, repeat %ld\n", blocks, repeat); + // for (size_t j = 0; j < PARENT_PTR_BATCH_SIZE; j++) { + // //printf("Printing parent %ld\n", j); + // if (ptr_batch.batch[j].ptr == nullptr) { + // printf("H %ld T %ld N %8lx.%2x P %02ld: nullptr\n", + // offset_id, thread_id, node.id(), node.node(), j); + // } else { + // char prefix[64]; + // snprintf(prefix, 64, "H %ld N %ld %8lx.%2d P %02ld ", + // offset_id, thread_id, node.id(), node.node(), j); + // //size_t node_idx = batch->batch[j].node % C::NODES_PER_PAGE; + // print_node(ptr_batch.batch[j].ptr, 2, prefix); + // } + // //printf("Received data for buf %d\n", io->buf_id); + // } + // } + + for (size_t j = 0; j < PARENT_PTR_BATCH_SIZE; j++) { + __builtin_prefetch(&ptr_batch.batch[j].ptr->sectors[0].limbs[0], 0, 3); + __builtin_prefetch(&ptr_batch.batch[j].ptr->sectors[0 + 1].limbs[0], 0, 3); + } + +#ifdef HASHER_TSC + uint64_t tsc = get_tsc(); + cycles_other += (tsc - tsc_start_cycle); + tsc_start_cycle 
= tsc; +#endif + + // Hash the node + sha_ext_mbx2(&hash_out->sectors[offset_id].limbs[0], + &(replicaId->replica_id_ptrs[0]), cur_data_buf, + 0, blocks, repeat); + +#ifdef HASHER_TSC + tsc = get_tsc(); + cycles_hashing += (tsc - tsc_start_cycle); + tsc_start_cycle = tsc; +#endif + + // // Periodically print the hash result + // if (offset_id == 0 && thread_id == 0 && + // (node.node() & ((NODE_COUNT / 4) - 1)) == 0) { + // unique_lock lck(print_mtx); + // printf("H %3ld T %3ld node %08lx hasher out %p: ", + // offset_id, thread_id, node.id(), &hash_out->sectors[offset_id].limbs[0]); + // print_digest_reorder(&hash_out->sectors[offset_id].limbs[0]); + // } + +#ifdef VERIFY_HASH_RESULT + if (offset_id == 18 && + (htonl(hash_out->sectors[offset_id][0]) != sealed_data[node.node() * NODE_WORDS] || + htonl(hash_out->sectors[offset_id + 1][0]) != sealed_data[node.node() * NODE_WORDS])) { + unique_lock lck(print_mtx); + + printf("\nMISMATCH: Hasher %ld thr %ld Node %x layer %d id %lx thr_offset_id %ld hash_out->sectors %p %p\n", + id, thread_id, node.node(), node.layer() + 1, node.id(), + offset_id, + hash_out->sectors, hash_out->sectors[offset_id]); + print_digest_reorder(hash_out->sectors[offset_id]); + print_digest_reorder(hash_out->sectors[offset_id + 1]); + printf("Expected:\n"); + print_digest_reorder(&sealed_data[node.node() * NODE_WORDS]); + + printf("blocks %ld, repeat %ld\n", blocks, repeat); + for (size_t j = 0; j < PARENT_PTR_BATCH_SIZE; j++) { + //printf("Printing parent %ld\n", j); + if (ptr_batch.batch[j].ptr == nullptr) { + printf("H %ld T %ld N %8lx.%2x P %02ld: nullptr\n", + offset_id, thread_id, node.id(), node.node(), j); + } else { + char prefix[64]; + snprintf(prefix, 64, "H %ld N %ld %8lx.%2d P %02ld ", + offset_id, thread_id, node.id(), node.node(), j); + //size_t node_idx = batch->batch[j].node % C::NODES_PER_PAGE; + //print_node((parallel_node_t*)ptr_batch.batch[j].ptr->sectors[0], 2, prefix); + printf("%s %p: ", prefix, ptr_batch.batch[j].ptr->sectors[0]); + print_digest_reorder(ptr_batch.batch[j].ptr->sectors[0]); + printf("%s %p: ", prefix, ptr_batch.batch[j].ptr->sectors[1]); + print_digest_reorder(ptr_batch.batch[j].ptr->sectors[1]); + + } + //printf("Received data for buf %d\n", io->buf_id); + } + printf("H %ld T %ld node %08lx hasher out %p: ", + offset_id, thread_id, node.id(), hash_out->sectors[offset_id]); + print_digest_reorder(hash_out->sectors[offset_id]); + + + sleep(5); + abort(); + } +#endif + } + + // Run a hashing thread + // thr_count - thread count within this coordinator + // sector - sector to hash + __attribute__ ((noinline)) void run_thread(size_t thr_count, size_t sector) { + // Absolute node count + node_id_t thr_node = node_start; + // Index into local coordinator ring buffer + size_t coord_idx = 0; + // Index into node buffer + size_t node_idx = 0; + // The sector offset within buffers + size_t offset_id = sector; + // Count the number of times data is not ready + size_t data_not_ready = 0; + // Mechanism to reset counters after starting to clear startup noise + bool data_reset = false; + +#ifdef VERIFY_HASH_RESULT + // The CWD should contain a symlink to a cached benchy run to check results + const char* sealed_file_template = "../cache_benchy_run_32G/sc-02-data-layer-%d.dat"; + int sealed_data_fd = 0; + uint32_t* sealed_data = nullptr; +#endif + +#ifdef HASHER_TSC + tsc_start_cycle = get_tsc(); +#endif + + per_thread_t* replicaId = &thr_data[thr_count]; + + // { + // unique_lock lck(print_mtx); + + // printf("Starting hasher %ld thread %ld 
offset_id %ld\n", id, thr_count, offset_id); + // print_digest(&(replicaId->replica_id_buffer.ids[0][0])); + // print_digest(&(replicaId->replica_id_buffer.cur_loc[0][0])); + // print_digest(&(replicaId->replica_id_buffer.pad_0[0][0])); + // print_digest(&(replicaId->replica_id_buffer.pad_1[0][0])); + // print_digest(&(replicaId->replica_id_buffer.padding[0][0])); + // printf("\n"); + // } + + while (thr_node < node_stop) { + // Wait for the next node to be ready + uint64_t valid_nodes; + valid_nodes = coord_next_valid.load(DEFAULT_MEMORY_ORDER); + if (valid_nodes <= thr_node.id()) { + data_not_ready++; + +#ifdef MUTEX_THREAD_SYNC + coord_ready_mtx[coord_idx].lock(); + coord_ready_mtx[coord_idx].unlock(); +#endif + + while ((valid_nodes = coord_next_valid.load(DEFAULT_MEMORY_ORDER)) <= thr_node.id()) {} + } + + // Clear counters after startup + if (valid_nodes > (node_start + C::GetNumNodes() / 128) && !data_reset) { + data_not_ready = 0; +#ifdef HASHER_TSC + cycles_other = 0; + cycles_hashing = 0; +#endif + data_reset = true; + } + + while (thr_node.id() < valid_nodes) { + coord_node_t* parent_batch = coord_ring.get_entry(coord_idx)->parents; + parallel_node_t** local_ptr_batch = coord_ring_ptrs[coord_idx]; + +#ifdef VERIFY_HASH_RESULT + if (thr_node.node() == 0) { + if (sealed_data_fd != 0) { + munmap(sealed_data, C::GetSectorSize()); + close(sealed_data_fd); + } + char sealed_file_name[256]; + uint32_t cur_layer = thr_node.layer() + 1; + sprintf(sealed_file_name, sealed_file_template, cur_layer); + sealed_data_fd = open(sealed_file_name, O_RDONLY); + assert (sealed_data_fd != -1); + sealed_data = (uint32_t*)mmap(NULL, C::GetSectorSize(), PROT_READ, + MAP_PRIVATE, sealed_data_fd, 0); + if (sealed_data == MAP_FAILED) { + perror("mmap"); + exit(1); + } + assert (sealed_data != MAP_FAILED); + } +#endif + + for (size_t i = 0; i < COORD_BATCH_SIZE; i++) { + size_t node_in_page = thr_node.node() % C::NODES_PER_PAGE; + coord_node_t* parent_nodes = &parent_batch[i * PARENT_COUNT]; + parallel_node_t* hash_out = &system.node_buffer.get_entry(node_idx)-> + batch[0].parallel_nodes[node_in_page]; + hash(thr_node, parent_nodes, + &local_ptr_batch[i * PARENT_COUNT], + hash_out, offset_id, thr_count, replicaId +#ifdef VERIFY_HASH_RESULT + , sealed_data +#endif + ); + + thr_node++; + if (thr_node.node() % C::NODES_PER_PAGE == 0) { + node_idx = system.node_buffer.incr(node_idx); + } + } + + // Indicate that we're done + coord_done_count[coord_idx].fetch_add(1, DEFAULT_MEMORY_ORDER); + coord_idx = coord_ring.incr(coord_idx); + } + } +// #ifdef HASHER_TSC +// printf("Hasher %ld thr %ld: data_not_ready %ld, cycles_hashing %lu, cycles_other %lu\n", +// id, thr_count, data_not_ready, cycles_hashing, cycles_other); +// #else +// printf("Hasher %ld thr %ld: data_not_ready %ld\n", +// id, thr_count, data_not_ready); +// #endif + } + + // Perform the coordinator functions + int run() { + // Absolute node count + node_id_t node(node_start); + // Completed node count + node_id_t completed_node(node_start); + + // Index into parent_buffer + size_t idx = 0; + // Index into node buffer + size_t node_idx = 0; + // The offset into buffers + size_t offset_id = start_sector; + + // Number of batches between updates back to the storage core + const size_t NODE_COUNT_UPDATE_INTERVAL = 4; + size_t node_update_batch_count = 0; + + size_t data_not_ready = 0; + + //printf("Starting coord %ld thread %d offset_id %ld\n", id, 0, offset_id); + + timestamp_t layer_start = std::chrono::high_resolution_clock::now(); + +#ifdef 
PRINT_STALLS
+    size_t no_action_run_length = 0;
+#endif
+
+    // Node count within the coordinator local data batch
+    size_t coord_batch_count = 0;
+    // Index in the local data ring buffer
+    size_t coord_batch_idx = 0;
+    coord_batch_t* current_batch = coord_ring.reserve(coord_batch_idx);
+    parallel_node_t** coord_ring_ptrs_batch = coord_ring_ptrs[coord_batch_idx];
+
+    while(node < node_stop) {
+      bool advanced = false;
+      bool done = false;
+
+      bool cur_valid = system.parent_buffer.is_valid(idx);
+      while (!done && cur_valid) {
+
+#ifdef STATS
+        // Periodically count the number of available nodes in the parent buffer
+        if (id == 0 && (node.id() & STATS_MASK) == 0) {
+          size_t count = 0;
+          size_t count_idx = idx;
+          size_t cur_head = system.parent_buffer.get_head();
+          while (system.parent_buffer.is_valid(count_idx) && count_idx != cur_head) {
+            count_idx = system.parent_buffer.incr(count_idx);
+            count++;
+          }
+          num_available_nodes_stats.record(count);
+        }
+#endif
+        // Precompute next indices where the instructions can be hidden
+        size_t next_idx = system.parent_buffer.incr(idx);
+        cur_valid = system.parent_buffer.is_valid(next_idx);
+        size_t next_node_idx = system.node_buffer.incr(node_idx);
+        done = node.id() + 1 == node_stop.id();
+
+        if (node.node() == 0 && node.layer() > 0) {
+          if (id == 0) {
+            timestamp_t stop = std::chrono::high_resolution_clock::now();
+            uint64_t secs = std::chrono::duration_cast<
+              std::chrono::seconds>(stop - layer_start).count();
+            printf("Layer took %ld seconds\n", secs);
+            layer_start = stop;
+          }
+        }
+
+        // Copy the parent pointer data into the local buffer
+        parent_ptr_batch_t* ptr_batch = &system.parent_ptrs[idx];
+        parent_ptr_sync_batch_t* ptr_sync_batch = &system.parent_ptr_syncs[idx];
+        for (size_t j = 0; j < PARENT_COUNT; j++) {
+          coord_node_t* dst = &current_batch->parents[coord_batch_count * PARENT_COUNT + j];
+          parallel_node_t* src = ptr_batch->batch[j].ptr;
+
+          if (src == nullptr) {
+            // Do nothing
+            coord_ring_ptrs_batch[coord_batch_count * PARENT_COUNT + j] = nullptr;
+          } else if (ptr_sync_batch->batch[j].node_buffer_idx == parent_ptr_sync_t::LOCAL_NODE) {
+            // Send the offset down to the hashers
+            coord_ring_ptrs_batch[coord_batch_count * PARENT_COUNT + j] = src;
+          } else {
+            memcpy(dst, &src->sectors[offset_id], num_threads * NODES_PER_HASHER * NODE_SIZE);
+            coord_ring_ptrs_batch[coord_batch_count * PARENT_COUNT + j] = nullptr;
+          }
+        }
+
+        // // Print the input data
+        // if (id == 1 && node.id() == 0x115f9a05) {
+        //   unique_lock lck(print_mtx);
+        //   parent_ptr_sync_batch_t* ptr_sync_batch = &system.parent_ptr_syncs[idx];
+        //   printf("\nCoordinator printing parents\n");
+        //   for (size_t j = 0; j < PARENT_PTR_BATCH_SIZE; j++) {
+        //     if (ptr_batch->batch[j].ptr == nullptr) {
+        //       printf("C N %8lx.%2x P %02ld: nullptr\n", node.id(), node.node(), j);
+        //     } else if (ptr_sync_batch->batch[j].node_buffer_idx == parent_ptr_sync_t::LOCAL_NODE) {
+        //       // Send the offset down to the hashers
+        //       printf("C N %8lx.%2x P %02ld: local_node\n", node.id(), node.node(), j);
+        //     } else {
+        //       char prefix[64];
+        //       snprintf(prefix, 64, "C N %8lx.%2d P %02ld $ Nidx %8x %p: ", node.id(), node.node(), j,
+        //                ptr_sync_batch->batch[j].node_buffer_idx, ptr_batch->batch[j].ptr->sectors[18]);
+        //       //size_t node_idx = batch->batch[j].node % C::NODES_PER_PAGE;
+        //       //print_node(ptr_batch->batch[j].ptr, 2, prefix);
+        //       printf("%s\n", prefix);
+        //       print_digest_reorder(ptr_batch->batch[j].ptr->sectors[18]);
+        //       print_digest_reorder(ptr_batch->batch[j].ptr->sectors[19]);
+        //     }
+        //
//printf("Received data for buf %d\n", io->buf_id); + // } + // } + + // Once the batch is completed make it available to the hashers + coord_batch_count++; + if (coord_batch_count == COORD_BATCH_SIZE) { +#ifdef MUTEX_THREAD_SYNC + coord_ready_mtx[coord_batch_idx].unlock(); +#endif + coord_next_valid += COORD_BATCH_SIZE; + // Recover a buffer if needed + while (coord_ring.is_full()) { + if (coord_done_count[coord_ring.get_tail()] == num_threads) { + coord_done_count[coord_ring.get_tail()] = 0; +#ifdef MUTEX_THREAD_SYNC + coord_ready_mtx[coord_ring.get_tail()].lock(); +#endif + coord_ring.release(); + completed_node += COORD_BATCH_SIZE; + break; + } + } + current_batch = coord_ring.reserve(coord_batch_idx); + assert (current_batch != nullptr); + coord_ring_ptrs_batch = coord_ring_ptrs[coord_batch_idx]; + + coord_batch_count = 0; + node_update_batch_count++; + } + + // Advance to the next node + advanced = true; + node++; + idx = next_idx; + if (node.node() % C::NODES_PER_PAGE == 0) { + node_idx = next_node_idx; + } + if (node_update_batch_count == NODE_COUNT_UPDATE_INTERVAL) { + // Batch update - this is expensive. + system.coordinator_node[id]->store(completed_node); + node_update_batch_count = 0; + } + } + if (advanced) { +#ifdef PRINT_STALLS + no_action_run_length = 0; +#endif + } else { + assert (cur_valid == false); + hasher_data_not_ready_stats.record(); + data_not_ready++; +#ifdef PRINT_STALLS + no_action_run_length++; + if (no_action_run_length == 1024) { + printf("Hasher detected stall node %lu, node_idx %lu, parent_buf_valid %d %p\n", node.id(), idx, + system.parent_buffer.is_valid(idx), system.parent_buffer.get_valid_ptr(idx)); + } +#endif + } + } + // Wait for the remaining batches to finish + while(completed_node < node_stop) { + assert (coord_ring.size() > 0); + while (coord_done_count[coord_ring.get_tail()] < num_threads) {} + coord_done_count[coord_ring.get_tail()] = 0; + coord_ring.release(); + completed_node += COORD_BATCH_SIZE; + } + + // printf("Coordinator %ld: data_not_ready %ld\n", id, data_not_ready); + system.coordinator_node[id]->store(completed_node); + + if (id == 0) { + timestamp_t stop = std::chrono::high_resolution_clock::now(); + uint64_t secs = std::chrono::duration_cast< + std::chrono::seconds>(stop - layer_start).count(); + printf("Layer took %ld seconds\n", secs); + } + + return 0; + } +}; + +template +inline void coordinator_clear_stats(coordinator_t* coordinator) { + coordinator->clear_stats(); +} +template +inline void coordinator_snapshot(coordinator_t* coordinator) { + coordinator->snapshot(); +} +template +inline void coordinator_print(coordinator_t* coordinator) { + coordinator->print(); +} + +#endif diff --git a/extern/supraseal/pc1/doc/architecture.png b/extern/supraseal/pc1/doc/architecture.png new file mode 100644 index 000000000..04b63f7e2 Binary files /dev/null and b/extern/supraseal/pc1/doc/architecture.png differ diff --git a/extern/supraseal/pc1/doc/cpu_topology.png b/extern/supraseal/pc1/doc/cpu_topology.png new file mode 100644 index 000000000..daa938cf4 Binary files /dev/null and b/extern/supraseal/pc1/doc/cpu_topology.png differ diff --git a/extern/supraseal/pc1/doc/layout_across_controllers.png b/extern/supraseal/pc1/doc/layout_across_controllers.png new file mode 100644 index 000000000..9bb28e736 Binary files /dev/null and b/extern/supraseal/pc1/doc/layout_across_controllers.png differ diff --git a/extern/supraseal/pc1/doc/layout_within_a_page.png b/extern/supraseal/pc1/doc/layout_within_a_page.png new file mode 100644 index 
000000000..9897093e4
Binary files /dev/null and b/extern/supraseal/pc1/doc/layout_within_a_page.png differ
diff --git a/extern/supraseal/pc1/node_rw_t.hpp b/extern/supraseal/pc1/node_rw_t.hpp
new file mode 100644
index 000000000..39f7b75cd
--- /dev/null
+++ b/extern/supraseal/pc1/node_rw_t.hpp
@@ -0,0 +1,254 @@
+// Copyright Supranational LLC
+
+#ifndef __NODE_RW_T_HPP__
+#define __NODE_RW_T_HPP__
+
+#include "../util/stats.hpp"
+
+///////////////////////////////////////////////////////////////////////////
+//
+// On disk node storage and addressing
+//
+//       |     block     |
+//       | n0 | n1 | n2 | n3 |
+// ctrl0 | 00 | 01 | 02 | 03 | 16 | 17 | 18 | 19 |
+// ctrl1 | 04 | 05 | 06 | 07 | ...
+// ctrl2 | 08 | 09 | 10 | 11 |
+// ctrl3 | 12 | 13 | 14 | 15 |
+//
+// block = node / NODES_PER_PAGE
+// ctrl  = block % NUM_CTRLS
+//
+///////////////////////////////////////////////////////////////////////////
+
+template<class C>
+inline void nvme_node_indexes(size_t num_controllers, size_t node,
+                              size_t &ctrl_id, size_t &block_on_controller) {
+  size_t block = node / C::NODES_PER_PAGE;
+  block_on_controller = block / num_controllers;
+  ctrl_id = block - block_on_controller * num_controllers;
+}
+
+// Process read/write IO requests
+// Templated with a config and batch_t specialization
+template<class C, class B>
+class node_rw_t {
+  std::atomic<bool>& terminator;
+  nvme_controllers_t& controllers;
+
+private:
+  // FIFO of requests
+  mt_fifo_t<B>& parent_read_fifo;
+  // Qpair to use for reads/writes
+  size_t qpair_id;
+  // Block offset for all reads/writes
+  size_t block_offset;
+
+  // Stats counters
+  uint64_t ios_issued;
+  uint64_t ios_completed;
+  uint64_t parent_read_fifo_empty;
+  uint64_t disk_queues_full;
+
+  queue_stat_t parent_read_fifo_avail_stats;
+  queue_stat_t min_free_stats;
+  queue_stat_t num_issue_stats;
+
+public:
+  node_rw_t(std::atomic<bool>& _terminator,
+            nvme_controllers_t& _controllers,
+            mt_fifo_t<B>& _parent_read_fifo,
+            size_t _qpair_id,
+            size_t _block_offset
+            ):
+    terminator(_terminator),
+    controllers(_controllers),
+    parent_read_fifo(_parent_read_fifo)
+  {
+    qpair_id = _qpair_id;
+    block_offset = _block_offset;
+  }
+
+  void cleanup() {
+  }
+
+  int init() {
+    parent_read_fifo_avail_stats.init("r:rw_fifo_avail", 0);
+    min_free_stats.init("r:min_free", 0);
+    num_issue_stats.init("r:num_issue", 0);
+    return 0;
+  }
+  void clear_stats() {
+    parent_read_fifo_avail_stats.clear();
+    min_free_stats.clear();
+    num_issue_stats.clear();
+  }
+  void snapshot() {
+    parent_read_fifo_avail_stats.snapshot();
+    min_free_stats.snapshot();
+    num_issue_stats.snapshot();
+  }
+  void print() {
+    parent_read_fifo_avail_stats.print();
+    min_free_stats.print();
+    num_issue_stats.print();
+  }
+
+private:
+  static int completion_cb(void *arg) {
+    node_io_t* io = (node_io_t *)arg;
+    // Set the valid bit
+    io->valid->fetch_add(1, DEFAULT_MEMORY_ORDER);
+    return 0;
+  }
+
+public:
+  // Process IO requests
+  int process(size_t idle_sleep = 0, size_t duty_cycle = 0) {
+    // Reset stats
+    ios_issued = 0;
+    ios_completed = 0;
+    parent_read_fifo_empty = 0;
+    disk_queues_full = 0;
+
+    // Data collection
+    size_t outstanding_counters[controllers.size()];
+    size_t total_counters[controllers.size()];
+    size_t samples = 0;
+    const size_t interval = 250;
+    size_t delay_count = 0;
+    for (size_t ctrl_id = 0; ctrl_id < controllers.size(); ctrl_id++) {
+      outstanding_counters[ctrl_id] = 0;
+      total_counters[ctrl_id] = 0;
+    }
+
+    size_t iter_count = 0;
+
+    // Run
+    while (!terminator) {
+      // Track whether we are able to do any work
+      bool dequed_any = false;
+
+      // Determine open disk io slots so we know we can dispatch
+      size_t min_free = nvme_controller_t::queue_size -
+        controllers[0].get_outstanding_io_ops(qpair_id);
+      for (size_t ctrl_id = 1; ctrl_id < controllers.size(); ctrl_id++) {
+        size_t free_slots = nvme_controller_t::queue_size -
+          controllers[ctrl_id].get_outstanding_io_ops(qpair_id);
+        min_free = std::min(min_free, free_slots);
+      }
+      // min_free = best_disk_free;
+      if (delay_count == interval) {
+        for (size_t ctrl_id = 0; ctrl_id < controllers.size(); ctrl_id++) {
+          outstanding_counters[ctrl_id] +=
+            controllers[ctrl_id].get_outstanding_io_ops(qpair_id);
+        }
+        samples++;
+        delay_count = 0;
+      }
+      delay_count++;
+
+
+      // Determine the number of batches to process
+      size_t available = parent_read_fifo.size();
+      size_t num_batches = std::min(min_free / B::BATCH_SIZE, available);
+
+      parent_read_fifo_avail_stats.record(available);
+      min_free_stats.record(min_free);
+      num_issue_stats.record(num_batches);
+
+      for (size_t i = 0; i < num_batches; i++) {
+        B* req_batch = parent_read_fifo.dequeue();
+        assert (req_batch != nullptr);
+        dequed_any = true;
+
+        for (size_t j = 0; j < B::BATCH_SIZE; j++) {
+          node_io_t* req = &req_batch->batch[j];
+          nvme_io_tracker_t* io = &req->tracker;
+
+          if (req->type == node_io_t::type_e::NOP) {
+            // do nothing
+            continue;
+          }
+
+          // Compute the strided index on disk
+          size_t ctrl_id, strided_block;
+          nvme_node_indexes<C>(controllers.size(), req->node, ctrl_id, strided_block);
+
+          // Initiate the IO
+          // printf("%s node %lx strided_block %lx ctrl %ld qpair %ld\n",
+          //        req->type == node_io_t::type_e::READ ? "Reading" : "Writing",
+          //        req->node, strided_block, ctrl_id, qpair_id);
+          if (req->type == node_io_t::type_e::READ) {
+            SPDK_ERROR(controllers[ctrl_id].read(io, 0, qpair_id, strided_block + block_offset,
+                                                 completion_cb, req));
+          } else {
+            SPDK_ERROR(controllers[ctrl_id].write(io, 0, qpair_id, strided_block + block_offset,
+                                                  completion_cb, req));
+          }
+          ios_issued++;
+          total_counters[ctrl_id]++;
+        }
+      }
+      if (min_free < B::BATCH_SIZE) {
+        disk_queues_full++;
+      } else if (!dequed_any) {
+        parent_read_fifo_empty++;
+      }
+
+      // Process completions
+      size_t ios_completed_now = 0;
+      for (size_t ctrl_id = 0; ctrl_id < controllers.size(); ctrl_id++) {
+        ios_completed_now += controllers[ctrl_id].process_completions(qpair_id);
+      }
+      ios_completed += ios_completed_now;
+
+      if ((!dequed_any && ios_completed_now == 0 && idle_sleep > 0) ||
+          iter_count == duty_cycle) {
+        usleep(idle_sleep);
+        iter_count = 0;
+      }
+      iter_count++;
+    }
+    for (size_t ctrl_id = 0; ctrl_id < controllers.size(); ctrl_id++) {
+      ios_completed += controllers[ctrl_id].process_all_completions(qpair_id);
+    }
+
+    // {
+    //   unique_lock lck(print_mtx);
+
+    //   print_stats();
+
+    //   printf("Average outstanding ops %ld samples\n", samples);
+    //   for (size_t ctrl_id = 0; ctrl_id < controllers.size(); ctrl_id++) {
+    //     printf(" %ld: %0.2lf\n", ctrl_id,
+    //            (double)outstanding_counters[ctrl_id] / (double)samples);
+    //   }
+    // }
+    // printf("Total ops\n");
+    // for (size_t ctrl_id = 0; ctrl_id < controllers.size(); ctrl_id++) {
+    //   printf(" %ld: %ld\n", ctrl_id, total_counters[ctrl_id]);
+    // }
+
+    return 0;
+  }
+  void print_stats() {
+    printf("node_rw_t ios issued %ld completed %ld parent_read_fifo_empty %ld disk_queues_full %ld\n",
+           ios_issued, ios_completed, parent_read_fifo_empty, disk_queues_full);
+  }
+};
+
+template<class C, class B>
+inline void rw_clear_stats(node_rw_t<C, B> *rw) {
+  rw->clear_stats();
+}
+template<class C, class B>
+inline void rw_snapshot(node_rw_t<C, B> *rw) {
+  rw->snapshot();
+}
+template<class C, class B>
+inline void rw_print(node_rw_t<C, B> *rw) {
+  rw->print();
+}
+
+#endif
diff --git a/extern/supraseal/pc1/orchestrator_t.hpp b/extern/supraseal/pc1/orchestrator_t.hpp
new file mode 100644
index 000000000..69970c1ac
--- /dev/null
+++ b/extern/supraseal/pc1/orchestrator_t.hpp
@@ -0,0 +1,845 @@
+// Copyright Supranational LLC
+
+#ifndef __ORCHESTRATOR_T_HPP__
+#define __ORCHESTRATOR_T_HPP__
+
+template<class C>
+class orchestrator_t {
+  typedef typename system_buffers_t<C>::node_io_batch_t node_io_batch_t;
+  typedef typename system_buffers_t<C>::node_batch_t node_batch_t;
+  typedef typename system_buffers_t<C>::node_buffer_iterator_t node_buffer_iterator_t;
+  typedef typename system_buffers_t<C>::page_batch_ptr_t page_batch_ptr_t;
+  typedef typename system_buffers_t<C>::node_batch_ptr_t node_batch_ptr_t;
+
+  // Nodes to process
+  node_id_t node_start;
+  node_id_t node_stop;
+
+  // Terminator to shut down threads
+  std::atomic<bool> &terminator;
+
+  // Reference to the system class
+  system_buffers_t<C>& system;
+
+  // Record the previously hashed node for setting up parent pointers
+  parallel_node_t* prev_hashed_node;
+
+  // Absolute count of node buffer tail node
+  node_id_t tail_node;
+  // Tail node of the parent node buffer
+  node_id_t tail_node_parents;
+  // The oldest node fully written to disk
+  node_buffer_iterator_t tail_node_write;
+  // The next node to write to disk
+  node_buffer_iterator_t head_node_write;
+  // The oldest node being hashed by the hashers
+  node_id_t min_node_hash;
+
+  // Stat counters
+  uint64_t cached_parents;
+  uint64_t reads_issued;
+  uint64_t writes_issued;
+  uint64_t parent_read_fifo_full;
+  uint64_t io_req_pool_empty;
+  uint64_t parent_buffer_full;
+  uint64_t node_buffer_full;
+  uint64_t from_hashers_empty;
+  uint64_t no_action;
+  uint64_t node_write_fifo_full;
+
+  // TSC
+#ifdef TSC
+  uint64_t tsc_start;
+  uint64_t loop_cycles;
+  uint64_t parent_cycles;
+  uint64_t min_node_cycles;
+  uint64_t read_batch_cycles;
+  uint64_t write_cycles;
+  uint64_t release_parent_cycles;
+  uint64_t advance_written_cycles;
+  uint64_t advance_tail_cycles;
+  uint64_t noaction_cycles;
+  uint64_t rb_check_buf_cycles;
+  uint64_t rb_reserve_buf_cycles;
+  uint64_t rb_special_case_cycles;
+  uint64_t rb_cache_params_cycles;
+  uint64_t rb_parents_cycles;
+  uint64_t rb_send_cycles;
+#endif
+
+  int parents_fd;
+  uint32_t* parents_buf;
+  parent_iter_t parent_iter;
+
+public:
+  orchestrator_t(std::atomic<bool> &_terminator,
+                 system_buffers_t<C>& _system,
+                 node_id_t _node_start,
+                 node_id_t _node_stop,
+                 const char* parents_file) :
+    terminator(_terminator),
+    system(_system),
+    tail_node(_node_start),
+    tail_node_parents(_node_start),
+    tail_node_write(_node_start),
+    head_node_write(_node_start),
+    parent_iter(_node_start)
+  {
+    prev_hashed_node = nullptr;
+    node_start = _node_start;
+    node_stop = _node_stop;
+
+    printf("Opening parents file %s\n", parents_file);
+    parents_fd = open(parents_file, O_RDONLY);
+    if (parents_fd == -1) {
+      printf("Could not open parents file %s\n", parents_file);
+      exit(1);
+    }
+    struct stat statbuf;
+    fstat(parents_fd, &statbuf);
+    if ((size_t)statbuf.st_size != parent_iter_t::bytes(C::GetNumNodes())) {
+      printf("Found size %ld bytes for parents file %s.
Expected %ld bytes.\n", + statbuf.st_size, parents_file, parent_iter_t::bytes(C::GetNumNodes())); + exit(1); + } + + parents_buf = (uint32_t*)mmap(NULL, parent_iter_t::bytes(C::GetNumNodes()), + PROT_READ, MAP_PRIVATE, parents_fd, 0); + if (parents_buf == MAP_FAILED) { + perror("mmap failed for parents file"); + exit(1); + } + if (((uintptr_t)parents_buf & 0xFFF) != 0) { + printf("Error: parents buffer is not page aligned\n"); + exit(1); + } + parent_iter.set_buf(parents_buf); + +#ifdef TSC + loop_cycles = 0; + parent_cycles = 0; + min_node_cycles = 0; + read_batch_cycles = 0; + write_cycles = 0; + release_parent_cycles = 0; + advance_written_cycles = 0; + advance_tail_cycles = 0; + noaction_cycles = 0; + rb_check_buf_cycles = 0; + rb_reserve_buf_cycles = 0; + rb_special_case_cycles = 0; + rb_cache_params_cycles = 0; + rb_parents_cycles = 0; + rb_send_cycles = 0; +#endif + } + + ~orchestrator_t() { + munmap(parents_buf, parent_iter_t::bytes(C::GetNumNodes())); + close(parents_fd); + } + + int init() { + return 0; + } + + void print_state() { + printf("System tail_node %lu, tail_node_parents %lu, tail_node_write %lu, " + "head_node_write %lu, min_node_hash %lu, head_node %lu\n", + tail_node.id(), tail_node_parents.id(), tail_node_write.abs().id(), + head_node_write.abs().id(), min_node_hash.id(), parent_iter.get_node().id()); + } + + // Issue a batch of reads. A batch in this case is PAGE_BATCH_SIZE sized + // and consumes one node and all parents, which are sent as a batch to the IO. + __attribute__ ((noinline)) + size_t read_batch(node_id_t min_hash_node, node_buffer_iterator_t& tail_node_write, + bool &advanced, size_t remaining_nodes) { + // Maximum number of reads we will do + const size_t MAX_READ_BATCH = C::NODES_PER_PAGE * 2; + // To simplify the logic always read on page boundaries. This way we don't + // have to manage pages with some nodes hashed and some not. 
+ const size_t BATCH_INCREMENT = C::NODES_PER_PAGE; + +#ifdef TSC + uint64_t tsc; + uint64_t tsc_start = get_tsc(); +#endif + + advanced = false; + + //////////////////////////////////////////////////////////// + // Determine how many nodes to read + //////////////////////////////////////////////////////////// + + size_t parent_read_fifo_free = system.parent_read_fifo.free_count(); + size_t node_buffer_free = system.node_buffer.free_count(); + size_t parent_buffer_free = system.parent_buffer.free_count(); + + // Update event counters + if (parent_read_fifo_free < MAX_READ_BATCH) { + parent_read_fifo_full++; + system.parent_read_fifo_full_stats.record(); + } + if (node_buffer_free * C::NODES_PER_PAGE < MAX_READ_BATCH) { + node_buffer_full++; + system.node_buffer_full_stats.record(); + } + if (parent_buffer_free < MAX_READ_BATCH) { + parent_buffer_full++; + system.parent_buffer_full_stats.record(); + } + + // Determine the number of batches to read + size_t min_free = std::min(parent_read_fifo_free, node_buffer_free * C::NODES_PER_PAGE); + min_free = std::min(min_free, parent_buffer_free); + size_t max_batch = std::min(min_free, remaining_nodes); + max_batch = std::min(max_batch, MAX_READ_BATCH); + // Round down to the increment size + max_batch &= ~(BATCH_INCREMENT - 1); + + if (max_batch == 0) { + return 0; + } + +#ifdef TSC + tsc = get_tsc(); + rb_check_buf_cycles += tsc - tsc_start; + tsc_start = tsc; +#endif + + //////////////////////////////////////////////////////////// + // Reserve buffer entries + //////////////////////////////////////////////////////////// + + // Reserve all parent buffers + size_t parent_buffer_id; + page_batch_ptr_t page_batch_ptrs[max_batch]; + system.parent_buffer.reserve_batch_nocheck(max_batch, parent_buffer_id, page_batch_ptrs); + + // Reserve node buffer entries + size_t num_node_buffers = max_batch / C::NODES_PER_PAGE; + node_batch_ptr_t node_batch_ptrs[num_node_buffers]; + size_t cur_node_buffer_id; + system.node_buffer.reserve_batch_nocheck(num_node_buffers, cur_node_buffer_id, node_batch_ptrs); + +#ifdef TSC + tsc = get_tsc(); + rb_reserve_buf_cycles += tsc - tsc_start; + tsc_start = tsc; +#endif + + //////////////////////////////////////////////////////////// + // Determine caching parameters + //////////////////////////////////////////////////////////// + + // To determine if we can reference cached data we need to know if the parent + // is in the node buffer. + // Don't cache nodes less than the parent buffer cache size from the tail or it can + // prevent forward progress with reading nodes. 
+    size_t cache_skid_size = PARENT_BUFFER_BATCHES;
+
+    // Number of nodes in the buffer
+    node_id_t node = parent_iter.get_node(); // node we're reading parents for
+    size_t node_buffer_count = system.node_buffer.size() * C::NODES_PER_PAGE - max_batch;
+    node_id_t cache_min_node = node - node_buffer_count;
+    node_id_t cache_min_cacheable_node =
+      std::min(node_id_t(cache_min_node + cache_skid_size), // Keep space from the tail
+               // Always reference the cache for nodes not yet written to disk
+               tail_node_write.abs());
+
+    // printf("Reading parents for node %lx: node_buffer_count %ld cache_min_node %08lx "
+    //        "cache_min_cacheable_node %08lx, head_node_write %lx\n",
+    //        node.id(), node_buffer_count, cache_min_node.id(), cache_min_cacheable_node.id(),
+    //        tail_node_write.abs().id());
+
+#ifdef TSC
+    tsc = get_tsc();
+    rb_cache_params_cycles += tsc - tsc_start;
+    tsc_start = tsc;
+#endif
+
+    ////////////////////////////////////////////////////////////
+    // Process the nodes
+    ////////////////////////////////////////////////////////////
+    size_t total_reads_issued = 0;
+    for (size_t i = 0; i < max_batch;
+         i++, parent_buffer_id = system.parent_buffer.incr(parent_buffer_id)) {
+
+      node_id_t node = parent_iter.get_node(); // node we're reading parents for
+      size_t node_in_node_buffer_page = node.node() % C::NODES_PER_PAGE;
+      parallel_node_t* cur_node_buffer =
+        &node_batch_ptrs[i / C::NODES_PER_PAGE]->batch[0].parallel_nodes[node_in_node_buffer_page];
+      if (i > 0 && node_in_node_buffer_page == 0) {
+        cur_node_buffer_id = system.node_buffer.incr(cur_node_buffer_id);
+      }
+
+      // Get the parent pointers for this batch
+      typename system_buffers_t<C>::page_batch_t* page_batch =
+        page_batch_ptrs[i];
+      // Batch of IO requests going to disks
+      typename system_buffers_t<C>::page_io_batch_t* io_batch =
+        &system.parent_buffer_io[parent_buffer_id];
+      // Batch of parent pointers - aligns with parent buffers
+      typename system_buffers_t<C>::parent_ptr_batch_t* ptr_batch =
+        &system.parent_ptrs[parent_buffer_id];
+      typename system_buffers_t<C>::parent_ptr_sync_batch_t* ptr_sync_batch =
+        &system.parent_ptr_syncs[parent_buffer_id];
+
+#ifdef TSC
+      tsc = get_tsc();
+      rb_reserve_buf_cycles += tsc - tsc_start;
+      tsc_start = tsc;
+#endif
+
+      size_t parent_start_idx = 0;
+      // Special case: For the first node of the first layer there are no parents
+      if (node.id() == 0) {
+        ptr_batch->batch[0].ptr = nullptr;
+        ptr_sync_batch->batch[0].node_buffer_idx = parent_ptr_sync_t::NOT_NODE_BUFFER;
+        parent_iter++;
+        for (size_t j = 0; j < PAGE_BATCH_SIZE; j++) {
+          ptr_batch->batch[j + 1].ptr = nullptr;
+          ptr_sync_batch->batch[j + 1].node_buffer_idx = parent_ptr_sync_t::NOT_NODE_BUFFER;
+          parent_iter++;
+        }
+        // Increment the valid counter for the batch
+        system.parent_buffer.incr_valid(parent_buffer_id, system.parent_buffer.VALID_THRESHOLD);
+        prev_hashed_node = cur_node_buffer;
+        continue;
+      }
+      else if (node.node() == 0) {
+        // Special case: For the first node of any layer there are no base parents
+        ptr_batch->batch[0].ptr = nullptr;
+        ptr_sync_batch->batch[0].node_buffer_idx = parent_ptr_sync_t::NOT_NODE_BUFFER;
+        parent_iter++;
+        for (size_t j = 0; j < PARENT_COUNT_BASE; j++) {
+          ptr_batch->batch[j + 1].ptr = nullptr;
+          ptr_sync_batch->batch[j + 1].node_buffer_idx = parent_ptr_sync_t::NOT_NODE_BUFFER;
+          parent_iter++;
+        }
+        parent_start_idx = PARENT_COUNT_BASE;
+      } else {
+        // For the first parent pointer we point to the previous hashed node
+        assert (parent_iter.get_parent() == 0);
+
+        ptr_batch->batch[0].ptr = prev_hashed_node;
assert((system_buffers_t::node_batch_t::BATCH_SIZE == 1)); + ptr_sync_batch->batch[0].node_buffer_idx = parent_ptr_sync_t::LOCAL_NODE; + // Since this is within COORD_BATCH_NODE_COUNT we don't set a reference count + + // Increment the parent pointer + parent_iter++; + } + +#ifdef TSC + tsc = get_tsc(); + rb_special_case_cycles += tsc - tsc_start; + tsc_start = tsc; +#endif + + //////////////////////////////////////////////////////////// + // Process the parents for a node + //////////////////////////////////////////////////////////// + + node_id_t parent_node; + size_t node_in_page; + size_t reads_issued = 0; + for (size_t j = parent_start_idx; j < PAGE_BATCH_SIZE; j++) { + assert (node == parent_iter.get_node()); + + // printf("Reading node %ld parent %ld prev_node %p\n", + // node, parent_iter.get_parent(), prev_node); + node_io_t *io = &io_batch->batch[j]; + + if (node.layer() == 0 && parent_iter.is_prev_layer()) { + // Special case: There are no expander parents for the first layer so just map them + // to the previous node. + parent_node = node; + parent_node--; + } else { + parent_node = *parent_iter; + } + + // Use the node buffer for cached parents + // In this case we set the IO type to NOP and update the synchronization structures. + bool use_cache = parent_node >= cache_min_cacheable_node; + + // Parent nodes that are near to the node being hashed will be treated differently. + // When the coordinator is creating the contiguous data buffer it will need to defer + // copying very close parents since they are not yet hashed. + bool is_local = (node - parent_node) < COORD_BATCH_NODE_COUNT; + + if (use_cache) { + // Compute the entry in the node buffer where the data will be + node_id_t parent_node_cache_offset = parent_node - cache_min_node; + size_t node_buffer_entry = + system.node_buffer.add(system.node_buffer.get_tail(), + parent_node_cache_offset.id() / C::NODES_PER_PAGE); + if (parent_node_cache_offset.id() >= node_buffer_count + i) { + printf("Using cache for node %lx parent %lx parent_cache_offset %08lx " + "entry %ld subentry %ld %p\n", + node.id(), parent_node.id(), parent_node_cache_offset.id(), node_buffer_entry, + parent_node.node() % C::NODES_PER_PAGE, ptr_batch->batch[j + 1].ptr); + } + assert(parent_node_cache_offset.id() < node_buffer_count + i); + io->type = node_io_t::type_e::NOP; + + // Address correct node in the page + node_in_page = parent_node.node() % C::NODES_PER_PAGE; + assert(node_batch_t::BATCH_SIZE == 1); + + // Store in j + 1 in the batch since the ptr batch size is one larger than page batch + // and the zeroeth entry references the previous hashed node. + + // It's a bit expensive to access the entry through the ring buffer since it stores + // pointers and requires a dereference. Instead we can compute the node buffer pointer. 
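+        // Equivalent, but slower:
+        //   parallel_node_t* tmp_node =
+        //     &system.node_buffer.get_entry(node_buffer_entry)
+        //        ->batch[0].parallel_nodes[node_in_page];
+        // The direct indexing below skips the ring buffer's pointer table
+        // and the dereference it would cost on this hot path.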
+ node_batch_t* node_buffer_ptr = system.node_buffer_store; + node_batch_t* tmp_batch = node_buffer_ptr + node_buffer_entry; + parallel_node_t* tmp_node = &tmp_batch->batch[0].parallel_nodes[node_in_page]; + ptr_batch->batch[j + 1].ptr = tmp_node; + + if (is_local) { + ptr_sync_batch->batch[j + 1].node_buffer_idx = parent_ptr_sync_t::LOCAL_NODE; + } else { + ptr_sync_batch->batch[j + 1].node_buffer_idx = node_buffer_entry; + + // This is a bit expensive since we need to increment a random node's reference count + system.node_buffer_sync[NODE_IDX_TO_SYNC_IDX(node_buffer_entry)].reference_count++; + } + + // printf("Using cache for node %lx parent %ld parent_cache_offset %08lx " + // "entry %ld subentry %ld %p\n", + // node.id(), parent_node.id(), parent_node_cache_offset.id(), node_buffer_entry, + // parent_node.node() % C::NODES_PER_PAGE, ptr_batch->batch[j + 1].ptr); + } else { + io->node = parent_node.id(); + io->type = node_io_t::type_e::READ; + + // Address correct node in the page + node_in_page = parent_node.node() % C::NODES_PER_PAGE; + // Store in j + 1 since the ptr batch size is one larger than page batch + // and the zeroeth entry references the previous hashed node. + ptr_batch->batch[j + 1].ptr = &page_batch->batch[j].parallel_nodes[node_in_page]; + ptr_sync_batch->batch[j + 1].node_buffer_idx = parent_ptr_sync_t::NOT_NODE_BUFFER; + +#ifdef NO_DISK_READS + io->type = node_io_t::type_e::NOP; + io->valid->fetch_add(1, DEFAULT_MEMORY_ORDER); +#endif + + // printf("Reading N %lx P# %ld: P %2d.%08x (%lx) read_idx %lx read ptr %p\n", + // node.id(), parent_iter.get_parent(), parent_node.layer(), parent_node.node(), + // parent_node.id(), cur_node_buffer_id, ptr_batch->batch[j + 1].ptr); + reads_issued++; + } + + // Advance the node/parent counters + parent_iter++; + } + total_reads_issued += reads_issued; + +#ifdef TSC + tsc = get_tsc(); + rb_parents_cycles += tsc - tsc_start; + tsc_start = tsc; +#endif + + // Increment the valid point to cover the cached entries + size_t num_cached_in_batch = system.parent_buffer.VALID_THRESHOLD - reads_issued; + system.parent_buffer.incr_valid(parent_buffer_id, num_cached_in_batch); + cached_parents += num_cached_in_batch; + +#ifdef NO_DISK_READS + assert (system.parent_buffer.is_valid(parent_buffer_id)); +#endif + + prev_hashed_node = cur_node_buffer; + + // Send +#ifndef NO_DISK_READS + SPDK_ERROR(system.parent_read_fifo.enqueue_nocheck(io_batch)); +#endif + } + advanced = true; + +#ifdef TSC + tsc = get_tsc(); + rb_send_cycles += tsc - tsc_start; + tsc_start = tsc; +#endif + + return total_reads_issued; + } + + // Write nodes to disk. Currently writes a single page at a time. 
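+  // Returns the number of nodes enqueued for writing: C::NODES_PER_PAGE on
+  // success, or 0 when no hashed pages are pending or the write FIFO is full.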
+ // head_node_write - The next node to write to disk + // min_node_hash - The next node to hash + // head_node_write_idx - The page index in the node buffer of head_node_write + size_t write_nodes(node_buffer_iterator_t& head_node_write, node_id_t min_node_hash) { + size_t head_page_write = head_node_write.abs().id() / C::NODES_PER_PAGE; + size_t min_page_hash = min_node_hash.id() / C::NODES_PER_PAGE; + if (!(head_page_write < min_page_hash)) { + // No nodes to write + return 0; + } + + if (system.node_write_fifo.is_full()) { + node_write_fifo_full++; + return 0; + } + + // Batch of IO requests going to disks + node_io_batch_t* io_batch = &system.node_buffer_io[head_node_write.idx()]; + assert(node_batch_t::BATCH_SIZE == 1); + node_io_t *io = &io_batch->batch[0]; + // Use the full node ID here since we need to write out all layers + io->node = head_node_write.abs().id(); + assert(system.node_buffer.get_valid(head_node_write.idx()) == 0); + + // printf("Write node %ld, node idx %ld to block %ld\n", + // head_node_write.abs().id(), head_node_write.idx(), head_node_write.abs().id()); + // char prefix[32]; + // snprintf(prefix, 32, "Write %8lx: ", head_node_write.abs().id()); + // parallel_node_t *node = &system.node_buffer.get_entry(head_node_write.idx())->batch[0].nodes[0]; + // for (size_t i = 0; i < C::NODES_PER_PAGE; i++) { + // print_node(&node[i], 2, prefix); + // } + + //io->valid->fetch_add(1, DEFAULT_MEMORY_ORDER); // TODO: perform the actual write + // Send + SPDK_ERROR(system.node_write_fifo.enqueue(io_batch)); + + return C::NODES_PER_PAGE; + } + + __attribute__ ((noinline)) int process(bool print_timing = true) { + timestamp_t start = std::chrono::high_resolution_clock::now(); + + cached_parents = 0; + reads_issued = 0; + writes_issued = 0; + parent_read_fifo_full = 0; + io_req_pool_empty = 0; + parent_buffer_full = 0; + node_buffer_full = 0; + from_hashers_empty = 0; + no_action = 0; + node_write_fifo_full = 0; + + size_t outstanding_writes = 0; + + // Limit the time spent on any one activity to keep queues balanced + size_t ACTION_BATCH = 8; + size_t actions_remaining; + + node_id_t node = parent_iter.get_node(); + + // Keep track of actions taken last iteration +#ifdef PRINT_STALLS + bool last_iter_read_nodes = false; + bool last_iter_wrote_nodes = false; + bool last_iter_released_parents = false; + bool last_iter_advanced_written = false; + bool last_iter_advanced_tail_node = false; + + size_t no_action_run_length = 0; +#endif + +#ifdef TSC + uint64_t tsc; + tsc_start = get_tsc(); + uint64_t loop_cycles_update = 0; + uint64_t parent_cycles_update = 0; + uint64_t min_node_cycles_update = 0; + uint64_t read_batch_cycles_update = 0; + uint64_t write_cycles_update = 0; + uint64_t release_parent_cycles_update = 0; + uint64_t advance_written_cycles_update = 0; + uint64_t advance_tail_cycles_update = 0; +#endif + + + while (node < node_stop + || tail_node_write.abs() < node_stop + || outstanding_writes > 0) { + +#ifdef TSC + tsc = get_tsc(); + loop_cycles_update = tsc - tsc_start; + tsc_start = tsc; +#endif + + node_id_t next_node = parent_iter.get_node(); + +#ifdef TSC + tsc = get_tsc(); + parent_cycles_update = tsc - tsc_start; + tsc_start = tsc; +#endif + +#ifdef STATS + system.record_stats(); + // if (parent_iter.get_node().node() > 0 && + // (parent_iter.get_node().id() & STATS_MASK) == 0 && + // parent_iter.get_node() < node_stop) { + // system.print_periodic_stats(); + // } +#endif + + ////////////////////////////////////////////////////////////////////// + // Parent reading 
+ ////////////////////////////////////////////////////////////////////// + + // Determine the oldest node that has been hashed + min_node_hash = system.coordinator_node[0]->load(DEFAULT_MEMORY_ORDER); + for (size_t i = 1; i < system.coordinators.size(); i++) { + node_id_t n = system.coordinator_node[i]->load(DEFAULT_MEMORY_ORDER); + min_node_hash = std::min(min_node_hash, n); + } + + // And factoring in writing the nodes to disk, the oldest node we need + // to keep in the node buffer. + node_id_t min_node = std::min(min_node_hash, tail_node_write.abs()); + + system.head_minus_hashed_stats.record(parent_iter.get_node() - min_node_hash); + system.hashed_minus_written_stats.record(min_node_hash - tail_node_write.abs()); + system.written_minus_tail_stats.record(tail_node_write.abs() - tail_node); + +#ifdef PRINT_STALLS + last_iter_read_nodes = false; + last_iter_wrote_nodes = false; + last_iter_released_parents = false; + last_iter_advanced_written = false; + last_iter_advanced_tail_node = false; +#endif +#ifdef TSC + tsc = get_tsc(); + min_node_cycles_update = tsc - tsc_start; + tsc_start = tsc; +#endif + + // Initiate new reads of parent nodes + bool advanced = false; + size_t reads_issued_now = 0; + if (parent_iter.get_node() < node_stop) { + reads_issued_now = read_batch(min_node_hash, tail_node_write, + advanced, node_stop.id() - parent_iter.get_node().id()); +#ifdef PRINT_STALLS + last_iter_read_nodes = advanced; +#endif + if (reads_issued_now > 0) { + reads_issued += reads_issued_now; + } + } +#ifdef TSC + tsc = get_tsc(); + read_batch_cycles_update = advanced ? tsc - tsc_start : 0; + tsc_start = tsc; +#endif + + // Initiate a write of hashed nodes to disk + bool initiated_writes = false; + actions_remaining = ACTION_BATCH; + while (actions_remaining-- > 0 && head_node_write.abs() < min_node_hash) { + size_t writes_issued = write_nodes(head_node_write, min_node_hash); + head_node_write += writes_issued; + outstanding_writes += writes_issued / C::NODES_PER_PAGE; + writes_issued += writes_issued / C::NODES_PER_PAGE; + if (writes_issued > 0) { +#ifdef PRINT_STALLS + last_iter_wrote_nodes = true; +#endif + initiated_writes = true; + } + } +#ifdef TSC + tsc = get_tsc(); + write_cycles_update = tsc - tsc_start; + tsc_start = tsc; +#endif + // Release parent buffers as soon as they are no longer needed by hashers + actions_remaining = ACTION_BATCH; + while (actions_remaining-- > 0 && tail_node_parents < min_node_hash) { + typename system_buffers_t::parent_ptr_sync_batch_t* ptr_sync_batch = + &system.parent_ptr_syncs[system.parent_buffer.get_tail()]; + for (size_t j = 0; j < PARENT_PTR_BATCH_SIZE; j++) { + if (ptr_sync_batch->batch[j].is_node_buffer()) { + system.node_buffer_sync[NODE_IDX_TO_SYNC_IDX(ptr_sync_batch->batch[j].node_buffer_idx)] + .consumed_count++; + } + } + system.parent_buffer.release(); + tail_node_parents++; +#ifdef PRINT_STALLS + last_iter_released_parents = true; +#endif + } +#ifdef TSC + tsc = get_tsc(); + release_parent_cycles_update = tsc - tsc_start; + tsc_start = tsc; +#endif + + // Advance the written node tail as nodes are written to disk + actions_remaining = ACTION_BATCH; + while (actions_remaining-- > 0 && + tail_node_write.idx() != head_node_write.idx() && + system.node_buffer.get_valid(tail_node_write.idx())) { + outstanding_writes--; + tail_node_write += C::NODES_PER_PAGE; +#ifdef PRINT_STALLS + last_iter_advanced_written = true; +#endif + } + +#ifdef TSC + tsc = get_tsc(); + advance_written_cycles_update = tsc - tsc_start; + tsc_start = tsc; +#endif + + // 
Release the node buffer entries only as needed (when the node buffer + // is full) to maximize caching. + actions_remaining = ACTION_BATCH; + size_t free_count = system.node_buffer.free_count(); + while (actions_remaining-- > 0 && + tail_node < min_node && + tail_node < tail_node_write.abs() && + free_count < nvme_controller_t::queue_size * 512) { + if ((system.node_buffer.get_tail() & NODE_BUFFER_SYNC_BATCH_MASK) == 0) { + // Crossing into a new sync batch so need to check counters + size_t sync_idx = NODE_IDX_TO_SYNC_IDX(system.node_buffer.get_tail()); + if (system.node_buffer_sync[sync_idx].reference_count == + system.node_buffer_sync[sync_idx].consumed_count) { + system.node_buffer_sync[sync_idx].reference_count = 0; + system.node_buffer_sync[sync_idx].consumed_count = 0; + } else { + // This batch is not finished yet + break; + } + } + + // Release buffers once all nodes are used in the page + if ((tail_node.node() % C::NODES_PER_PAGE) == C::NODES_PER_PAGE - 1) { + system.node_buffer.release(); + } + tail_node++; +#ifdef PRINT_STALLS + last_iter_advanced_tail_node = true; +#endif + } + +#ifdef TSC + tsc = get_tsc(); + advance_tail_cycles_update = tsc - tsc_start; + tsc_start = tsc; +#endif + node = next_node; + + if (parent_iter.get_node().id() - tail_node.id() >= + NODE_BUFFER_BATCHES * C::NODES_PER_PAGE) { + printf("Unexpected tail/parent node values: %lu - %lu = %lu > %lu\n", + parent_iter.get_node().id(), tail_node.id(), + parent_iter.get_node().id() - tail_node.id(), + NODE_BUFFER_BATCHES * C::NODES_PER_PAGE); + exit(1); + } + if (!initiated_writes) { + from_hashers_empty++; + } + + if (reads_issued_now == 0) { + no_action++; +#ifdef PRINT_STALLS + no_action_run_length++; + if (no_action_run_length == 10000) { + printf("Detected stall "); + print_state(); + //system.print_periodic_stats(); + printf(" parent_tail %ld, valid %d %d %p\n", + system.parent_buffer.get_tail(), system.parent_buffer.is_tail_valid(), + system.parent_buffer.is_valid(system.parent_buffer.get_tail()), + system.parent_buffer.get_valid_ptr(system.parent_buffer.get_tail())); + } +#endif + +#ifdef TSC + noaction_cycles += loop_cycles_update; + noaction_cycles += parent_cycles_update; + noaction_cycles += min_node_cycles_update; + noaction_cycles += read_batch_cycles_update; + noaction_cycles += write_cycles_update; + noaction_cycles += release_parent_cycles_update; + noaction_cycles += advance_written_cycles_update; + noaction_cycles += advance_tail_cycles_update; +#endif + } else { +#ifdef PRINT_STALLS + no_action_run_length = 0; +#endif + +#ifdef TSC + loop_cycles += loop_cycles_update; + parent_cycles += parent_cycles_update; + min_node_cycles += min_node_cycles_update; + read_batch_cycles += read_batch_cycles_update; + write_cycles += write_cycles_update; + release_parent_cycles += release_parent_cycles_update; + advance_written_cycles += advance_written_cycles_update; + advance_tail_cycles += advance_tail_cycles_update; +#endif + } + } + + timestamp_t stop = std::chrono::high_resolution_clock::now(); + + uint64_t nodes_rw = reads_issued + writes_issued; + if (print_timing) { + uint64_t ms = std::chrono::duration_cast< + std::chrono::milliseconds>(stop - start).count(); + uint64_t mbytes = (nodes_rw * BLOCK_SIZE) >> 20; + printf("Reading/writing took %ld ms for %ld pages / %ld MB (%0.2lf MB/s), " + "%0.2lf IOPs\n", + ms, nodes_rw, mbytes, + (double)mbytes / ((double)ms / 1000.0), + (double)nodes_rw / ((double)ms / 1000.0)); + } +// printf("orchestrator_t outstanding_writes %ld\n", outstanding_writes); +// 
#ifdef TSC +// printf("orchestrator_t loop_cycles %lu\n", loop_cycles); +// printf("orchestrator_t parent_cycles %lu\n", parent_cycles); +// printf("orchestrator_t min_node_cycles %lu\n", min_node_cycles); +// printf("orchestrator_t read_batch_cycles %lu\n", read_batch_cycles); +// printf("orchestrator_t rb_check_buf_cycles %lu\n", rb_check_buf_cycles); +// printf("orchestrator_t rb_reserve_buf_cycles %lu\n", rb_reserve_buf_cycles); +// printf("orchestrator_t rb_special_case_cycles %lu\n", rb_special_case_cycles); +// printf("orchestrator_t rb_cache_params_cycles %lu\n", rb_cache_params_cycles); +// printf("orchestrator_t rb_parents_cycles %lu\n", rb_parents_cycles); +// printf("orchestrator_t rb_send_cycles %lu\n", rb_send_cycles); +// printf("orchestrator_t write_cycles %lu\n", write_cycles); +// printf("orchestrator_t release_parent_cycles %lu\n", release_parent_cycles); +// printf("orchestrator_t advance_written_cycles %lu\n", advance_written_cycles); +// printf("orchestrator_t advance_tail_cycles %lu\n", advance_tail_cycles); +// printf("orchestrator_t noaction_cycles %lu\n", noaction_cycles); +// #endif + +// print_stats(); + return 0; + } + + void print_stats() { + printf("orchestrator_t reads_issued %ld writes_issued %ld cached_parents %ld\n", + reads_issued, writes_issued, cached_parents); + printf("orchestrator_t parent_read_fifo_full %ld, io_req_pool_empty %ld, " + "parent_buffer_full %ld, node_buffer_full %ld, from_hashers_empty %ld, " + "no_action %ld, node_write_fifo_full %ld\n", + parent_read_fifo_full, io_req_pool_empty, + parent_buffer_full, node_buffer_full, from_hashers_empty, + no_action, node_write_fifo_full); + + printf("parent_buffer "); system.parent_buffer.print(); + } +}; + +#endif diff --git a/extern/supraseal/pc1/parent_iter_t.hpp b/extern/supraseal/pc1/parent_iter_t.hpp new file mode 100644 index 000000000..ef67be0e5 --- /dev/null +++ b/extern/supraseal/pc1/parent_iter_t.hpp @@ -0,0 +1,72 @@ +// Copyright Supranational LLC + +#ifndef __PARENT_ITER_T_HPP__ +#define __PARENT_ITER_T_HPP__ + +// Class to iterate through a cached parent graph. + +template +struct parent_iter_t { + node_id_t
<C> _node;
+  uint32_t _parent;
+  uint32_t* parent_buf;
+  uint32_t* parent_ptr;
+
+  parent_iter_t(node_id_t<C> start) :
+    _node(start), _parent(0) {}
+
+  // Size in bytes of the parent graph
+  static size_t bytes(size_t node_count) {
+    return node_count * PARENT_COUNT * PARENT_SIZE;
+  }
+
+  void set_buf(uint32_t* buf) {
+    parent_buf = buf;
+    parent_ptr = buf;
+  }
+
+  void operator ++(int) {
+    _parent++;
+    parent_ptr++;
+    if (_parent == PARENT_COUNT) {
+      _node++;
+      _parent = 0;
+    }
+    // Advance to the next layer
+    bool restart = _node.node() == 0 && _parent == 0;
+    if (restart) {
+      printf("Starting layer %d\n", _node.layer());
+      parent_ptr = parent_buf;
+    }
+  }
+  node_id_t<C> operator *() {
+    uint32_t layer = (_node.layer() == 0 ? 0 :
+                      (is_prev_layer() ? _node.layer() - 1 : _node.layer()));
+    node_id_t<C> parent_id(layer, *parent_ptr);
+    return parent_id;
+  }
+  uint64_t id() {
+    return _node.id();
+  }
+  uint32_t node() {
+    return _node.node();
+  }
+  uint32_t layer() {
+    return _node.layer();
+  }
+  uint32_t parent() {
+    return _parent;
+  }
+
+  node_id_t<C>
get_node() { + return _node; + } + size_t get_parent() { + return _parent; + } + bool is_prev_layer() { + return get_parent() >= PARENT_COUNT_BASE; + } +}; + +#endif diff --git a/extern/supraseal/pc1/pc1.cpp b/extern/supraseal/pc1/pc1.cpp new file mode 100644 index 000000000..459452381 --- /dev/null +++ b/extern/supraseal/pc1/pc1.cpp @@ -0,0 +1,255 @@ +// Copyright Supranational LLC + +#include +#include +#include // file read +#include // printing +#include +#include // htonl + +// Enable profiling +//#define PROFILE + +// Enable data collection in the orchestrator using the timestamp counter +//#define TSC + +// Enable data collection in the hasher using the timestamp counter +//#define HASHER_TSC + +// Enable more general statistics collection +//#define STATS + +// Disable reading parents from disk (will not produce the correct result) +//#define NO_DISK_READS + +// Print a message if the orchestrator is stalled for too long +//#define PRINT_STALLS + +// Verify that hashed result matches a known good sealing +//#define VERIFY_HASH_RESULT + +#include "pc1.hpp" +#include "../util/util.hpp" + +#include "../util/stats.hpp" +#include "../sealing/constants.hpp" +#include "../nvme/nvme.hpp" +#include "../sealing/data_structures.hpp" + +// Forward declarations +template class coordinator_t; +template class node_rw_t; +template class orchestrator_t; + +const size_t STATS_PERIOD = 1<<22; +const size_t STATS_MASK = STATS_PERIOD - 1; + +extern std::mutex print_mtx; + +#include "../util/debug_helpers.hpp" +#include "system_buffers_t.hpp" +#include "parent_iter_t.hpp" +#include "orchestrator_t.hpp" +#include "node_rw_t.hpp" +#include "coordinator_t.hpp" + +template +int do_pc1(nvme_controllers_t* controllers, + topology_t& topology, + uint64_t block_offset, + const uint32_t* replica_ids, + const char* parents_filename) { + topology_t::sector_config_t* sector_config = + topology.get_sector_config(C::PARALLEL_SECTORS); + if (sector_config == nullptr) { + printf("No configuration provided for %ld sectors\n", C::PARALLEL_SECTORS); + exit(1); + } + + size_t layer_count = C::GetNumLayers(); + size_t node_count = C::GetSectorSize() / NODE_SIZE; + + thread_pool_t pool(3 + sector_config->num_coordinators()); + std::atomic terminator(false); + + node_id_t node_start = node_count * 0; + //node_id_t node_stop(node_count * 0 + node_count / 32); + node_id_t node_stop(node_count * layer_count); + + system_buffers_t system(*sector_config); + SPDK_ERROR(system.init(controllers->size())); + + // Parent reader + node_rw_t::page_io_batch_t> parent_reader + (terminator, *controllers, system.parent_read_fifo, + topology.pc1_qpair_reader, block_offset); + SPDK_ERROR(parent_reader.init()); + system.parent_reader = &parent_reader; + + // Node writer + node_rw_t::node_io_batch_t> node_writer + (terminator, *controllers, system.node_write_fifo, + topology.pc1_qpair_writer, block_offset); + SPDK_ERROR(node_writer.init()); + + // Orchestrator + orchestrator_t orchestrator + (terminator, system, node_start, node_stop, parents_filename); + SPDK_ERROR(orchestrator.init()); + system.orchestrator = &orchestrator; + + // Replica ID hashing buffers for all sectors + replica_id_buffer_t replica_id_bufs[C::PARALLEL_SECTORS] __attribute__ ((aligned (4096))); + std::memset(replica_id_bufs, 0, sizeof(replica_id_buffer_t) * C::PARALLEL_SECTORS); + + for (size_t i = 0; i < sector_config->num_hashers(); ++i) { + for (size_t j = 0; j < NODES_PER_HASHER; ++j) { + for (size_t k = 0; k < NODE_WORDS; k++) { + size_t idx = (i * NODES_PER_HASHER * 
NODE_WORDS + + j * NODE_WORDS); + replica_id_bufs[i].ids[j][k] = htonl(replica_ids[idx + k]); + } + replica_id_bufs[i].pad_0[j][0] = 0x80000000; // byte 67 + replica_id_bufs[i].pad_1[j][7] = 0x00000200; // byte 125 + replica_id_bufs[i].padding[j][0] = 0x80000000; // byte 67 + replica_id_bufs[i].padding[j][7] = 0x00002700; // byte 125 + } + } + + channel_t ch; + pool.spawn([&]() { + size_t core_num = topology.pc1_reader; + set_core_affinity(core_num); + assert(parent_reader.process(topology.pc1_reader_sleep_time) == 0); + ch.send(0); + }); + pool.spawn([&]() { + size_t core_num = topology.pc1_writer; + set_core_affinity(core_num); + assert(node_writer.process(topology.pc1_writer_sleep_time, 10) == 0); + ch.send(0); + }); + + size_t sector = 0; + size_t hasher_count = 0; + for (size_t coord_id = 0; coord_id < sector_config->num_coordinators(); coord_id++) { + size_t core_num = sector_config->get_coordinator_core(coord_id); + pool.spawn([&, sector_config, coord_id, core_num, sector, hasher_count]() { + set_core_affinity(core_num); + coordinator_t coordinator(terminator, system, + coord_id, sector, + sector_config->coordinators[coord_id], + node_start, node_stop, + &replica_id_bufs[hasher_count]); + system.coordinators[coord_id] = &coordinator; + assert(coordinator.run() == 0); + ch.send(0); + }); + sector += sector_config->coordinators[coord_id].num_sectors(); + hasher_count += sector_config->coordinators[coord_id].num_hashers; + } + + timestamp_t start = std::chrono::high_resolution_clock::now(); + size_t core_num = topology.pc1_orchestrator; + set_core_affinity(core_num); + + orchestrator.process(true); + + // Wait for completions + for (size_t i = 0; i < sector_config->num_coordinators(); i++) { + ch.recv(); // each coordinator + } + terminator = true; + ch.recv(); // rw handler + ch.recv(); // node_writer handler + + timestamp_t stop = std::chrono::high_resolution_clock::now(); + uint64_t secs = std::chrono::duration_cast< + std::chrono::seconds>(stop - start).count(); + printf("Sealing took %ld seconds\n", secs); + + return 0; +} + +#ifdef RUNTIME_SECTOR_SIZE +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); 
+// ... explicit instantiations of do_pc1 for the remaining supported sealing
+// configurations ...
+template int do_pc1 (nvme_controllers_t*, topology_t&,
uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +#endif +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); +template int do_pc1 (nvme_controllers_t*, topology_t&, uint64_t, const uint32_t*, const char*); diff --git a/extern/supraseal/pc1/pc1.hpp b/extern/supraseal/pc1/pc1.hpp new file mode 100644 index 000000000..4c1776727 --- /dev/null +++ b/extern/supraseal/pc1/pc1.hpp @@ -0,0 +1,18 @@ +// Copyright Supranational LLC + +#ifndef __PC1_HPP__ +#define __PC1_HPP__ + +#include "../sealing/topology_t.hpp" +#include "../sealing/data_structures.hpp" +#include "../nvme/nvme.hpp" +#include "node_rw_t.hpp" + +template +int do_pc1(nvme_controllers_t* controllers, + topology_t& topology, + uint64_t block_offset, + const uint32_t* replica_ids, + const char* parents_filename); + +#endif diff --git a/extern/supraseal/pc1/replica_id.cpp b/extern/supraseal/pc1/replica_id.cpp new file mode 100644 index 000000000..536466e71 --- /dev/null +++ b/extern/supraseal/pc1/replica_id.cpp @@ -0,0 +1,37 @@ 
+// Copyright Supranational LLC + +#include // uint* +#include // memcpy +#include "replica_id.hpp" // header +#include "../sha/sha_functions.hpp" // SHA-256 functions + +// Create replica ID +void create_replica_id(uint32_t* replica_id, + const uint8_t* prover_id, + const uint8_t* sector_id, + const uint8_t* ticket, + const uint8_t* comm_d, + const uint8_t* porep_seed) { + + uint8_t buf[192] = {0}; + + std::memcpy(buf, prover_id, 32); + std::memcpy(buf + 32, sector_id, 8); + std::memcpy(buf + 40, ticket, 32); + std::memcpy(buf + 72, comm_d, 32); + std::memcpy(buf + 104, porep_seed, 32); + + // Add padding and length (1088 bits -> 0x440) + buf[136] = 0x80; + buf[190] = 0x04; + buf[191] = 0x40; + + // Initialize digest + std::memcpy(replica_id, SHA256_INITIAL_DIGEST, 32); + + // Hash buffer, takes 3 SHA-256 blocks + blst_sha256_block(replica_id, buf, 3); + + // Top two bits cutoff due to keeping in field + replica_id[7] &= 0xFFFFFF3F; +} diff --git a/extern/supraseal/pc1/replica_id.hpp b/extern/supraseal/pc1/replica_id.hpp new file mode 100644 index 000000000..231f7b2f8 --- /dev/null +++ b/extern/supraseal/pc1/replica_id.hpp @@ -0,0 +1,16 @@ +// Copyright Supranational LLC + +#ifndef __REPLICA_ID_HPP__ +#define __REPLICA_ID_HPP__ + +#include // uint* + +// Create replica ID +void create_replica_id(uint32_t* replica_id, + const uint8_t* prover_id, + const uint8_t* sector_id, + const uint8_t* ticket, + const uint8_t* comm_d, + const uint8_t* porep_seed); + +#endif // __REPLICA_ID_HPP__ diff --git a/extern/supraseal/pc1/system_buffers_t.hpp b/extern/supraseal/pc1/system_buffers_t.hpp new file mode 100644 index 000000000..bb29cd71b --- /dev/null +++ b/extern/supraseal/pc1/system_buffers_t.hpp @@ -0,0 +1,248 @@ +// Copyright Supranational LLC + +#ifndef __SYSTEM_BUFFERS_T_HPP__ +#define __SYSTEM_BUFFERS_T_HPP__ + +// Shared buffers for communicating between various system processes + +template +class system_buffers_t { +public: + // Type to store parent node data coming from disk + typedef batch_t, PAGE_BATCH_SIZE> page_batch_t; + typedef page_batch_t* page_batch_ptr_t; + // Type to store nodes that have been hashed, one per batch of parents + typedef batch_t, 1> node_batch_t; + typedef node_batch_t* node_batch_ptr_t; + + // Ring buffer for pages. + typedef ring_buffer_t parent_buffer_t; + + typedef batch_t, PARENT_PTR_BATCH_SIZE> parent_ptr_batch_t; + typedef batch_t parent_ptr_sync_batch_t; + + typedef batch_t node_io_batch_t; + typedef batch_t page_io_batch_t; + + typedef ring_buffer_t node_buffer_t; + typedef ring_counter_t, NODE_BUFFER_BATCHES * C::NODES_PER_PAGE, + C::NODES_PER_PAGE> node_buffer_iterator_t; + + + // Ring buffer for the parent pages + parent_buffer_t parent_buffer; + // Contiguous storage for page buffers + spdk_ptr_t parent_buffer_store; + + // Parallel array to the ring buffer hold IO meta data + spdk_ptr_t parent_buffer_io; + + // Parent pointers. This array is parallel to parent_buffer and indexed in the same way + spdk_ptr_t parent_ptrs; + // Parent synchronization structures. This array is parallel to parent_buffer and + // indexed in the same way + spdk_ptr_t parent_ptr_syncs; + + // Ring buffer for sealed nodes + node_buffer_t node_buffer; + + // Contiguous storage for the node buffer + spdk_ptr_t node_buffer_store; + // Parallel array to the node buffer hold IO meta data + spdk_ptr_t node_buffer_io; + // Parallel array to the node buffer hold synchronization meta data. This is + // is stored per node buffer batch to reduce synchronization overhead. 
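+  // Lifecycle, as driven by the orchestrator: reference_count is incremented
+  // when a parent pointer is aimed at a cached node and consumed_count when
+  // the owning parent buffer is released after hashing; a sync batch can be
+  // recycled once the two counts match.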
+ struct node_sync_t { + // Count of references to the node by parent pointers + uint16_t reference_count; + // Count of consumed references by hashers + uint16_t consumed_count; + }; + spdk_ptr_t node_buffer_sync; + + // Fixed size FIFOs for requests to the parent reader + mt_fifo_t parent_read_fifo; + + // Fixed size FIFOs for requests to the note writer + mt_fifo_t node_write_fifo; + + // Number of NVME controllers to use + size_t num_controllers; + + // Hashing status, from hashing threads to storage core. + // Records the latest hashed node + std::vector>*> coordinator_node; + + // Coordinator pointers + std::vector*> coordinators; + + // Pointer to the parent reader + node_rw_t* parent_reader; + + orchestrator_t* orchestrator; + queue_stat_t parent_buffer_stats; + queue_stat_t node_buffer_stats; + queue_stat_t read_fifo_stats; + queue_stat_t write_fifo_stats; + queue_stat_t head_minus_hashed_stats; + queue_stat_t hashed_minus_written_stats; + queue_stat_t written_minus_tail_stats; + counter_stat_t parent_buffer_full_stats; + counter_stat_t node_buffer_full_stats; + counter_stat_t parent_read_fifo_full_stats; + +public: + system_buffers_t(topology_t::sector_config_t& topology) : + coordinator_node(topology.num_coordinators(), nullptr), + coordinators(topology.num_coordinators(), nullptr), + parent_buffer_full_stats("parent_buffer_full"), + node_buffer_full_stats("node_buffer_full"), + parent_read_fifo_full_stats("parent_read_fifo_full") + { + parent_reader = nullptr; + orchestrator = nullptr; + } + + ~system_buffers_t() { + for (size_t i = 0; i < coordinator_node.size(); i++) { + delete coordinator_node[i]; + } + } + + int init(size_t num_disks) { + // FIFO depth from storage core to the disk IO core(s) + size_t disk_fifo_padding = 16; + size_t disk_fifo_depth = (num_disks * + nvme_controller_t::queue_size / page_batch_t::BATCH_SIZE * + disk_fifo_padding); + SPDK_ERROR(parent_read_fifo.create("parent_read_fifo", disk_fifo_depth)); + //printf("parent_read_fifo depth %ld batches\n", disk_fifo_depth); + num_controllers = num_disks; + + // Allocate the parent_buffer + parent_buffer_store.alloc(PARENT_BUFFER_BATCHES); + SPDK_ERROR(parent_buffer.create(parent_buffer_store)); + + // Allocate an equal number of IO buffers + parent_buffer_io.alloc(PARENT_BUFFER_BATCHES); + + // Allocate an equal number of parent pointer batches and syncs + parent_ptrs.alloc(PARENT_BUFFER_BATCHES); + parent_ptr_syncs.alloc(PARENT_BUFFER_BATCHES); + + // Set up parent buffer io structs to point into the parent buffer array + for (size_t i = 0; i < PARENT_BUFFER_BATCHES; i++) { + for (size_t j = 0; j < page_batch_t::BATCH_SIZE; j++) { + parent_buffer_io[i].batch[j].valid = parent_buffer.get_valid_ptr(i); + parent_buffer_io[i].batch[j].type = node_io_t::type_e::READ; + page_t* parent_buffer_page = &parent_buffer.get_entry(i)->batch[j]; + parent_buffer_io[i].batch[j].tracker.buf = (uint8_t*)parent_buffer_page; + } + } + + // Allocate the node writer fifo + SPDK_ERROR(node_write_fifo.create("node_write_fifo", disk_fifo_depth)); + //printf("node_write_fifo depth %ld batches\n", disk_fifo_depth); + + // Allocate the node_buffer + node_buffer_store.alloc(NODE_BUFFER_BATCHES); + SPDK_ERROR(node_buffer.create(node_buffer_store)); + + // Allocate an equal number of IO buffers + node_buffer_io.alloc(NODE_BUFFER_BATCHES); + + for (size_t i = 0; i < NODE_BUFFER_BATCHES; i++) { + for (size_t j = 0; j < node_io_batch_t::BATCH_SIZE; j++) { + node_buffer_io[i].batch[j].valid = node_buffer.get_valid_ptr(i); + 
node_buffer_io[i].batch[j].type = node_io_t::type_e::WRITE; + page_t* node_buffer_page = &node_buffer.get_entry(i)->batch[j]; + node_buffer_io[i].batch[j].tracker.buf = + (uint8_t*)node_buffer_page; + } + } + + // Allocate an equal number of node buffer sync entries, one per node batch + node_buffer_sync.alloc(NODE_BUFFER_SYNC_BATCHES); + for (size_t i = 0; i < NODE_BUFFER_SYNC_BATCHES; i++) { + node_buffer_sync[i].reference_count = 0; + node_buffer_sync[i].consumed_count = 0; + } + + for (size_t i = 0; i < coordinator_node.size(); i++) { + coordinator_node[i] = new std::atomic>(); + } + + parent_buffer_stats.init("parent_buffer", parent_buffer.capacity()); + node_buffer_stats.init("node_buffer", node_buffer.capacity()); + read_fifo_stats.init("read_fifo", parent_read_fifo.capacity()); + write_fifo_stats.init("write_fifo", node_write_fifo.capacity()); + head_minus_hashed_stats.init("head_minus_hashed", 0); + hashed_minus_written_stats.init("hashed_minus_written", 0); + written_minus_tail_stats.init("written_minus_tail", 0); + + return 0; + } + + void clear_stats() { +#ifdef STATS + parent_buffer_stats.clear(); + node_buffer_stats.clear(); + read_fifo_stats.clear(); + write_fifo_stats.clear(); + head_minus_hashed_stats.clear(); + hashed_minus_written_stats.clear(); + written_minus_tail_stats.clear(); + hasher_clear_stats(hashers[0]); + if (parent_reader != nullptr) { + rw_clear_stats(parent_reader); + } +#endif + } + void record_stats() { +#ifdef STATS + parent_buffer_stats.record(parent_buffer.size()); + node_buffer_stats.record(node_buffer.size()); + read_fifo_stats.record(parent_read_fifo.size()); + write_fifo_stats.record(node_write_fifo.size()); +#endif + } + + void print_periodic_stats() { +#ifdef STATS + parent_buffer_stats.snapshot(); + node_buffer_stats.snapshot(); + read_fifo_stats.snapshot(); + write_fifo_stats.snapshot(); + head_minus_hashed_stats.snapshot(); + hashed_minus_written_stats.snapshot(); + written_minus_tail_stats.snapshot(); + hasher_snapshot(hashers[0]); + if (parent_reader != nullptr) { + rw_snapshot(parent_reader); + } + parent_buffer_full_stats.snapshot(); + node_buffer_full_stats.snapshot(); + parent_read_fifo_full_stats.snapshot(); + + parent_buffer_stats.print(); + node_buffer_stats.print(); + read_fifo_stats.print(); + write_fifo_stats.print(); + head_minus_hashed_stats.print(); + hashed_minus_written_stats.print(); + written_minus_tail_stats.print(); + hasher_print(hashers[0]); + if (parent_reader != nullptr) { + rw_print(parent_reader); + } + parent_buffer_full_stats.print(); + node_buffer_full_stats.print(); + parent_read_fifo_full_stats.print(); +#endif + } +}; + +#endif diff --git a/extern/supraseal/pc1/tree_builder.hpp b/extern/supraseal/pc1/tree_builder.hpp new file mode 100644 index 000000000..2f6fcba4a --- /dev/null +++ b/extern/supraseal/pc1/tree_builder.hpp @@ -0,0 +1,220 @@ +// Copyright Supranational LLC + +#ifndef __TREE_BUILDER_HPP__ +#define __TREE_BUILDER_HPP__ + +#include "../sealing/constants.hpp" +#include "../poseidon/poseidon.hpp" +#include +#include "../util/debug_helpers.hpp" +#include "../util/mmap_t.hpp" +#include +typedef std::chrono::high_resolution_clock::time_point timestamp_t; + +// Multi and single threaded tree builder class using Poseidon for tree-c. 
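+// Levels are hashed bottom up; for tree-r the lowest discard_rows levels are
+// computed into scratch space and dropped rather than stored, since only the
+// upper levels are persisted.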
+// Options +// arity - tree arity +// coL_arity - column arity +class TreeBuilder { +private: + size_t col_arity_; + size_t arity_; + size_t discard_rows_; + Poseidon col_hasher_, hasher_; + +public: + TreeBuilder(size_t col_arity, size_t arity, size_t discard_rows) : + col_arity_(col_arity), + arity_(arity), + discard_rows_(discard_rows), + col_hasher_(col_arity), + hasher_(arity) + {} + + // Compute the size to store the tree + size_t size(size_t count, bool cols, bool no_discard = false) { + assert (count % arity_ == 0); + size_t cur_count = count; + size_t total = 0; + + if (cols) { + total += cur_count / col_arity_; + cur_count /= col_arity_; + } + + size_t discards = cols || no_discard ? 0 : discard_rows_; + + while (cur_count > 1) { + if (discards > 0) { + discards--; + } else { + total += cur_count / arity_; + } + cur_count /= arity_; + } + return total; + } + + // out - hash result + // in - pointer to array of arity elements + void HashNode(node_t* out, node_t* in) { + hasher_.Hash((uint8_t*)out, (uint8_t*)in); + } + + // out - hash result + // in - pointer to array of arity elements + void ColHashNode(node_t* out, node_t* in) { + col_hasher_.Hash((uint8_t*)out, (uint8_t*)in); + } + + + // Single threaded tree builder - uses the calling thread + // out - storage for non-discarded tree levels + // in - tree leaves + node_t BuildTree(size_t count, node_t* out, node_t* in, bool cols, + bool no_discard = false) { + if (cols) { + assert (count % col_arity_ == 0); + } + + assert((cols ? count / col_arity_ : count) % arity_ == 0); + + // Pointer to hash output location + node_t* cur_hash_out = out; + node_t* cur_hash_in = in; + + size_t cur_count = count; + + if (cols) { + node_t* tree_start = cur_hash_out; + for (size_t i = 0; i < cur_count; i += col_arity_) { + ColHashNode(cur_hash_out, cur_hash_in); + cur_hash_in += col_arity_; + cur_hash_out++; + } + cur_hash_in = tree_start; + cur_count /= col_arity_; + } + + int discards = cols || no_discard ? -1 : (int)discard_rows_; + node_t* discarded_rows = nullptr; + if (discards > 0) { + discarded_rows = new node_t[count / arity_]; + cur_hash_out = discarded_rows; + } + + while (cur_count > 1) { + node_t* tree_start = cur_hash_out; + for (size_t i = 0; i < cur_count; i += arity_) { + HashNode(cur_hash_out, cur_hash_in); + cur_hash_in += arity_; + cur_hash_out++; + } + cur_hash_in = tree_start; + cur_count /= arity_; + discards--; + if (discards == 1) { + cur_hash_out = discarded_rows; + } else if (discards == 0) { + cur_hash_out = out; + } + } + delete [] discarded_rows; + cur_hash_out--; + + return cur_hash_out[0]; + } + + // Multi-threaded tree builder + // out - storage for non-discarded tree levels + // in - tree leaves + node_t BuildTree(size_t count, node_t* out, node_t* in, thread_pool_t& pool, + bool cols, bool no_discard = false) { + if (cols) { + assert (count % col_arity_ == 0); + } + + assert((cols ? count / col_arity_ : count) % arity_ == 0); + + // The single-threaded tree builder should be called with sectors <= 32KiB + assert (count > (cols ? 128 : 64)); + + // For efficient multithreading, divide the tree into + // a number of chunks that is significantly larger than + // cores then use an atomic work counter to process them. 
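+    // Example (hypothetical sizing): with a 32-thread pool min_chunks is 128,
+    // so for arity 8 the loop below grows num_chunks 8 -> 64 -> 512; a tree
+    // with 2^30 leaves is then cut into 512 chunks of 2^21 leaves each,
+    // hashed independently and joined by the top-tree pass at the end.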
+ size_t num_chunks = arity_; + // Create enough chunks to provide good load balancing + size_t min_chunks = pool.size() * 4; + + while (num_chunks < min_chunks) { + num_chunks *= arity_; + } + size_t chunk_size = count / num_chunks; + + pool.par_map(num_chunks, [this, in, out, count, chunk_size, cols, no_discard](size_t chunk) { + node_t* layer_out_start = out; + size_t layer_size = count; + + node_t* cur_hash_out = &out[chunk * chunk_size / (cols ? col_arity_ : arity_)]; + node_t* cur_hash_in = &in[chunk * chunk_size]; + + size_t cur_count = chunk_size; + + if (cols) { + node_t* tree_start = cur_hash_out; + for (size_t i = 0; i < cur_count; i += col_arity_) { + ColHashNode(cur_hash_out, cur_hash_in); + cur_hash_in += col_arity_; + cur_hash_out++; + } + cur_hash_in = tree_start; + cur_count /= col_arity_; + layer_size /= col_arity_; + + layer_out_start += layer_size; + cur_hash_out = &layer_out_start[chunk * cur_count / arity_]; + } + + int discards = cols || no_discard ? -1 : (int)discard_rows_; + node_t* discarded_rows = nullptr; + if (discards > 0) { + discarded_rows = new node_t[count / arity_]; + cur_hash_out = discarded_rows; + } + + while (cur_count > 1) { + node_t* tree_start = cur_hash_out; + for (size_t i = 0; i < cur_count; i += arity_) { + HashNode(cur_hash_out, cur_hash_in); + cur_hash_in += arity_; + cur_hash_out++; + } + cur_hash_in = tree_start; + cur_count /= arity_; + + layer_size /= arity_; + + discards--; + if (discards == 1) { + cur_hash_out = discarded_rows; + } else if (discards == 0) { + cur_hash_out = &out[chunk * cur_count / arity_]; + } else { + layer_out_start += layer_size; + cur_hash_out = &layer_out_start[chunk * cur_count / arity_]; + } + } + + delete [] discarded_rows; + }); + + // Merge the chunks + size_t top_tree_size = size(num_chunks * arity_, false, true); + size_t top_tree_start_idx = size(count, cols) - top_tree_size; + node_t* top_tree_start = &out[top_tree_start_idx]; + + return BuildTree(num_chunks, &top_tree_start[num_chunks], top_tree_start, false, true); + } +}; + +#endif /* __TREE_BUILDER_HPP__ */ diff --git a/extern/supraseal/pc1/tree_c.hpp b/extern/supraseal/pc1/tree_c.hpp new file mode 100644 index 000000000..15dc44b51 --- /dev/null +++ b/extern/supraseal/pc1/tree_c.hpp @@ -0,0 +1,112 @@ +// Copyright Supranational LLC + +#include "tree_builder.hpp" + +// Tree-c builder for Filecoin sealing +template +class TreeC { +public: + node_t BuildTreeC(node_t* leaves, std::string output_path, + thread_pool_t& pool) { + size_t sector_size = P::GetSectorSize(); + size_t cur_nodes = sector_size * P::GetNumLayers() / sizeof(node_t); + + TreeBuilder tree_c(P::GetNumLayers(), P::GetNumTreeRCArity(), P::GetNumTreeRDiscardRows()); + size_t elmts = tree_c.size(cur_nodes / P::GetNumTreeRCFiles(), true); + + node_t final_row[P::GetNumTreeRCFiles()]; + + printf("Building tree-c...\n"); + timestamp_t start = std::chrono::high_resolution_clock::now(); + if (!output_path.empty()) { + const size_t MAX = 256; + char fname[MAX]; + if (P::GetNumTreeRCFiles() > 1) { + const char *tree_r_filename_template = "%s/sc-02-data-tree-c-%ld.dat"; + size_t sub_tree_size = cur_nodes / P::GetNumTreeRCFiles(); + for (size_t i = 0; i < P::GetNumTreeRCFiles(); i++) { + snprintf(fname, MAX, tree_r_filename_template, output_path.c_str(), i); + mmap_t out_file; + out_file.mmap_write(fname, elmts * sizeof(node_t), true); + if (P::GetSectorSizeLg() > 15) + final_row[i] = tree_c.BuildTree(sub_tree_size, &out_file[0], &leaves[i * sub_tree_size], pool, true); + else + final_row[i] = 
tree_c.BuildTree(sub_tree_size, &out_file[0], &leaves[i * sub_tree_size], true); + } + } else { + const char *tree_r_filename_template = "%s/sc-02-data-tree-c.dat"; + snprintf(fname, MAX, tree_r_filename_template, output_path.c_str()); + mmap_t out_file; + out_file.mmap_write(fname, elmts * sizeof(node_t), true); + if (P::GetSectorSizeLg() > 15) + final_row[0] = tree_c.BuildTree(cur_nodes, &out_file[0], leaves, pool, true); + else + final_row[0] = tree_c.BuildTree(cur_nodes, &out_file[0], leaves, true); + } + } else { + node_t* store = new node_t[elmts]; + if (P::GetSectorSizeLg() > 15) + (void)tree_c.BuildTree(cur_nodes, store, leaves, pool, true); + else + (void)tree_c.BuildTree(cur_nodes, store, leaves, true); + delete [] store; + } + timestamp_t stop = std::chrono::high_resolution_clock::now(); + uint64_t secs = std::chrono::duration_cast< + std::chrono::seconds>(stop - start).count(); + printf("Tree-c took %ld seconds\n", secs); + + if (P::GetNumTreeRCFiles() == 16) { + node_t n2[2], root; + TreeBuilder last(2, 8, 0); + + last.HashNode(&n2[0], &final_row[0]); + last.HashNode(&n2[1], &final_row[8]); + + last.ColHashNode(&root, n2); + return root; + } else if (P::GetNumTreeRCFiles() > 1) { + TreeBuilder last(2, P::GetNumTreeRCFiles(), 0); + node_t root; + last.HashNode(&root, final_row); + return root; + } else { + return final_row[0]; + } + } + + node_t BuildTreeC(std::string layers_cache, std::string output_path, + int num_threads = 0) { + thread_pool_t pool(num_threads); + + mmap_t layer; + layer.mmap_read(layers_cache + std::string("/sc-02-data-layer-1.dat"), + (size_t)-1); + size_t sector_size = layer.get_size(); + size_t num_layers = P::GetNumLayers(); + size_t cur_nodes = num_layers * sector_size / sizeof(node_t); + std::vector merged_layers(cur_nodes); + + for (size_t i = 0; i < cur_nodes / num_layers; i++) { + merged_layers[i * num_layers] = layer[i]; + } + + for (size_t layer_idx = 1; layer_idx < num_layers; layer_idx++) { + std::string layer_filename = std::string("/sc-02-data-layer-") + + std::to_string(layer_idx + 1) + + std::string(".dat"); + mmap_t layer2; + layer2.mmap_read(layers_cache + layer_filename, (size_t)-1); + + for (size_t i = 0; i < cur_nodes / num_layers; i++) { + merged_layers[i * num_layers + layer_idx] = layer2[i]; + } + } + + printf("Building tree-c for sector size %ld\n", P::GetSectorSize()); + + node_t* leaves = &merged_layers[0]; + + return BuildTreeC(leaves, output_path, pool); + } +}; diff --git a/extern/supraseal/pc1/tree_d.hpp b/extern/supraseal/pc1/tree_d.hpp new file mode 100644 index 000000000..24f6e4b7b --- /dev/null +++ b/extern/supraseal/pc1/tree_d.hpp @@ -0,0 +1,129 @@ +// Copyright Supranational LLC + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../sealing/constants.hpp" +#include "../sealing/sector_parameters.hpp" +#include "../sha/sha_functions.hpp" +#include "../util/mmap_t.hpp" + +template +class TreeD { + public: + TreeD(P params, bool copy) : params_(params), copy_(copy) {} + ~TreeD() {} + + void print_digest_hex(const node_t* node) { + uint8_t* digest = (uint8_t*)node; + for (int i = 0; i < 32; ++i) { + std::cout << std::hex << std::setfill('0') << std::setw(2) + << (uint32_t)digest[i]; + } + std::cout << std::endl; + } + + void HashNode(node_t* result, const node_t* input) { + // Padding is fixed here, always hashing two 32B values + static uint8_t padding_block[64] = {0}; + padding_block[0] = 0x80; + padding_block[62] = 0x2; // 0x200 = 512 bits + + 
std::memcpy(result, SHA256_INITIAL_DIGEST, sizeof(node_t)); + blst_sha256_block((uint32_t*)result, input, 1); + blst_sha256_block((uint32_t*)result, (node_t*)padding_block, 1); + + blst_sha256_emit((uint8_t*)result, (const uint32_t*)result); + result->limbs[7] &= 0x3FFFFFFF; + } + + void BuildCCTree(node_t* comm_d, std::string tree_d_filename) { + size_t arity = params_.GetNumTreeDArity(); + size_t arity_lg = (size_t) log2(arity); + + // Open tree d + size_t cur_nodes = params_.GetSectorSize() / sizeof(node_t); + size_t tree_d_file_size = ((2 * cur_nodes) - 1) * sizeof(node_t); + mmap_t tree_d; + tree_d.mmap_write(tree_d_filename, tree_d_file_size); + + node_t cc[params_.GetNumTreeDLevels() + 1] = {0}; + node_t buf[2] = {0}; + + for (size_t i = 1; i <= params_.GetNumTreeDLevels(); ++i) { + HashNode(&(cc[i]), &(buf[0])); + std::memcpy(&(buf[0]), &(cc[i]), sizeof(node_t)); + std::memcpy(&(buf[1]), &(cc[i]), sizeof(node_t)); + } + + node_t* tree_ptr = &tree_d[0]; + size_t cur_level = 0; + + while (cur_nodes > 0) { + for (size_t i = 0; i < cur_nodes; ++i) { + std::memcpy(tree_ptr, &(cc[cur_level]), sizeof(node_t)); + tree_ptr++; + } + cur_nodes >>= arity_lg; + cur_level++; + } + + std::memcpy(comm_d, &(cc[params_.GetNumTreeDLevels()]), sizeof(node_t)); + } + + void BuildTree(node_t* comm_d, + std::string tree_d_filename, + std::string data_filename) { + size_t arity = params_.GetNumTreeDArity(); + size_t arity_lg = (size_t) log2(arity); + + // Open Data File + mmap_t data; + data.mmap_read(data_filename); + + // Open tree d + size_t cur_nodes = params_.GetSectorSize() / sizeof(node_t); + size_t tree_d_file_size = (cur_nodes - 1) * sizeof(node_t); + + if (copy_) { + tree_d_file_size += params_.GetSectorSize(); + } + mmap_t tree_d; + tree_d.mmap_write(tree_d_filename, tree_d_file_size); + + node_t* tree_ptr = &tree_d[0]; + node_t* in_ptr = &data[0]; + + // Copy all the data file data into tree_d if asked to + // Adjust pointers + if (copy_) { + std::memcpy(&tree_d[0], &data[0], params_.GetSectorSize()); + tree_ptr = &tree_d[0] + cur_nodes; + in_ptr = &tree_d[0]; + } + + while (cur_nodes > 1) { + node_t* start_tree_ptr = tree_ptr; + for (size_t in_idx = 0; in_idx < cur_nodes; in_idx += arity) { + HashNode(tree_ptr, &(in_ptr[in_idx])); + tree_ptr++; + } + cur_nodes >>= arity_lg; + in_ptr = start_tree_ptr; + } + + std::memcpy(comm_d, in_ptr, sizeof(node_t)); + } + + private: + P params_; + bool copy_; +}; diff --git a/extern/supraseal/pc1/tree_r.hpp b/extern/supraseal/pc1/tree_r.hpp new file mode 100644 index 000000000..5a930c591 --- /dev/null +++ b/extern/supraseal/pc1/tree_r.hpp @@ -0,0 +1,158 @@ +// Copyright Supranational LLC + +#include "tree_builder.hpp" + +// Tree-r builder for Filecoin sealing with optional data encoding +template +class TreeR { +public: + void ElementAdd(uint8_t* out, uint8_t* a, uint8_t* b) { + fr_t a_mont; + a_mont.to(a, 32, true); + + fr_t* out_ptr = (fr_t*)out; + out_ptr->to(b, 32, true); + + *out_ptr += a_mont; + + out_ptr->to_scalar(*((fr_t::pow_t*)out)); + } + + // // TODO: This is no faster - presumably disk IO limited + // void ElementAdd(node_t* out, node_t a, node_t b) { + // a.reverse_l(); + // b.reverse_l(); + // fr_t* fra = (fr_t*)&a; + // fr_t* frb = (fr_t*)&b; + // fr_t* frout = (fr_t*)out; + // *frout = *fra + *frb; + // out->reverse_l(); + // } + + node_t BuildTreeR(node_t* leaves, std::string output_path, + thread_pool_t& pool) { + + size_t last_layer_size = P::GetSectorSize(); + size_t sector_size = last_layer_size / sizeof(node_t); + + TreeBuilder 
diff --git a/extern/supraseal/pc1/tree_r.hpp b/extern/supraseal/pc1/tree_r.hpp new file mode 100644 index 000000000..5a930c591 --- /dev/null +++ b/extern/supraseal/pc1/tree_r.hpp @@ -0,0 +1,158 @@
+// Copyright Supranational LLC
+
+#include "tree_builder.hpp"
+
+// Tree-r builder for Filecoin sealing with optional data encoding
+template<class P>
+class TreeR {
+public:
+  // Encode one node: out = a + b in the scalar field (replica = key + data)
+  void ElementAdd(uint8_t* out, uint8_t* a, uint8_t* b) {
+    fr_t a_mont;
+    a_mont.to(a, 32, true);
+
+    fr_t* out_ptr = (fr_t*)out;
+    out_ptr->to(b, 32, true);
+
+    *out_ptr += a_mont;
+
+    out_ptr->to_scalar(*((fr_t::pow_t*)out));
+  }
+
+  // // TODO: This is no faster - presumably disk IO limited
+  // void ElementAdd(node_t* out, node_t a, node_t b) {
+  //   a.reverse_l();
+  //   b.reverse_l();
+  //   fr_t* fra = (fr_t*)&a;
+  //   fr_t* frb = (fr_t*)&b;
+  //   fr_t* frout = (fr_t*)out;
+  //   *frout = *fra + *frb;
+  //   out->reverse_l();
+  // }
+
+  node_t BuildTreeR(node_t* leaves, std::string output_path,
+                    thread_pool_t& pool) {
+
+    size_t last_layer_size = P::GetSectorSize();
+    size_t sector_size = last_layer_size / sizeof(node_t);
+
+    TreeBuilder tree_r(2, P::GetNumTreeRCArity(), P::GetNumTreeRDiscardRows());
+    size_t elmts = tree_r.size(sector_size / P::GetNumTreeRCFiles(), false);
+
+    node_t final_row[P::GetNumTreeRCFiles()];
+
+    printf("Building tree-r...\n");
+    timestamp_t start = std::chrono::high_resolution_clock::now();
+    if (!output_path.empty()) {
+      const size_t MAX = 256;
+      char fname[MAX];
+      if (P::GetNumTreeRCFiles() > 1) {
+        const char *tree_r_filename_template = "%s/sc-02-data-tree-r-last-%ld.dat";
+        size_t sub_tree_size = sector_size / P::GetNumTreeRCFiles();
+        for (size_t i = 0; i < P::GetNumTreeRCFiles(); i++) {
+          snprintf(fname, MAX, tree_r_filename_template, output_path.c_str(), i);
+          mmap_t<node_t> out_file;
+          out_file.mmap_write(fname, elmts * sizeof(node_t), true);
+          if (P::GetSectorSizeLg() > 15) {
+            final_row[i] = tree_r.BuildTree(sub_tree_size, &out_file[0],
+                                            &leaves[i * sub_tree_size], pool, false);
+          } else {
+            final_row[i] = tree_r.BuildTree(sub_tree_size, &out_file[0],
+                                            &leaves[i * sub_tree_size], false);
+          }
+        }
+      } else {
+        const char *tree_r_filename_template = "%s/sc-02-data-tree-r-last.dat";
+        snprintf(fname, MAX, tree_r_filename_template, output_path.c_str());
+        mmap_t<node_t> out_file;
+        out_file.mmap_write(fname, elmts * sizeof(node_t), true);
+        if (P::GetSectorSizeLg() > 15) {
+          final_row[0] = tree_r.BuildTree(sector_size, &out_file[0], leaves, pool, false);
+        } else {
+          final_row[0] = tree_r.BuildTree(sector_size, &out_file[0], leaves, false);
+        }
+      }
+    } else {
+      node_t* store = new node_t[elmts];
+      if (P::GetSectorSizeLg() > 15)
+        (void)tree_r.BuildTree(sector_size, store, leaves, pool, false);
+      else
+        (void)tree_r.BuildTree(sector_size, store, leaves, false);
+      delete [] store;
+    }
+    timestamp_t stop = std::chrono::high_resolution_clock::now();
+    uint64_t secs = std::chrono::duration_cast<
+      std::chrono::seconds>(stop - start).count();
+    printf("Tree-r took %ld seconds\n", secs);
+
+    if (P::GetNumTreeRCFiles() == 16) {
+      // 16 sub-tree files: hash two groups of 8 roots, then the two results
+      node_t n2[2], root;
+      TreeBuilder last(2, 8, 0);
+
+      last.HashNode(&n2[0], &final_row[0]);
+      last.HashNode(&n2[1], &final_row[8]);
+
+      last.ColHashNode(&root, n2);
+      return root;
+    } else if (P::GetNumTreeRCFiles() > 1) {
+      TreeBuilder last(2, P::GetNumTreeRCFiles(), 0);
+      node_t root;
+      last.HashNode(&root, final_row);
+      return root;
+    } else {
+      return final_row[0];
+    }
+  }
+
+  // TODO: templatize this too and don't determine sector size in a roundabout way
+  // TODO: also do the same for tree C
+  node_t BuildTreeR(std::string last_layer_filename, std::string data_filename,
+                    std::string output_path, int num_threads = 0) {
+    thread_pool_t pool(num_threads);
+
+    mmap_t<node_t> last_layer;
+    last_layer.mmap_read(last_layer_filename, (size_t)-1);
+    size_t last_layer_size = last_layer.get_size();
+
+    printf("Building tree-r for sector size %ld\n", P::GetSectorSize());
+
+    size_t sector_size = last_layer_size / sizeof(node_t);
+
+    node_t* leaves = &last_layer[0];
+
+    // Encode the data, if provided
+    mmap_t<node_t> sealed_file;
+    node_t* encoded_leaves = nullptr;
+    if (!data_filename.empty()) {
+      size_t num_chunks = pool.size() * 4;
+      size_t chunk_size = (sector_size + num_chunks - 1) / num_chunks;
+
+      std::string sealed_filename = output_path + "/sealed-file";
+      sealed_file.mmap_write(sealed_filename, last_layer_size, true);
+      encoded_leaves = &sealed_file[0];
+
+      mmap_t<node_t> data_file;
+      data_file.mmap_read(data_filename, last_layer_size);
+
+      printf("Encoding data...\n");
+      timestamp_t start = std::chrono::high_resolution_clock::now();
+      pool.par_map(num_chunks, [&](size_t chunk) {
+        size_t start = chunk * chunk_size;
+        size_t stop = std::min(start + chunk_size, sector_size);
+        for (size_t i = start; i < stop; ++i) {
+          ElementAdd((uint8_t*)&(encoded_leaves[i]),
+                     (uint8_t*)&(data_file[i]),
+                     (uint8_t*)&(last_layer[i]));
+        }
+      });
+      timestamp_t stop = std::chrono::high_resolution_clock::now();
+      uint64_t secs = std::chrono::duration_cast<
+        std::chrono::seconds>(stop - start).count();
+
+      // Use the encoded data for tree building
+      leaves = &encoded_leaves[0];
+      printf("Encoding took %ld seconds\n", secs);
+    }
+
+    return BuildTreeR(leaves, output_path, pool);
+  }
+};
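The encode step in `BuildTreeR` is plain field addition, which is what makes the replica reversible: data = replica - key. A toy sketch, with a small prime modulus standing in for the BLS12-381 scalar field that `fr_t` implements:

```cpp
#include <cstdio>

static const unsigned P = 251; // toy field modulus

// replica = key + data (mod P); this is what gets written to sealed-file
unsigned encode(unsigned key, unsigned data)    { return (key + data) % P; }
// data = replica - key (mod P); recovers the original sector data
unsigned decode(unsigned key, unsigned replica) { return (replica + P - key) % P; }

int main() {
  unsigned key = 200, data = 123;       // toy "last layer" node and data node
  unsigned replica = encode(key, data);
  printf("replica=%u decoded=%u\n", replica, decode(key, replica));
  return 0;
}
```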
diff --git a/extern/supraseal/pc2/README.md b/extern/supraseal/pc2/README.md new file mode 100644 index 000000000..2e0c2535e --- /dev/null +++ b/extern/supraseal/pc2/README.md @@ -0,0 +1,47 @@
+# Pre-Commit 2
+
+The Pre-Commit 2 (PC2) phase generates the Poseidon-based Merkle trees over the columns in the graph and over the replica.
+
+## Intended Usage
+
+The SupraSeal PC2 functions generate the Tree C files, the Tree R files, Comm R, and the replica. The Tree C files are always built in parallel from the graph layers on NVMe. For CC sectors, everything else is generated locally from the graph layers as well. For non-CC sectors, the application can choose between several paths depending on whether the sector data is local to the sealing operation or stored remotely, as sketched after the diagram.
+
+```mermaid
+flowchart TD;
+    spc2[Start PC2] --> isCC{is CC?};
+    spc2 --> TC;
+    isCC --> |False| chkL{Data is Local?};
+    chkL --> |False| SR;
+    isCC --> |True| LCC;
+    chkL --> |True| ER;
+
+    subgraph SupraSeal PC2 Local;
+    LCC[Local CC] --> WL[Write Last Layer to Filesystem];
+    LCC[Local CC] --> TR;
+    TC[Build Parallel Tree C] --> CR;
+    TR[Build Parallel Tree R] --> CR;
+    WR[Write Replica to Filesystem];
+    CR[Calculate Comm R];
+    ER[Encode Replica K+D] --> WR;
+    SR[Send to Remote];
+    WT --> CR;
+    ER --> TR;
+    end;
+
+    ERL --> RR[Replica on Remote Filesystem];
+    STR --> rtrf[Tree R File on Remote Filesystem];
+    SL --> WT;
+    TR --> trf[Tree R Files];
+    CR --> paux[Write paux to Filesystem];
+    TC --> tcf[Tree C Files];
+    WR --> Repdisk[Replica on Filesystem];
+    WL --> Repdisk;
+
+    subgraph SupraSeal PC2 Remote;
+    SR --> ERL[Encode Replica K+D];
+    ERL --> STR[Build Tree R];
+    STR --> SL[Send Root to Local];
+    end;
+
+    SR --> WT[Wait for Tree R Root];
+```
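The branching in the diagram reduces to a small dispatch. A hypothetical sketch of it; the function names below only mirror the boxes in the flowchart and are not the actual SupraSeal entry points:

```cpp
#include <cstdio>

// Illustrative stubs, one per box in the flowchart above
static void build_tree_c_parallel()  { std::puts("tree-c from NVMe layers"); }
static void build_tree_r_parallel()  { std::puts("tree-r"); }
static void write_last_layer_to_fs() { std::puts("write last layer"); }
static void encode_replica()         { std::puts("replica = key + data"); }
static void write_replica_to_fs()    { std::puts("write replica"); }
static void send_key_to_remote()     { std::puts("send key to remote"); }
static void wait_for_tree_r_root()   { std::puts("wait for tree-r root"); }
static void calculate_comm_r()       { std::puts("comm-r from roots"); }

struct Sector { bool is_cc; bool data_is_local; };

// Tree-c is unconditional; the rest depends on whether the sector is CC
// and where its data lives
void run_pc2(const Sector& s) {
  build_tree_c_parallel();
  if (s.is_cc) {
    write_last_layer_to_fs();
    build_tree_r_parallel();
  } else if (s.data_is_local) {
    encode_replica();
    write_replica_to_fs();
    build_tree_r_parallel();
  } else {
    send_key_to_remote();   // the remote side encodes and builds tree-r
    wait_for_tree_r_root();
  }
  calculate_comm_r();
}

int main() { run_pc2({false, true}); return 0; }
```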
diff --git a/extern/supraseal/pc2/cuda/cuda_lambda_t.hpp b/extern/supraseal/pc2/cuda/cuda_lambda_t.hpp new file mode 100644 index 000000000..c38b7b91d --- /dev/null +++ b/extern/supraseal/pc2/cuda/cuda_lambda_t.hpp @@ -0,0 +1,48 @@
+// Copyright Supranational LLC
+
+#ifndef __CUDA_LAMBDA_T_HPP__
+#define __CUDA_LAMBDA_T_HPP__
+
+#if __cplusplus < 201103L && !(defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
+# error C++11 or later is required.
+#endif
+#include
+#include
+#include
+#include
+
+// Lambda function execution from a cuda stream. Work scheduled using
+// 'schedule' will be inserted into the stream queue and executed
+// in a separate thread when triggered.
+class cuda_lambda_t {
+  typedef std::function<void()> job_t;
+  thread_pool_t pool;
+
+  struct work_item_t {
+    job_t work;
+    cuda_lambda_t* me;
+  };
+
+  // Stream callback: hand the work off to the thread pool so the CUDA
+  // callback thread is never blocked
+  static void cb(void *userData) {
+    work_item_t* work = (work_item_t*)userData;
+    work->me->pool.spawn([work]() {
+      work->work();
+      delete work;
+    });
+  }
+
+public:
+  cuda_lambda_t(size_t num_threads = 0) :
+    pool(num_threads)
+  {}
+
+  template<class Workable>
+  void schedule(stream_t &stream, Workable work) {
+    work_item_t* new_work = new work_item_t;
+    new_work->me = this;
+    new_work->work = work;
+    cudaLaunchHostFunc(stream, cb, new_work);
+  }
+};
+
+#endif
diff --git a/extern/supraseal/pc2/cuda/file_writer_t.hpp b/extern/supraseal/pc2/cuda/file_writer_t.hpp new file mode 100644 index 000000000..a92e68b8c --- /dev/null +++ b/extern/supraseal/pc2/cuda/file_writer_t.hpp @@ -0,0 +1,45 @@
+// Copyright Supranational LLC
+
+#ifndef __FILE_WRITER_T_HPP__
+#define __FILE_WRITER_T_HPP__
+
+#include "../../util/mmap_t.hpp"
+#include "../../util/file_t.hpp"
+
+// Uniform front end over mmap-backed and fd-backed file writers
+template<class T>
+class file_writer_t {
+  file_t<T> file_writer;
+  mmap_t<T> mmap_writer;
+  bool use_mmap;
+
+public:
+  file_writer_t() {}
+
+  int open(std::string _fname, size_t _size,
+           bool remove_first = false, bool _use_mmap = true) {
+    use_mmap = _use_mmap;
+    if (use_mmap) {
+      return mmap_writer.mmap_write(_fname, _size, remove_first);
+    } else {
+      return file_writer.file_write(_fname, _size, remove_first);
+    }
+  }
+
+  void write_data(size_t offset, T* buf, size_t size) {
+    if (use_mmap) {
+      mmap_writer.write_data(offset, buf, size);
+    } else {
+      file_writer.write_data(offset, buf, size);
+    }
+  }
+
+  void advise_random() {
+    if (use_mmap) {
+      mmap_writer.advise_random();
+    } else {
+      file_writer.advise_random();
+    }
+  }
+};
+
+#endif
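`cuda_lambda_t` above is a thin wrapper around `cudaLaunchHostFunc`, which enqueues a host callback that fires in stream order; CUDA forbids calling the CUDA API from such callbacks and expects them to stay cheap, hence the immediate hand-off to a thread pool. A minimal standalone illustration of the underlying mechanism (not the wrapper itself):

```cpp
#include <cstdio>
#include <cuda_runtime.h>

// Host callback: runs once all prior work in the stream has completed.
// It must not call the CUDA API; printing is fine.
static void host_cb(void* userData) {
  printf("stream reached the callback: %s\n", (const char*)userData);
}

int main() {
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  // ... async kernels / copies would be enqueued on the stream here ...
  cudaLaunchHostFunc(stream, host_cb, (void*)"after prior stream work");
  cudaStreamSynchronize(stream); // wait until the callback has run
  cudaStreamDestroy(stream);
  return 0;
}
```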
diff --git a/extern/supraseal/pc2/cuda/host_ptr_t.hpp b/extern/supraseal/pc2/cuda/host_ptr_t.hpp new file mode 100644 index 000000000..5322ac7fe --- /dev/null +++ b/extern/supraseal/pc2/cuda/host_ptr_t.hpp @@ -0,0 +1,28 @@
+// Copyright Supranational LLC
+
+#ifndef __HOST_PTR_T_HPP__
+#define __HOST_PTR_T_HPP__
+
+// A simple way to allocate a host pointer without having to
+// care about freeing it.
+template<class T> class host_ptr_t {
+  T* h_ptr;
+  size_t nelems;
+public:
+  // Pinned (page-locked) host memory, so GPU transfers can use DMA
+  host_ptr_t(size_t _nelems) : h_ptr(nullptr), nelems(_nelems)
+  {
+    if (nelems) {
+      CUDA_OK(cudaMallocHost(&h_ptr, nelems * sizeof(T)));
+    }
+  }
+  ~host_ptr_t() { if (h_ptr) cudaFreeHost((void*)h_ptr); }
+
+  size_t size() { return nelems; }
+  inline operator const T*() const { return h_ptr; }
+  inline operator T*() const { return h_ptr; }
+  inline operator void*() const { return (void*)h_ptr; }
+  inline const T& operator[](size_t i) const { return h_ptr[i]; }
+  inline T& operator[](size_t i) { return h_ptr[i]; }
+};
+
+#endif
diff --git a/extern/supraseal/pc2/cuda/pc2.cu b/extern/supraseal/pc2/cuda/pc2.cu new file mode 100644 index 000000000..12d632fd1 --- /dev/null +++ b/extern/supraseal/pc2/cuda/pc2.cu @@ -0,0 +1,1518 @@
+// Copyright Supranational LLC
+
+#include "../../poseidon/cuda/poseidon.cu"
+#include "../../util/debug_helpers.hpp"
+#include "host_ptr_t.hpp"
+
+#ifndef __CUDA_ARCH__
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include "../planner.cpp"
+#include "pc2.cuh"
+#include "cuda_lambda_t.hpp"
+#include "../../util/util.hpp"
+
+template<class C>
+pc2_t<C>::pc2_t(topology_t& _topology,
+                bool _tree_r_only, streaming_node_reader_t<C>& _reader,
+                size_t _nodes_to_read, size_t _batch_size,
+                size_t _stream_count,
+                const char** _data_filenames, const char* _output_dir) :
+  topology(_topology),
+  tree_r_only(_tree_r_only),
+  reader(_reader),
+  nodes_to_read(_nodes_to_read),
+  batch_size(_batch_size),
+  tree_c_address(C::GetNumNodes() / C::GetNumTreeRCFiles(),
+                 C::GetNumTreeRCArity(), NODE_SIZE, 0),
+  tree_r_address(C::GetNumNodes() / C::GetNumTreeRCFiles(),
+                 C::GetNumTreeRCArity(), NODE_SIZE, C::GetNumTreeRDiscardRows() + 1),
+  stream_count(_stream_count),
+  tree_c_partition_roots(C::PARALLEL_SECTORS * C::GetNumTreeRCFiles()),
+  tree_r_partition_roots(C::PARALLEL_SECTORS * C::GetNumTreeRCFiles()),
+  gpu_results_c(tree_r_only ?
0 :_batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity() * stream_count), + gpu_results_r(_batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity() * stream_count), + host_buf_storage(num_host_bufs * batch_size * C::PARALLEL_SECTORS), + data_filenames(_data_filenames), + output_dir(_output_dir) +{ + assert (C::GetNumTreeRCArity() == C::GetNumTreeRCArity()); + assert (nodes_to_read % stream_count == 0); + + open_files(); + + // Compute the final offset in the file for GPU data + const size_t cpu_nodes_to_hash = batch_size * stream_count / C::GetNumTreeRCArity() / C::GetNumTreeRCArity(); + tree_address_t final_tree(cpu_nodes_to_hash, C::GetNumTreeRCArity(), sizeof(fr_t), 0); + final_gpu_offset_c = tree_c_address.data_size() - final_tree.data_size(); + final_gpu_offset_r = tree_r_address.data_size() - final_tree.data_size(); + + // Compute an offset table used for multiple partitions + size_t nodes_per_stream = nodes_to_read / stream_count; + size_t layer_offset = nodes_per_stream; + while (layer_offset >= C::GetNumTreeRCArity()) { + layer_offsets_c.push_back(layer_offset); + layer_offset /= C::GetNumTreeRCArity(); + } + + layer_offset = nodes_per_stream; + for (size_t i = 0; i < C::GetNumTreeRDiscardRows() + 1; i++) { + layer_offset /= C::GetNumTreeRCArity(); + } + while (layer_offset >= C::GetNumTreeRCArity()) { + layer_offsets_r.push_back(layer_offset); + layer_offset /= C::GetNumTreeRCArity(); + } + + if (!tree_r_only) + poseidon_columns.resize(ngpus()); + + // Create GPU poseidon hashers and streams + size_t resource_id = 0; + for (size_t i = 0; i < ngpus(); i++) { + auto& gpu = select_gpu(i); + if (!tree_r_only) { + switch (C::GetNumLayers()) { + case 2: + poseidon_columns[i].arity_2 = new PoseidonCuda<3>(gpu); + break; + case 11: + poseidon_columns[i].arity_11 = new PoseidonCuda<12>(gpu); + break; + default: + assert(false); + } + } + poseidon_trees.push_back(new PoseidonCuda(gpu)); + + for (size_t j = 0; j < stream_count / ngpus(); j++) { + resources.push_back(new gpu_resource_t(resource_id, gpu, + nodes_per_stream, batch_size)); + resource_id++; + } + } + + // Register the SPDK page buffer with the CUDA driver + size_t page_buffer_size = 0; + page_buffer = (uint8_t*)reader.get_full_buffer(page_buffer_size); + cudaHostRegister(page_buffer, page_buffer_size, cudaHostRegisterDefault); + + // Set up host side buffers for returning data + host_bufs.resize(num_host_batches * disk_io_batch_size); + host_batches.resize(num_host_batches + num_host_empty_batches); + host_buf_pool_full.create(num_host_batches + num_host_empty_batches); + host_buf_pool_empty.create(num_host_batches + num_host_empty_batches); + host_buf_to_disk.create(num_host_batches + num_host_empty_batches); + + for (size_t i = 0; i < num_host_batches; i++) { + for (size_t j = 0; j < disk_io_batch_size; j++) { + host_batches[i].batch[j] = &host_bufs[i * disk_io_batch_size + j]; + host_batches[i].batch[j]->data = + &host_buf_storage[i * disk_io_batch_size * batch_size * C::PARALLEL_SECTORS + + j * batch_size * C::PARALLEL_SECTORS]; + } + host_buf_pool_full.enqueue(&host_batches[i]); + } + for (size_t i = 0; i < num_host_empty_batches; i++) { + for (size_t j = 0; j < disk_io_batch_size; j++) { + host_batches[i + num_host_batches].batch[j] = nullptr; + } + host_buf_pool_empty.enqueue(&host_batches[i + num_host_batches]); + } +} + +template +pc2_t::~pc2_t() { + while (resources.size() > 0) { + gpu_resource_t* r = resources.back(); + select_gpu(r->gpu); + + delete r; + resources.pop_back(); + } + for (size_t i = 0; i < 
ngpus(); i++) { + if (!tree_r_only) { + switch (C::GetNumLayers()) { + case 2: + delete poseidon_columns[i].arity_2; + break; + case 11: + delete poseidon_columns[i].arity_11; + break; + } + } + delete poseidon_trees[i]; + } + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + for (auto it : tree_c_files[i]) { + delete it; + } + for (auto it : tree_r_files[i]) { + delete it; + } + } + cudaHostUnregister(page_buffer); +} +/* +template +void pc2_t::get_filenames(const char* output_dir, + std::vector& directories, + std::vector& p_aux_filenames, + std::vector>& tree_c_filenames, + std::vector>& tree_r_filenames, + std::vector& sealed_filenames) { + // Put layer11 / sealed file in a replicas directory if it exists + std::string pc2_replica_output_dir = output_dir; + pc2_replica_output_dir += "/replicas"; + if (!std::filesystem::exists(pc2_replica_output_dir.c_str())) { + pc2_replica_output_dir = output_dir; + } + + const char* p_aux_template; + if (C::PARALLEL_SECTORS == 1) { + p_aux_template = "%s/p_aux"; + } else { + p_aux_template = "%s/%03ld/p_aux"; + } + // Open all tree-c and tree-r files + const char* tree_c_filename_template; + const char* tree_r_filename_template; + if (C::PARALLEL_SECTORS == 1) { + if (C::GetNumTreeRCFiles() > 1) { + tree_c_filename_template = "%s/sc-02-data-tree-c-%ld.dat"; + tree_r_filename_template = "%s/sc-02-data-tree-r-last-%ld.dat"; + } else { + tree_c_filename_template = "%s/sc-02-data-tree-c.dat"; + tree_r_filename_template = "%s/sc-02-data-tree-r-last.dat"; + } + } else { + if (C::GetNumTreeRCFiles() > 1) { + tree_c_filename_template = "%s/%03ld/sc-02-data-tree-c-%ld.dat"; + tree_r_filename_template = "%s/%03ld/sc-02-data-tree-r-last-%ld.dat"; + } else { + tree_c_filename_template = "%s/%03ld/sc-02-data-tree-c.dat"; + tree_r_filename_template = "%s/%03ld/sc-02-data-tree-r-last.dat"; + } + } + // And sealed files + const char* sealed_filename_template; + if (C::PARALLEL_SECTORS == 1) { + sealed_filename_template = "%s/sealed-file"; + } else { + sealed_filename_template = "%s/%03ld/sealed-file"; + } + + directories.push_back(output_dir); + + tree_c_filenames.resize(C::PARALLEL_SECTORS); + tree_r_filenames.resize(C::PARALLEL_SECTORS); + + const size_t MAX = 256; + char fname[MAX]; + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + // Create sector subdirs + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, "%s", output_dir); + } else { + snprintf(fname, MAX, "%s/%03ld", output_dir, i); + } + directories.push_back(fname); + + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, p_aux_template, output_dir); + } else { + snprintf(fname, MAX, p_aux_template, output_dir, i); + } + p_aux_filenames.push_back(fname); + + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, "%s", pc2_replica_output_dir.c_str()); + } else { + snprintf(fname, MAX, "%s/%03ld", pc2_replica_output_dir.c_str(), i); + } + directories.push_back(fname); + + for (size_t j = 0; j < C::GetNumTreeRCFiles(); j++) { + // tree-c + if (C::PARALLEL_SECTORS == 1) { + if (C::GetNumTreeRCFiles() > 1) { + snprintf(fname, MAX, tree_c_filename_template, output_dir, j); + } else { + snprintf(fname, MAX, tree_c_filename_template, output_dir); + } + } else { + if (C::GetNumTreeRCFiles() > 1) { + snprintf(fname, MAX, tree_c_filename_template, output_dir, i, j); + } else { + snprintf(fname, MAX, tree_c_filename_template, output_dir, i); + } + } + tree_c_filenames[i].push_back(fname); + + // tree-r + if (C::PARALLEL_SECTORS == 1) { + if (C::GetNumTreeRCFiles() > 1) { + snprintf(fname, MAX, 
tree_r_filename_template, output_dir, j); + } else { + snprintf(fname, MAX, tree_r_filename_template, output_dir); + } + } else { + if (C::GetNumTreeRCFiles() > 1) { + snprintf(fname, MAX, tree_r_filename_template, output_dir, i, j); + } else { + snprintf(fname, MAX, tree_r_filename_template, output_dir, i); + } + } + tree_r_filenames[i].push_back(fname); + } + + // Data files for encoding + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, sealed_filename_template, pc2_replica_output_dir.c_str()); + } else { + snprintf(fname, MAX, sealed_filename_template, pc2_replica_output_dir.c_str(), i); + } + sealed_filenames.push_back(fname); + } +} + */ + +template +void pc2_t::get_filenames(const char* output_dir, + std::vector& directories, + std::vector& p_aux_filenames, + std::vector>& tree_c_filenames, + std::vector>& tree_r_filenames, + std::vector& sealed_filenames) { + std::string pc2_replica_output_dir = output_dir; + pc2_replica_output_dir += "/replicas"; + if (!std::filesystem::exists(pc2_replica_output_dir.c_str())) { + pc2_replica_output_dir = output_dir; + } + + if (strncmp(output_dir, "//multi//", 9) == 0) { + const char* custom_paths = output_dir + 9; + parse_custom_paths(custom_paths, directories, p_aux_filenames, tree_c_filenames, tree_r_filenames, sealed_filenames); + } else { + generate_default_paths(output_dir, pc2_replica_output_dir, directories, p_aux_filenames, tree_c_filenames, tree_r_filenames, sealed_filenames); + } +} + +template +void pc2_t::parse_custom_paths(const char* custom_paths, + std::vector& directories, + std::vector& p_aux_filenames, + std::vector>& tree_c_filenames, + std::vector>& tree_r_filenames, + std::vector& sealed_filenames) { + const size_t MAX = 256; + char fname[MAX]; + + tree_c_filenames.resize(C::PARALLEL_SECTORS); + tree_r_filenames.resize(C::PARALLEL_SECTORS); + + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + uint32_t len; + memcpy(&len, custom_paths, sizeof(len)); + custom_paths += sizeof(len); + + std::string replicaPath(custom_paths, len); + custom_paths += len; + snprintf(fname, MAX, "%s", replicaPath.c_str()); + sealed_filenames.push_back(fname); + + memcpy(&len, custom_paths, sizeof(len)); + custom_paths += sizeof(len); + std::string cacheDir(custom_paths, len); + custom_paths += len; + + directories.push_back(cacheDir); + + snprintf(fname, MAX, "%s/p_aux", cacheDir.c_str()); + p_aux_filenames.push_back(fname); + + for (size_t j = 0; j < C::GetNumTreeRCFiles(); j++) { + snprintf(fname, MAX, "%s/sc-02-data-tree-c-%ld.dat", cacheDir.c_str(), j); + tree_c_filenames[i].push_back(fname); //// + + snprintf(fname, MAX, "%s/sc-02-data-tree-r-last-%ld.dat", cacheDir.c_str(), j); + tree_r_filenames[i].push_back(fname); + } + } +} + +template +void pc2_t::generate_default_paths(const char* output_dir, + const std::string& pc2_replica_output_dir, + std::vector& directories, + std::vector& p_aux_filenames, + std::vector>& tree_c_filenames, + std::vector>& tree_r_filenames, + std::vector& sealed_filenames) { + directories.push_back(output_dir); + + tree_c_filenames.resize(C::PARALLEL_SECTORS); + tree_r_filenames.resize(C::PARALLEL_SECTORS); + + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + add_paths_for_sector(output_dir, i, pc2_replica_output_dir, directories, p_aux_filenames, tree_c_filenames, tree_r_filenames, sealed_filenames); + } +} + +template +void pc2_t::add_paths_for_sector(const char* output_dir, + size_t sector, + const std::string& pc2_replica_output_dir, + std::vector& directories, + std::vector& p_aux_filenames, + 
std::vector>& tree_c_filenames, + std::vector>& tree_r_filenames, + std::vector& sealed_filenames) { + const size_t MAX = 256; + char fname[MAX]; + + const char* p_aux_template = (C::PARALLEL_SECTORS == 1) ? "%s/p_aux" : "%s/%03ld/p_aux"; + const char* tree_c_filename_template = (C::PARALLEL_SECTORS == 1) ? + (C::GetNumTreeRCFiles() > 1 ? "%s/sc-02-data-tree-c-%ld.dat" : "%s/sc-02-data-tree-c.dat") : + (C::GetNumTreeRCFiles() > 1 ? "%s/%03ld/sc-02-data-tree-c-%ld.dat" : "%s/%03ld/sc-02-data-tree-c.dat"); + const char* tree_r_filename_template = (C::PARALLEL_SECTORS == 1) ? + (C::GetNumTreeRCFiles() > 1 ? "%s/sc-02-data-tree-r-last-%ld.dat" : "%s/sc-02-data-tree-r-last.dat") : + (C::GetNumTreeRCFiles() > 1 ? "%s/%03ld/sc-02-data-tree-r-last-%ld.dat" : "%s/%03ld/sc-02-data-tree-r-last.dat"); + const char* sealed_filename_template = (C::PARALLEL_SECTORS == 1) ? "%s/sealed-file" : "%s/%03ld/sealed-file"; + + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, "%s", output_dir); + } else { + snprintf(fname, MAX, "%s/%03ld", output_dir, sector); + } + directories.push_back(fname); + + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, p_aux_template, output_dir); + } else { + snprintf(fname, MAX, p_aux_template, output_dir, sector); + } + p_aux_filenames.push_back(fname); + + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, "%s", pc2_replica_output_dir.c_str()); + } else { + snprintf(fname, MAX, "%s/%03ld", pc2_replica_output_dir.c_str(), sector); + } + directories.push_back(fname); + + for (size_t j = 0; j < C::GetNumTreeRCFiles(); j++) { + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, tree_c_filename_template, output_dir, j); + } else { + snprintf(fname, MAX, tree_c_filename_template, output_dir, sector, j); + } + tree_c_filenames[sector].push_back(fname); + + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, tree_r_filename_template, output_dir, j); + } else { + snprintf(fname, MAX, tree_r_filename_template, output_dir, sector, j); + } + tree_r_filenames[sector].push_back(fname); + } + + if (C::PARALLEL_SECTORS == 1) { + snprintf(fname, MAX, sealed_filename_template, pc2_replica_output_dir.c_str()); + } else { + snprintf(fname, MAX, sealed_filename_template, pc2_replica_output_dir.c_str(), sector); + } + sealed_filenames.push_back(fname); +} + +template +void pc2_t::open_files() { + std::vector directories; + std::vector> tree_c_filenames; + std::vector> tree_r_filenames; + std::vector sealed_filenames; + + get_filenames(output_dir, + directories, + p_aux_filenames, + tree_c_filenames, + tree_r_filenames, + sealed_filenames); + + for (auto it : directories) { + if (!std::filesystem::exists(it)) { + std::filesystem::create_directory(it); + } + } + has_cc_sectors = false; + has_non_cc_sectors = false; + + size_t num_tree_files = C::GetNumTreeRCFiles(); + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + if (!tree_r_only) { + tree_c_files[i].resize(num_tree_files); + } + tree_r_files[i].resize(num_tree_files); + for (size_t j = 0; j < num_tree_files; j++) { + // tree-c + if (!tree_r_only) { + tree_c_files[i][j] = new file_writer_t(); + assert(tree_c_files[i][j]->open(tree_c_filenames[i][j], + tree_c_address.data_size(), true, false) == 0); + tree_c_files[i][j]->advise_random(); + } + + // tree-r + tree_r_files[i][j] = new file_writer_t(); + assert(tree_r_files[i][j]->open(tree_r_filenames[i][j], + tree_r_address.data_size(), true, false) == 0); + tree_r_files[i][j]->advise_random(); + } + + // Data files for encoding + if (data_filenames != nullptr && 
data_filenames[i] != nullptr) { + data_files[i].mmap_read(data_filenames[i], C::GetSectorSize()); + // If there is a data file present we will encode layer 11 and write the + // sealed data + assert(sealed_files[i].open(sealed_filenames[i], C::GetSectorSize(), true, false) == 0); + has_non_cc_sectors = true; + } else { + // Write the raw layer 11 data + // It would be nice to write different files for encoded vs not encoded data but in + // reality we can't differentiate between CC and sectors that will use remote data. + // So we write them all to 'sealed_data' here. + assert(sealed_files[i].open(sealed_filenames[i], C::GetSectorSize(), true, false) == 0); + has_cc_sectors = true; + } + } +} + +template +void pc2_t::hash() { + thread_pool_t pool(1); + pool.spawn([&]() { + // Affinitize the thread in the pool + set_core_affinity(topology.pc2_hasher_cpu); + }); + + // Use a channel to prevent the GPU from racing ahead of the CPU + channel_t ch; + ch.send(-1); + + host_buffer_t cpu_input_c(gpu_results_c.size()); + host_buffer_t cpu_input_r(gpu_results_r.size()); + + auto start = std::chrono::high_resolution_clock::now(); + for (size_t partition = 0; partition < C::GetNumTreeRCFiles(); partition++) { + auto pstart_gpu = std::chrono::high_resolution_clock::now(); + hash_gpu(partition); + auto pstop_gpu = std::chrono::high_resolution_clock::now(); + + gpu_results_in_use.lock(); + ch.recv(); + pool.spawn([&, partition]() { + // Protect against a race condition for gpu_results where if the CPU hashing + // is slow relative to the GPU the results could be overwritten before they are + // used. + memcpy(&cpu_input_c[0], &gpu_results_c[0], gpu_results_c.size() * sizeof(fr_t)); + memcpy(&cpu_input_r[0], &gpu_results_r[0], gpu_results_r.size() * sizeof(fr_t)); + + gpu_results_in_use.unlock(); + + if (!tree_r_only) { + hash_cpu(&tree_c_partition_roots[partition * C::PARALLEL_SECTORS], + partition, &(cpu_input_c[0]), tree_c_files, final_gpu_offset_c); + } + hash_cpu(&tree_r_partition_roots[partition * C::PARALLEL_SECTORS], + partition, &(cpu_input_r[0]), tree_r_files, final_gpu_offset_r); + ch.send(partition); + }); + auto pstop_cpu = std::chrono::high_resolution_clock::now(); + uint64_t secs_gpu = std::chrono::duration_cast< + std::chrono::seconds>(pstop_gpu - pstart_gpu).count(); + uint64_t secs_cpu = std::chrono::duration_cast< + std::chrono::seconds>(pstop_cpu - pstop_gpu).count(); + printf("Partition %ld took %ld seconds (gpu %ld, cpu %ld)\n", + partition, secs_gpu + secs_cpu, secs_gpu, secs_cpu); + } + ch.recv(); + write_roots(&tree_c_partition_roots[0], &tree_r_partition_roots[0]); + auto stop = std::chrono::high_resolution_clock::now(); + uint64_t secs = std::chrono::duration_cast< + std::chrono::seconds>(stop - start).count(); + + size_t total_page_reads = nodes_to_read * C::GetNumTreeRCFiles() / + C::NODES_PER_PAGE * C::GetNumLayers(); + printf("pc2 took %ld seconds utilizing %0.1lf iOPS\n", + secs, (double)total_page_reads / (double)secs); +} + +template +void pc2_t::process_writes(int core, size_t max_write_size, + mtx_fifo_t& to_disk_fifo, + mtx_fifo_t& pool, + std::atomic& terminate, + std::atomic& disk_writer_done) { + set_core_affinity(core); + fr_t* staging = new fr_t[max_write_size]; + + size_t count = 0; + while(!terminate || to_disk_fifo.size() > 0) { + if (pool.is_full()) { + continue; + } + + buf_to_disk_batch_t* to_disk_batch = to_disk_fifo.dequeue(); + if (to_disk_batch != nullptr) { +#ifndef DISABLE_FILE_WRITES + for (size_t batch_elmt = 0; batch_elmt < disk_io_batch_size; 
batch_elmt++) { + buf_to_disk_t* to_disk = to_disk_batch->batch[batch_elmt]; + if (to_disk == nullptr || to_disk->size == 0) { + continue; + } + // printf("Writing batch element %ld stride %ld size %ld %p\n", + // batch_elmt, to_disk->stride, to_disk->size, to_disk->data); + if (to_disk->stride == 1) { + // Copy chunks of contiguous data + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + if (to_disk->src[i] != nullptr) { + // printf("Writing from %p to %p offset %ld size %ld\n", + // to_disk->src[i], to_disk->dst[i], to_disk->offset, to_disk->size); + to_disk->dst[i]->write_data(to_disk->offset, to_disk->src[i], to_disk->size); + } + } + } else { + // Copy strided src data + assert (max_write_size <= to_disk->size); + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + if (to_disk->src[i] != nullptr) { + for (size_t j = 0; j < to_disk->size; j++) { + staging[j] = to_disk->src[i][j * to_disk->stride]; + if (to_disk->reverse) { + node_t *n = (node_t*)&staging[j]; + n->reverse_l(); + } + } + to_disk->dst[i]->write_data(to_disk->offset, staging, to_disk->size); + } + } + } + } +#endif + // count++; + pool.enqueue(to_disk_batch); + } + } + delete [] staging; + disk_writer_done--; +} + +template +struct pc2_batcher_t { + typedef typename pc2_t::buf_to_disk_batch_t buf_to_disk_batch_t; + + buf_to_disk_batch_t* unbundle; + buf_to_disk_batch_t* bundle; + mtx_fifo_t& to_disk; + mtx_fifo_t& pool_full; + mtx_fifo_t& pool_empty; + size_t idx_unbundle; + size_t idx_bundle; + std::mutex mtx; + + pc2_batcher_t(mtx_fifo_t& _pool_full, + mtx_fifo_t& _pool_empty, + mtx_fifo_t& _to_disk) + : pool_full(_pool_full), pool_empty(_pool_empty), to_disk(_to_disk) + { + unbundle = pool_full.dequeue(); + bundle = pool_empty.dequeue(); + assert (unbundle != nullptr); + assert (bundle != nullptr); + idx_unbundle = 0; + idx_bundle = 0; + } + + ~pc2_batcher_t() { + flush(); + } + + void flush() { + std::unique_lock lock(mtx); + // Issue any partially bundles writes + assert (idx_bundle == idx_unbundle); + if (idx_bundle > 0) { + while (idx_bundle < buf_to_disk_batch_t::BATCH_SIZE) { + unbundle->batch[idx_unbundle]->size = 0; + bundle->batch[idx_bundle] = unbundle->batch[idx_unbundle++]; + idx_bundle++; + idx_unbundle++; + } + to_disk.enqueue(bundle); + pool_empty.enqueue(unbundle); + } else { + // Untouched bundle/unbundle batches + if (bundle != nullptr) { + pool_empty.enqueue(bundle); + } + if (unbundle != nullptr) { + pool_full.enqueue(bundle); + } + } + bundle = nullptr; + unbundle = nullptr; + idx_unbundle = 0; + idx_bundle = 0; + } + + buf_to_disk_t* dequeue() { + std::unique_lock lock(mtx); + if (unbundle == nullptr) { + unbundle = pool_full.dequeue(); + if (unbundle == nullptr) { + return nullptr; + } + } + buf_to_disk_t* buf = unbundle->batch[idx_unbundle++]; + if (idx_unbundle == buf_to_disk_batch_t::BATCH_SIZE) { + pool_empty.enqueue(unbundle); + unbundle = nullptr; + idx_unbundle = 0; + } + return buf; + } + + bool enqueue(buf_to_disk_t* buf) { + std::unique_lock lock(mtx); + if (bundle == nullptr) { + bundle = pool_empty.dequeue(); + if (bundle == nullptr) { + //return false; + assert(false); + } + } + bundle->batch[idx_bundle++] = buf; + if (idx_bundle == buf_to_disk_batch_t::BATCH_SIZE) { + to_disk.enqueue(bundle); + bundle = nullptr; + idx_bundle = 0; + } + return true; + } + + size_t size() { + std::unique_lock lock(mtx); + return std::min + (// Available buffer slots to store data + (unbundle == nullptr ? 
0 : (buf_to_disk_batch_t::BATCH_SIZE - idx_unbundle)) + + pool_full.size() * buf_to_disk_batch_t::BATCH_SIZE, + + // Available empty buffer slots + (bundle == nullptr ? 0 : (buf_to_disk_batch_t::BATCH_SIZE - idx_bundle)) + + pool_empty.size() * buf_to_disk_batch_t::BATCH_SIZE); + } +}; + + +template +void pc2_t::hash_gpu(size_t partition) { + assert (stream_count % ngpus() == 0); + + nodes_per_stream = nodes_to_read / stream_count; + + for (size_t i = 0; i < resources.size(); i++) { + resources[i]->reset(); + } + + // Start a thread to process writes to disk + std::atomic terminate = false; + const size_t num_writers = (size_t)this->topology.pc2_writer_cores; + thread_pool_t pool(num_writers); + std::atomic disk_writer_done(num_writers); + for (size_t i = 0; i < num_writers; i++) { + pool.spawn([this, &terminate, &disk_writer_done, i]() { + process_writes(this->topology.pc2_writer + i, batch_size, + host_buf_to_disk, host_buf_pool_full, + terminate, disk_writer_done); + }); + } + pc2_batcher_t disk_batcher(host_buf_pool_full, host_buf_pool_empty, host_buf_to_disk); + + bool all_done = false; + cuda_lambda_t cuda_notify(1); + in_ptrs_d in_d; + buf_to_disk_t* to_disk = nullptr; + buf_to_disk_t* to_disk_r = nullptr; + fr_t* fr = nullptr; + size_t disk_bufs_needed = 0; + + // printf("to_disk_fifo %ld, pool_full %ld, pool_empty %ld\n", + // host_buf_to_disk.size(), host_buf_pool_full.size(), host_buf_pool_empty.size()); + + //size_t num_writes = 0; + + // auto start = std::chrono::high_resolution_clock::now(); + while (!all_done) { + // auto now = std::chrono::high_resolution_clock::now(); + // uint64_t secs = std::chrono::duration_cast< + // std::chrono::seconds>(now - start).count(); + // if (secs > 60) { + // printf("to_disk_fifo %ld, pool_full %ld, pool_empty %ld\n", + // host_buf_to_disk.size(), host_buf_pool_full.size(), host_buf_pool_empty.size()); + // for (size_t resource_num = 0; resource_num < resources.size(); resource_num++) { + // printf("resource %ld state %d\n", resource_num, (int)resources[resource_num]->state); + // } + // start = now; + // } + + all_done = true; + for (size_t resource_num = 0; resource_num < resources.size(); resource_num++) { + gpu_resource_t& resource = *resources[resource_num]; + select_gpu(resource.gpu); + int gpu_id = resource.gpu.id(); + + if (resource.state != ResourceState::DONE) { + all_done = false; + } + + fr_t* out_c_d = nullptr; + fr_t* out_r_d = nullptr; + size_t layer_offset; + node_id_t addr; + size_t offset_c; + size_t offset_r; + bool write_tree_r; + bool write_tree_c; + + // Device storage for the hash result + if (resource.work_c.buf != nullptr) { + out_c_d = &(*resource.work_c.buf)[0]; + out_r_d = &(*resource.work_r.buf)[0]; + } + + switch (resource.state) { + case ResourceState::DONE: + // Nothing + break; + + case ResourceState::IDLE: + // Initiate data read + resource.last = !resource.scheduler_c.next([](work_item_t& w) {}, + &resource.work_c); + resource.scheduler_r.next([](work_item_t& w) {}, + &resource.work_r); + if (resource.work_c.is_leaf) { +#ifdef DISABLE_FILE_READS + resource.state = ResourceState::HASH_COLUMN; + resource.column_data = (fr_t*)reader.get_slot(resource.id); +#else + resource.state = ResourceState::DATA_READ; +#endif + } else { + resource.state = ResourceState::HASH_LEAF; + } + break; + + case ResourceState::DATA_READ: + // Initiate the next data read + resource.start_node = (// Perform batch_size nodes in parallel + (uint64_t)resource.work_c.idx.node() * batch_size + + // Each resource (GPU stream) works on a 
differet nodes_per_stream chunk + nodes_per_stream * resource.id + + // Each partition is size nodes_to_read + partition * nodes_to_read); + resource.column_data = (fr_t*)reader.load_layers + (resource.id, + tree_r_only ? C::GetNumLayers() - 1 : 0, // start layer + resource.start_node, batch_size, + tree_r_only ? 1 : C::GetNumLayers(), // num_layers + &resource.valid, &resource.valid_count); + resource.state = ResourceState::DATA_WAIT; + break; + + case ResourceState::DATA_WAIT: + if (resource.valid.load() == resource.valid_count) { + if (disk_batcher.size() < 1) { + break; + } + to_disk = disk_batcher.dequeue(); + assert (to_disk != nullptr); + + fr_t* encode_buf = &resource.replica_data[0]; + + // Copy layer 11 data to to_disk buffer for encoding/writing + // If only building tree-r then only the last layer is present + fr_t* layer11; + if (tree_r_only) { + layer11 = &resource.column_data[0]; + } else { + layer11 = &resource.column_data[C::PARALLEL_SECTORS * + (C::GetNumLayers() - 1) * batch_size]; + } + memcpy(encode_buf, layer11, + C::PARALLEL_SECTORS * batch_size * sizeof(fr_t)); + + // Encode non CC sectors + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + if (data_files[i].is_open()) { + for (size_t j = 0; j < batch_size; j++) { + // Perform the field add without moving to Montgomery space + fr_t data = data_files[i][resource.start_node + j]; + fr_t* elmt = &encode_buf[i + j * C::PARALLEL_SECTORS]; + node_t* n = (node_t*)elmt; + if (!reader.data_is_big_endian()) { + n->reverse_l(); + } + *elmt += data; + if (!reader.data_is_big_endian()) { + n->reverse_l(); + } + } + } + } + + // Prepare write pointers + to_disk->size = batch_size; + to_disk->stride = C::PARALLEL_SECTORS; + to_disk->reverse = true; + to_disk->offset = resource.start_node; + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + to_disk->src[i] = &to_disk->data[i]; + to_disk->dst[i] = &sealed_files[i]; + } + + // Copy the encoded replica data into the disk buffer + memcpy(&to_disk->data[0], + &resource.replica_data[0], + batch_size * C::PARALLEL_SECTORS * sizeof(fr_t)); + + assert(disk_batcher.enqueue(to_disk)); + if (tree_r_only) { + resource.state = ResourceState::HASH_COLUMN_LEAVES; + } else { + resource.state = ResourceState::HASH_COLUMN; + } + } + break; + + case ResourceState::HASH_COLUMN: + if (disk_batcher.size() < 1) { + break; + } + to_disk = disk_batcher.dequeue(); + assert (to_disk != nullptr); + + resource.stream.HtoD(&resource.column_data_d[0], resource.column_data, resource.batch_elements); + + // Hash the columns + switch (C::GetNumLayers()) { + case 2: + poseidon_columns[gpu_id].arity_2->hash_batch_device + (out_c_d, &resource.column_data_d[0], &resource.aux_d[0], + batch_size * C::PARALLEL_SECTORS, C::PARALLEL_SECTORS, + resource.stream, true, false, true, true, + !reader.data_is_big_endian()); + break; + case 11: + poseidon_columns[gpu_id].arity_11->hash_batch_device + (out_c_d, &resource.column_data_d[0], &resource.aux_d[0], + batch_size * C::PARALLEL_SECTORS, C::PARALLEL_SECTORS, + resource.stream, true, false, true, true, + !reader.data_is_big_endian()); + break; + default: + assert(false); + } + + // Initiate copy of the hashed data from GPU + fr = to_disk->data; + resource.stream.DtoH(fr, out_c_d, batch_size * C::PARALLEL_SECTORS); + + // Initiate transfer of tree-c data to files + layer_offset = layer_offsets_c[resource.work_c.idx.layer() - 1]; + addr = node_id_t(resource.work_c.idx.layer() - 1, + resource.work_c.idx.node() * batch_size + layer_offset * resource_num); + offset_c = 
tree_c_address.address(addr); + to_disk->size = batch_size; + to_disk->stride = 1; + to_disk->reverse = false; + to_disk->offset = offset_c / sizeof(fr_t); + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + //to_disk->dst[i] = (fr_t*)&tree_c_files[i][partition][offset_c]; + to_disk->dst[i] = tree_c_files[i][partition]; + to_disk->src[i] = &to_disk->data[i * batch_size]; + // printf("Initiate column write[%ld] from %p to %p offset %ld size %ld\n", + // i, to_disk->src[i], to_disk->dst[i], to_disk->offset, to_disk->size); + } + //num_writes++; + + resources[resource_num]->async_done = false; + cuda_notify.schedule(resource.stream, [this, resource_num, offset_c, + to_disk, &disk_batcher]() { + assert(disk_batcher.enqueue(to_disk)); + resources[resource_num]->async_done = true; + }); + + resource.state = ResourceState::HASH_COLUMN_LEAVES; + break; + + case ResourceState::HASH_COLUMN_LEAVES: + if (!resources[resource_num]->async_done) { + break; + } + if (!tree_r_only) { + if (disk_batcher.size() < 1) { + break; + } + to_disk = disk_batcher.dequeue(); + assert (to_disk != nullptr); + + // Hash tree-c + poseidon_trees[gpu_id]->hash_batch_device + (out_c_d, out_c_d, &resource.aux_d[0], + batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity(), 1, + resource.stream, false, false, true, true, + !reader.data_is_big_endian()); + } + + // Hash tree-r using the replica data. If there are any non-CC + // sectors then copy the encoded replica data over + if (has_non_cc_sectors || tree_r_only) { + resource.stream.HtoD + (&resource.column_data_d[batch_size * C::PARALLEL_SECTORS * (C::GetNumLayers() - 1)], + &resource.replica_data[0], C::PARALLEL_SECTORS * batch_size); + } + poseidon_trees[gpu_id]->hash_batch_device + (out_r_d, + &resource.column_data_d[batch_size * C::PARALLEL_SECTORS * (C::GetNumLayers() - 1)], + &resource.aux_d[0], + batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity(), + C::PARALLEL_SECTORS, + resource.stream, false, true, true, true, + !reader.data_is_big_endian()); + + if (!tree_r_only) { + // Initiate copy of the hashed data from GPU, reusing the host side column buffer + resource.stream.DtoH(&to_disk->data[0], out_c_d, + batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity()); + + // Initiate transfer of tree-c data to files + layer_offset = layer_offsets_c[resource.work_c.idx.layer()]; + addr = node_id_t(resource.work_c.idx.layer(), + resource.work_c.idx.node() * batch_size / C::GetNumTreeRCArity() + + layer_offset * resource_num); + offset_c = tree_c_address.address(addr); + to_disk->size = batch_size / C::GetNumTreeRCArity(); + to_disk->stride = 1; + to_disk->reverse = false; + to_disk->offset = offset_c / sizeof(fr_t); + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + to_disk->dst[i] = tree_c_files[i][partition]; + to_disk->src[i] = &to_disk->data[i * batch_size / C::GetNumTreeRCArity()]; + // printf("Initiate column leaf write from %p to %p offset %ld size %ld\n", + // to_disk->src[i], to_disk->dst[i], to_disk->offset, to_disk->size); + } + } + + resources[resource_num]->async_done = false; + cuda_notify.schedule(resource.stream, [this, resource_num, to_disk, &disk_batcher]() { + if (!tree_r_only) { + assert (disk_batcher.enqueue(to_disk)); + } + resources[resource_num]->async_done = true; + }); + + resource.state = ResourceState::HASH_WAIT; + break; + + case ResourceState::HASH_LEAF: + write_tree_c = !tree_r_only; + write_tree_r = resource.work_r.idx.layer() > C::GetNumTreeRDiscardRows(); + disk_bufs_needed = write_tree_c + write_tree_r; + if 
(disk_batcher.size() < disk_bufs_needed) { + break; + } + if (resource.last && !gpu_results_in_use.try_lock()) { + break; + } + if (!tree_r_only) { + if (write_tree_c) { + to_disk = disk_batcher.dequeue(); + assert (to_disk != nullptr); + } + + // Hash tree-c + for (size_t i = 0; i < C::GetNumTreeRCArity(); i++) { + in_d.ptrs[i] = &(*resource.work_c.inputs[i])[0]; + } + + poseidon_trees[gpu_id]->hash_batch_device_ptrs + (out_c_d, in_d, &resource.aux_d[0], + batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity(), + C::PARALLEL_SECTORS, + resource.stream, false, false, true, true, + !reader.data_is_big_endian()); + } + + // Hash tree-r + for (size_t i = 0; i < C::GetNumTreeRCArity(); i++) { + in_d.ptrs[i] = &(*resource.work_r.inputs[i])[0]; + } + poseidon_trees[gpu_id]->hash_batch_device_ptrs + (out_r_d, in_d, &resource.aux_d[0], + batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity(), + C::PARALLEL_SECTORS, + resource.stream, false, false, true, true, + !reader.data_is_big_endian()); + + if (!tree_r_only) { + // Initiate copy of the hashed data + resource.stream.DtoH(&to_disk->data[0], out_c_d, + batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity()); + if (resource.last) { + // Stash the final result in a known place + size_t stride = batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity(); + fr_t* host_buf_c = (fr_t*)&gpu_results_c[resource.id * stride]; + CUDA_OK(cudaMemcpyAsync(host_buf_c, &to_disk->data[0], + batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity() * sizeof(fr_t), + cudaMemcpyHostToHost, resource.stream)); + } + + // Compute offsets in the output files - tree-c + layer_offset = layer_offsets_c[resource.work_c.idx.layer()]; + addr = node_id_t(resource.work_c.idx.layer(), + resource.work_c.idx.node() * batch_size / C::GetNumTreeRCArity() + + layer_offset * resource_num); + offset_c = tree_c_address.address(addr); + to_disk->size = batch_size / C::GetNumTreeRCArity(); + to_disk->stride = 1; + to_disk->reverse = false; + to_disk->offset = offset_c / sizeof(fr_t); + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + //to_disk->dst[i] = (fr_t*)&tree_c_files[i][partition][offset_c]; + to_disk->dst[i] = tree_c_files[i][partition]; + to_disk->src[i] = &to_disk->data[i * batch_size / C::GetNumTreeRCArity()]; + // printf("Initiate tree-c write from %p to %p offset %ld size %ld\n", + // to_disk->src[i], to_disk->dst[i], to_disk->offset, to_disk->size); + } + } + + // tree-r + if (resource.last) { + // Stash the final result in a known place + size_t stride = batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity(); + fr_t* host_buf_r = (fr_t*)&gpu_results_r[resource.id * stride]; + resource.stream.DtoH(host_buf_r, out_r_d, + batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity()); + } + + if (write_tree_r) { + to_disk_r = disk_batcher.dequeue(); + assert (to_disk_r != nullptr); + resource.stream.DtoH(&to_disk_r->data[0], out_r_d, + batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity()); + + layer_offset = layer_offsets_r[resource.work_r.idx.layer() - C::GetNumTreeRDiscardRows() - 1]; + addr = node_id_t(resource.work_r.idx.layer() - C::GetNumTreeRDiscardRows() - 1, + resource.work_r.idx.node() * batch_size / C::GetNumTreeRCArity() + + layer_offset * resource_num); + offset_r = tree_r_address.address(addr); + to_disk_r->size = batch_size / C::GetNumTreeRCArity(); + to_disk_r->stride = 1; + to_disk_r->reverse = false; + to_disk_r->offset = offset_r / sizeof(fr_t); + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + //to_disk_r->dst[i] = 
(fr_t*)&tree_r_files[i][partition][offset_r]; + to_disk_r->dst[i] = tree_r_files[i][partition]; + to_disk_r->src[i] = &to_disk_r->data[i * batch_size / C::GetNumTreeRCArity()]; + // printf("Initiate tree-r write from %p to %p offset %ld size %ld\n", + // to_disk->src[i], to_disk->dst[i], to_disk->offset, to_disk->size); + } + } + + // Initiate transfer of data to files + resources[resource_num]->async_done = false; + cuda_notify.schedule(resource.stream, [this, resource_num, &disk_batcher, + to_disk, to_disk_r, write_tree_r, write_tree_c]() { + if (resources[resource_num]->last) { + gpu_results_in_use.unlock(); + } + if (write_tree_c) { + assert(disk_batcher.enqueue(to_disk)); + } + if (write_tree_r) { + assert(disk_batcher.enqueue(to_disk_r)); + } + resources[resource_num]->async_done = true; + }); + + resource.state = ResourceState::HASH_WAIT; + break; + + case ResourceState::HASH_WAIT: + if (resource.async_done.load() == true) { + if (resource.last) { + resource.state = ResourceState::DONE; + } else { + resource.state = ResourceState::IDLE; + } + } + break; + + default: + abort(); + } + } + } + for (size_t resource_num = 0; resource_num < stream_count; resource_num++) { + resources[resource_num]->stream.sync(); + } + disk_batcher.flush(); + + terminate = true; + + // Really only need this at the last partition... + while (disk_writer_done > 0) {} + + //printf("num_writes %ld\n", num_writes); +} + +template +void pc2_t::hash_cpu(fr_t* roots, size_t partition, fr_t* input, + std::vector*>* tree_files, + size_t file_offset) { + // This count is one layer above the leaves + const size_t nodes_to_hash = batch_size * stream_count / C::GetNumTreeRCArity() / C::GetNumTreeRCArity(); + // Number of consecutive nodes in the input stream + const size_t group_size = batch_size / C::GetNumTreeRCArity(); + // For simplicity of indexing require batch size to be a multiple of arity + assert (group_size % C::GetNumTreeRCArity() == 0); + + tree_address_t final_tree(nodes_to_hash, C::GetNumTreeRCArity(), sizeof(fr_t), 0); + + Poseidon hasher(C::GetNumTreeRCArity()); + + auto hash_func = [this, &hasher, &final_tree, input, partition, tree_files, file_offset, group_size] + (work_item_t& w) { + node_id_t addr(w.idx.layer() - 1, w.idx.node()); + size_t offset = final_tree.address(addr) + file_offset; + + if (w.is_leaf) { + for (size_t sector = 0; sector < C::PARALLEL_SECTORS; sector++) { + fr_t* out = &(*w.buf)[sector]; + fr_t in[C::GetNumTreeRCArity()]; + + size_t first_input_node = w.idx.node() * C::GetNumTreeRCArity(); + for (size_t i = 0; i < C::GetNumTreeRCArity(); i++) { + size_t input_group = (first_input_node + i) / group_size; + size_t node_in_group = (first_input_node + i) % group_size; + + in[i] = input[input_group * group_size * C::PARALLEL_SECTORS + + sector * group_size + node_in_group]; + } + hasher.Hash((uint8_t*)out, (uint8_t*)in); + tree_files[sector][partition]->write_data(offset / sizeof(fr_t), &out[0], 1); + } + } else { + for (size_t sector = 0; sector < C::PARALLEL_SECTORS; sector++) { + fr_t* out = &(*w.buf)[sector]; + fr_t in[C::GetNumTreeRCArity()]; + for (size_t i = 0; i < C::GetNumTreeRCArity(); i++) { + in[i] = (*w.inputs[i])[sector]; + } + hasher.Hash((uint8_t*)out, (uint8_t*)in); + tree_files[sector][partition]->write_data(offset / sizeof(fr_t), (fr_t*)&out[0], 1); + } + } + }; + + buffers_t buffers(C::PARALLEL_SECTORS); + scheduler_t scheduler(nodes_to_hash, C::GetNumTreeRCArity(), buffers); + host_buffer_t* host_buf = scheduler.run(hash_func); + memcpy(roots, &(*host_buf)[0], 
sizeof(fr_t) * C::PARALLEL_SECTORS); + assert (scheduler.is_done()); +} + +template +void pc2_t::write_roots(fr_t* roots_c, fr_t* roots_r) { + if (C::GetNumTreeRCFiles() > 1) { + Poseidon hasher = C::GetNumTreeRCFiles() == 16 ? + Poseidon(2) : Poseidon(C::GetNumTreeRCFiles()); + Poseidon hasher8(8); + + for (size_t sector = 0; sector < C::PARALLEL_SECTORS; sector++) { + fr_t in[C::GetNumTreeRCFiles()]; + fr_t out_c; + if (!tree_r_only) { + for (size_t i = 0; i < C::GetNumTreeRCFiles(); i++) { + in[i] = roots_c[i * C::PARALLEL_SECTORS + sector]; + } + if (C::GetNumTreeRCFiles() == 16) { + hasher8.Hash((uint8_t*)&in[0], (uint8_t*)&in[0]); + hasher8.Hash((uint8_t*)&in[1], (uint8_t*)&in[8]); + } + + hasher.Hash((uint8_t*)&out_c, (uint8_t*)in); + } + + fr_t out_r; + for (size_t i = 0; i < C::GetNumTreeRCFiles(); i++) { + in[i] = roots_r[i * C::PARALLEL_SECTORS + sector]; + } + if (C::GetNumTreeRCFiles() == 16) { + hasher8.Hash((uint8_t*)&in[0], (uint8_t*)&in[0]); + hasher8.Hash((uint8_t*)&in[1], (uint8_t*)&in[8]); + } + hasher.Hash((uint8_t*)&out_r, (uint8_t*)in); + + int p_aux = open(p_aux_filenames[sector].c_str(), O_RDWR | O_CREAT, (mode_t)0664); + assert (p_aux != -1); + if (tree_r_only) { + fr_t zero; + zero.zero(); + assert (write(p_aux, &zero, sizeof(fr_t)) == sizeof(fr_t)); + } else { + assert (write(p_aux, &out_c, sizeof(fr_t)) == sizeof(fr_t)); + } + assert (write(p_aux, &out_r, sizeof(fr_t)) == sizeof(fr_t)); + close(p_aux); + } + } else { + for (size_t sector = 0; sector < C::PARALLEL_SECTORS; sector++) { + fr_t out_c = roots_c[sector]; + fr_t out_r = roots_r[sector]; + + int p_aux = open(p_aux_filenames[sector].c_str(), O_RDWR | O_CREAT, (mode_t)0664); + assert (p_aux != -1); + if (tree_r_only) { + fr_t zero; + zero.zero(); + assert (write(p_aux, &zero, sizeof(fr_t)) == sizeof(fr_t)); + } else { + assert (write(p_aux, &out_c, sizeof(fr_t)) == sizeof(fr_t)); + } + assert (write(p_aux, &out_r, sizeof(fr_t)) == sizeof(fr_t)); + close(p_aux); + } + } +} + +template +void pc2_hash(topology_t& topology, + bool tree_r_only, + streaming_node_reader_t& reader, + size_t nodes_to_read, size_t batch_size, + size_t stream_count, + const char** data_filenames, const char* output_dir) { + pc2_t pc2(topology, tree_r_only, reader, nodes_to_read, batch_size, stream_count, + data_filenames, output_dir); + pc2.hash(); +} + +template +void do_pc2_cleanup(const char* output_dir) { + std::vector directories; + std::vector p_aux_filenames; + std::vector> tree_c_filenames; + std::vector> tree_r_filenames; + std::vector sealed_filenames; + + pc2_t::get_filenames(output_dir, + directories, + p_aux_filenames, + tree_c_filenames, + tree_r_filenames, + sealed_filenames); + + for (auto fname : p_aux_filenames) { + std::filesystem::remove(fname); + } + for (auto fname : sealed_filenames) { + std::filesystem::remove(fname); + } + for (size_t i = 0; i < tree_c_filenames.size(); i++) { + for (auto fname : tree_c_filenames[i]) { + std::filesystem::remove(fname); + } + } + for (size_t i = 0; i < tree_r_filenames.size(); i++) { + for (auto fname : tree_r_filenames[i]) { + std::filesystem::remove(fname); + } + } +} + +#ifdef RUNTIME_SECTOR_SIZE +template void pc2_hash(topology_t&, bool, streaming_node_reader_t&, size_t, size_t, size_t, const char**, const char*); +template void pc2_hash(topology_t&, bool, streaming_node_reader_t&, size_t, size_t, size_t, const char**, const char*); +template void pc2_hash(topology_t&, bool, streaming_node_reader_t&, size_t, size_t, size_t, const char**, const char*); +template void 
pc2_hash(topology_t&, bool, streaming_node_reader_t&, size_t, size_t, size_t, const char**, const char*);
+// ... one explicit instantiation of pc2_hash, of the same shape as above, for
+// each remaining RUNTIME_SECTOR_SIZE sealing configuration (the
+// sealing_config_*_{2KB,4KB,16KB,32KB,8MB,16MB,1GB,64GB}_t typedefs in
+// sealing/constants.hpp) ...
+#endif
+// One explicit instantiation of pc2_hash for each 512MB and 32GB sealing
+// configuration (sealing_config_{1,2,4,8,16,32,64,128}_{512MB,32GB}_t):
+template void pc2_hash(topology_t&, bool, streaming_node_reader_t&, size_t, size_t, size_t, const char**, const char*);
+// ... repeated for the remaining 15 configurations ...
+
+
+#ifdef RUNTIME_SECTOR_SIZE
+// One explicit instantiation of do_pc2_cleanup for each of the 64
+// RUNTIME_SECTOR_SIZE sealing configurations:
+template void do_pc2_cleanup(const char* output_dir);
+// ... repeated for the remaining 63 configurations ...
+#endif
+// One explicit instantiation of do_pc2_cleanup for each 512MB and 32GB
+// sealing configuration:
+template void do_pc2_cleanup(const char* output_dir);
+// ... repeated for the remaining 15 configurations ...
+
+#endif
diff --git a/extern/supraseal/pc2/cuda/pc2.cuh b/extern/supraseal/pc2/cuda/pc2.cuh
new file mode 100644
index 000000000..9456866c8
--- /dev/null
+++ b/extern/supraseal/pc2/cuda/pc2.cuh
@@ -0,0 +1,316 @@
+// Copyright Supranational LLC
+#ifndef __PC2_CUH__
+#define __PC2_CUH__
+
+#include "../../nvme/ring_t.hpp"
+#include "../pc2_internal.hpp"
+#include "file_writer_t.hpp"
+
+//#define DISABLE_FILE_READS
+//#define DISABLE_FILE_WRITES
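+// (Illustration, assumes arity 8, 64 leaf nodes and 32-byte nodes: the class
+// below records one byte offset per tree layer, e.g.
+//   layer 0 at offset 0     (64 nodes * 32 B = 2048 B)
+//   layer 1 at offset 2048  ( 8 nodes * 32 B =  256 B)
+//   layer 2 at offset 2304  (root)
+// so address() of node n in layer l is layer_offsets[l] + n * 32, and
+// data_size() returns 2304 + 32 = 2336 bytes.)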
+
+// Class to compute the offset of serialized nodes in a tree.
+template <class P>
+class tree_address_t {
+  size_t node_count;
+  size_t arity;
+  size_t node_size;
+  std::vector<size_t> layer_offsets;
+public:
+  tree_address_t(size_t _node_count, size_t _arity, size_t _node_size, size_t layer_skips)
+    : node_count(_node_count), arity(_arity), node_size(_node_size) {
+    size_t layer = 0;
+    size_t offset = 0;
+    size_t arity = P::GetNumTreeRCArity();
+
+    for (size_t i = 0; i < layer_skips; i++) {
+      node_count /= arity;
+    }
+    while (node_count > 1) {
+      layer_offsets.push_back(offset);
+      layer++;
+      offset += node_count * node_size;
+      node_count /= arity;
+    }
+    layer_offsets.push_back(offset);
+  }
+
+  size_t address(node_id_t<P>
& node) { + size_t base = layer_offsets[node.layer()]; + return base + (size_t)node.node() * node_size; + } + + // Total tree size + size_t data_size() { + return layer_offsets.back() + node_size; + } + + void print() { + size_t layer = 0; + for (auto i : layer_offsets) { + printf("layer %2ld, offset 0x%08lx %ld\n", layer, i, i); + layer++; + } + } +}; + +enum class ResourceState { + IDLE, + DATA_READ, + DATA_WAIT, + HASH_COLUMN, + HASH_COLUMN_WRITE, + HASH_COLUMN_LEAVES, + HASH_LEAF, + HASH_WAIT, + DONE +}; + +typedef host_ptr_t host_buffer_t; + +template +struct gpu_resource_t { + size_t id; + + // GPU id + const gpu_t& gpu; + + // GPU stream + stream_t stream; + + // Storage for column input data + size_t batch_elements; + // Host side column (layer) data + fr_t* column_data; + // Device side column (layer) data + dev_ptr_t column_data_d; + // Host side column (layer) data + host_ptr_t replica_data; + // Starting node for the column data + size_t start_node; + // Valid count from page reader + std::atomic valid; + // Expected valid count for all pages + size_t valid_count; + + // Hashed node buffers + buffers_t buffers; + + // Aux buffer + dev_ptr_t aux_d; + + // Schedulers for tree-c and tree-r. They will follow identical paths + // but this is a clean way to track input/output buffers through the tree. + scheduler_t scheduler_c; + scheduler_t scheduler_r; + + // Current work item + work_item_t work_c; + work_item_t work_r; + // Flag set by Cuda when a hashing job is complete + std::atomic async_done; + + ResourceState state; + + // Last hash is in progress + bool last; + + gpu_resource_t(size_t _id, + const gpu_t& _gpu, + size_t _nodes_to_read, + size_t _batch_size) + : id(_id), + gpu(_gpu), + stream(gpu.id()), + // TODO: could allocate 1 layer when only doing tree_r + batch_elements(C::PARALLEL_SECTORS * C::GetNumLayers() * _batch_size), + column_data_d(batch_elements), + replica_data(C::PARALLEL_SECTORS * _batch_size), + buffers(_batch_size * C::PARALLEL_SECTORS), + // Size aux to hold the larger of the tree and column hash data + aux_d(max(// column aux size + _batch_size * C::PARALLEL_SECTORS * (C::GetNumLayers() + 1), + // tree aux size - expand to hold domain tag + _batch_size * C::PARALLEL_SECTORS / C::GetNumTreeRCArity() * + C::GetNumTreeRCArityDT())), + scheduler_c(_nodes_to_read / _batch_size, C::GetNumTreeRCArity(), buffers), + scheduler_r(_nodes_to_read / _batch_size, C::GetNumTreeRCArity(), buffers), + async_done(true), + state(ResourceState::IDLE), + last(false) + {} + void reset() { + state = ResourceState::IDLE; + last = false; + async_done = true; + scheduler_c.reset(); + scheduler_r.reset(); + } +}; + +template +struct buf_to_disk_t { + // Block of data from the device (pointed into by src) + fr_t* data; + // Destination address (mmapped file) + file_writer_t* dst[C::PARALLEL_SECTORS]; + size_t offset; + + // Source address + fr_t* src[C::PARALLEL_SECTORS]; + // Size of each write, in field elements + size_t size; + // Stride for subsequent field elements + size_t stride; + // Whether bytes should be reversed + bool reverse; +}; + +template +class pc2_t { +private: + topology_t& topology; + bool tree_r_only; + streaming_node_reader_t& reader; + size_t nodes_to_read; + size_t batch_size; + tree_address_t tree_c_address; + tree_address_t tree_r_address; + size_t stream_count; + size_t nodes_per_stream; + + union PoseidonCudaOption { + PoseidonCuda<3>* arity_2; + PoseidonCuda<12>* arity_11; + }; + + // Array of vectors of mapped files + std::vector*> 
tree_c_files[C::PARALLEL_SECTORS]; + std::vector*> tree_r_files[C::PARALLEL_SECTORS]; + // Files that store the data being sealed + mmap_t data_files[C::PARALLEL_SECTORS]; + // Files that store the sealed data + file_writer_t sealed_files[C::PARALLEL_SECTORS]; + + // Store the partition roots + std::vector tree_c_partition_roots; + std::vector tree_r_partition_roots; + + // Storage to transfer results from GPU to CPU for tree-c and tree-r + std::mutex gpu_results_in_use; + host_ptr_t gpu_results_c; + host_ptr_t gpu_results_r; + + // Final offset for GPU data in tree-c and tree-rfiles + size_t final_gpu_offset_c; + size_t final_gpu_offset_r; + + // Used to compute the actual node id for the various streams + std::vector layer_offsets_c; + std::vector layer_offsets_r; + + // GPU resources + std::vector poseidon_columns; + std::vector*> poseidon_trees; + std::vector*> resources; + + // Buffer to store pages loaded from drives + uint8_t* page_buffer; + + // Buffer pool for data coming back from GPU + // The number of buffers should be large enough to hide disk IO delays. + // + static const size_t num_host_bufs = 1<<13; + static const size_t disk_io_batch_size = 64; + // static const size_t num_host_bufs = 64; + // static const size_t disk_io_batch_size = 4; + static const size_t num_host_batches = num_host_bufs / disk_io_batch_size; + // Should be a minimum of gpu resources / disk_io_batch_size + static const size_t num_host_empty_batches = 8; +public: + typedef batch_t*, disk_io_batch_size> buf_to_disk_batch_t; +private: + + // Memory space for the host side buffers + host_ptr_t host_buf_storage; + // Store the host buffer batch objects + // Each batch contains disk_io_batch_size buffers, each of which contains + // batch_size * C::PARALLEL_SECTORS field elements. + std::vector> host_bufs; + std::vector host_batches; + // Queue to write to disk + mtx_fifo_t host_buf_to_disk; + // Pool of available full batches + mtx_fifo_t host_buf_pool_full; + // Pool of available empty batches + mtx_fifo_t host_buf_pool_empty; + + // p_aux filenames + std::vector p_aux_filenames; + + // When performing data encoding, the source data files. `data_filenames` + // or any individual pointer may be null, in which case CC is assumed. 
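+  // (Aside: a CC, i.e. committed-capacity, sector seals all-zero data, so no
+  //  source data file needs to be supplied for it; a null entry here is
+  //  treated as such a sector.)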
+ const char** data_filenames; + // Record the presence of CC/non-CC to simplify coding logic + bool has_cc_sectors; + bool has_non_cc_sectors; + + // The output directory for files we will write + const char* output_dir; + +public: + static void get_filenames(const char* output_dir, + std::vector& directories, + std::vector& p_aux_filenames, + std::vector>& tree_c_filenames, + std::vector>& tree_r_filenames, + std::vector& sealed_filenames); +private: + void open_files(); + + void hash_gpu(size_t partition); + void hash_cpu(fr_t* roots, size_t partition, fr_t* input, + std::vector*>* tree_files, + size_t file_offset); + void write_roots(fr_t* roots_c, fr_t* roots_r); + void process_writes(int core, size_t max_write_size, + mtx_fifo_t& to_disk, + mtx_fifo_t& pool, + std::atomic& terminate, + std::atomic& disk_writer_done); + + static void parse_custom_paths(const char* custom_paths, + std::vector& directories, + std::vector& p_aux_filenames, + std::vector>& tree_c_filenames, + std::vector>& tree_r_filenames, + std::vector& sealed_filenames); + + static void generate_default_paths(const char* output_dir, + const std::string& pc2_replica_output_dir, + std::vector& directories, + std::vector& p_aux_filenames, + std::vector>& tree_c_filenames, + std::vector>& tree_r_filenames, + std::vector& sealed_filenames); + + static void add_paths_for_sector(const char* output_dir, + size_t sector, + const std::string& pc2_replica_output_dir, + std::vector& directories, + std::vector& p_aux_filenames, + std::vector>& tree_c_filenames, + std::vector>& tree_r_filenames, + std::vector& sealed_filenames); + +public: + pc2_t(topology_t& _topology, + bool _tree_r_only, streaming_node_reader_t& _reader, + size_t _nodes_to_read, size_t _batch_size, size_t _stream_count, + const char** data_filenames, const char* output_dir); + ~pc2_t(); + + void hash(); +}; + +#endif diff --git a/extern/supraseal/pc2/pc2.hpp b/extern/supraseal/pc2/pc2.hpp new file mode 100644 index 000000000..b8435b592 --- /dev/null +++ b/extern/supraseal/pc2/pc2.hpp @@ -0,0 +1,124 @@ +// Copyright Supranational LLC +#ifndef __PC2_HPP__ +#define __PC2_HPP__ + +#include +#include "pc2_internal.hpp" +#include "../util/util.hpp" +#include "../pc1/tree_c.hpp" +#include "../pc1/tree_r.hpp" + +template +void do_pc2_cpu(topology_t& topology, + nvme_controllers_t& controllers, + streaming_node_reader_t& node_reader, size_t block_offset, + const char** data_filenames, const char* output_dir) { + + node_reader.alloc_slots(1, C::GetNumNodes() * C::GetNumLayers(), true); + std::atomic valid; + size_t valid_count; + fr_t* data = (fr_t*)node_reader.load_layers(0, 0, 0, + C::GetNumNodes(), C::GetNumLayers(), &valid, &valid_count); + + thread_pool_t pool; + + while (valid.load() != valid_count) { + usleep(100); + } + + for (size_t i = 0; i < C::PARALLEL_SECTORS; i++) { + std::vector tree_c_input(C::GetNumNodes() * C::GetNumLayers()); + std::vector tree_r_input(C::GetNumNodes()); + node_t tree_c_root, tree_r_root; + + for (size_t j = 0; j < C::GetNumLayers(); j++) { + for (size_t k = 0; k < C::GetNumNodes(); k++) { + fr_t val = data[j * C::GetNumNodes() * C::PARALLEL_SECTORS + + k * C::PARALLEL_SECTORS + i]; + tree_c_input[k * C::GetNumLayers() + j] = val; + if (j == C::GetNumLayers() - 1) { + tree_r_input[k] = val; + } + } + } + + std::string pc2_replica_output_dir = output_dir; + pc2_replica_output_dir += "/replicas"; + if (!std::filesystem::exists(pc2_replica_output_dir.c_str())) { + pc2_replica_output_dir = output_dir; + } + + const size_t MAX = 256; + char 
fname[MAX];
+      if (C::PARALLEL_SECTORS > 1) {
+        snprintf(fname, MAX, "%s/%03ld", pc2_replica_output_dir.c_str(), i);
+      } else {
+        snprintf(fname, MAX, "%s", pc2_replica_output_dir.c_str());
+      }
+
+      std::string sub_output_dir(fname);
+      if (!std::filesystem::exists(sub_output_dir)) {
+        std::filesystem::create_directory(sub_output_dir);
+      }
+
+      std::string p_aux_path = sub_output_dir + std::string("/p_aux");
+      mmap_t<node_t> p_aux_file;
+      p_aux_file.mmap_write(p_aux_path.c_str(), 2 * sizeof(node_t), true);
+      TreeC tree_c;
+      tree_c_root = tree_c.BuildTreeC((node_t*)&tree_c_input[0],
+                                      sub_output_dir, pool);
+      TreeR tree_r;
+      tree_r_root = tree_r.BuildTreeR((node_t*)&tree_r_input[0],
+                                      sub_output_dir, pool);
+
+      p_aux_file[0] = tree_c_root;
+      p_aux_file[1] = tree_r_root;
+  }
+}
+
+template <class C>
+int do_pc2(topology_t& topology,
+           nvme_controllers_t& controllers, size_t block_offset,
+           const char** data_filenames, const char* output_dir) {
+  set_core_affinity(topology.pc2_hasher);
+
+  streaming_node_reader_t<C> node_reader(&controllers, topology.pc2_qpair,
+                                         block_offset, topology.pc2_reader,
+                                         (size_t)topology.pc2_sleep_time);
+
+  // Do PC2 on the CPU if the sector size is <= 32KiB
+  if (C::GetSectorSizeLg() <= 15) {
+    do_pc2_cpu(topology, controllers, node_reader, block_offset,
+               data_filenames, output_dir);
+
+    return 0;
+  } else {
+    // Total number of streams across all GPUs
+    // Use fewer streams if sector size is <= 16MiB
+    size_t stream_count = C::GetSectorSizeLg() < 24 ? 8 : 64;
+
+    // Batch size in nodes. Each node includes all parallel sectors
+    // Reduce batch size if sector size is <= 16MiB
+    // TODO: try larger batch size for 32GB
+    size_t batch_size = C::GetSectorSizeLg() < 24 ? 64 * 8 : 64;
+    assert (batch_size % C::GetNumTreeRCArity() == 0);
+
+    // Nodes to read per partition
+    size_t nodes_to_read = C::GetNumNodes() / C::GetNumTreeRCFiles();
+
+    assert (batch_size % C::NODES_PER_PAGE == 0);
+
+    // Allocate storage for 2x the streams to support tree-c and tree-r
+    // PC2 assumes that nodes for subsequent layers are contiguous, so each
+    // layer's nodes should fill some number of pages.
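+    // (Illustrative arithmetic, assuming a 32GiB sector: GetSectorSizeLg() is
+    //  35, so stream_count = 64 and batch_size = 64, and the call below
+    //  allocates 128 reader slots of GetNumLayers() * 64 nodes each.)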
+    node_reader.alloc_slots(stream_count * 2, C::GetNumLayers() * batch_size, true);
+
+    bool tree_r_only = false;
+    pc2_hash(topology, tree_r_only, node_reader,
+             nodes_to_read, batch_size, stream_count,
+             data_filenames, output_dir);
+    return 0;
+  }
+}
+
+#endif
diff --git a/extern/supraseal/pc2/pc2_internal.hpp b/extern/supraseal/pc2/pc2_internal.hpp
new file mode 100644
index 000000000..bb5154057
--- /dev/null
+++ b/extern/supraseal/pc2/pc2_internal.hpp
@@ -0,0 +1,25 @@
+// Copyright Supranational LLC
+#ifndef __PC2_INTERNAL_HPP__
+#define __PC2_INTERNAL_HPP__
+
+#include "../sealing/constants.hpp"
+#include "../sealing/data_structures.hpp"
+#include "../sealing/topology_t.hpp"
+#ifdef STREAMING_NODE_READER_FILES
+#include "../c1/streaming_node_reader_files.hpp"
+#else
+#include "../nvme/streaming_node_reader_nvme.hpp"
+#endif
+
+template <class C>
+void pc2_hash(topology_t& topology,
+              bool tree_r_only,
+              streaming_node_reader_t<C>& _reader,
+              size_t _nodes_to_read, size_t _batch_size,
+              size_t _stream_count,
+              const char** data_filenames, const char* output_dir);
+
+template <class C>
+void do_pc2_cleanup(const char* output_dir);
+
+#endif
diff --git a/extern/supraseal/pc2/planner.cpp b/extern/supraseal/pc2/planner.cpp
new file mode 100644
index 000000000..cb0c531bc
--- /dev/null
+++ b/extern/supraseal/pc2/planner.cpp
@@ -0,0 +1,215 @@
+// Copyright Supranational LLC
+
+#include
+#include
+#include
+#include
+#include
+#include "pc2_internal.hpp"
+
+//using namespace std;
+
+typedef dev_ptr_t<fr_t> gpu_buffer_t;
+
+template <class P, class buffer_t>
+struct work_item_t {
+  // Index of the arity elements to be hashed
+  node_id_t<P>
idx;
+  size_t dependencies_ready;
+  bool is_leaf;
+  buffer_t* buf;
+  buffer_t* inputs[P::GetNumTreeRCArity()];
+
+  work_item_t() {
+    idx = (uint64_t)-1;
+    dependencies_ready = 0;
+    is_leaf = false;
+    buf = nullptr;
+    for (size_t i = 0; i < P::GetNumTreeRCArity(); i++) {
+      inputs[i] = nullptr;
+    }
+  }
+
+  work_item_t(node_id_t<P>
_idx, bool _is_leaf) {
+    idx = _idx;
+    dependencies_ready = 0;
+    is_leaf = _is_leaf;
+    buf = nullptr;
+    for (size_t i = 0; i < P::GetNumTreeRCArity(); i++) {
+      inputs[i] = nullptr;
+    }
+  }
+
+  size_t leaf_num() {
+    return idx.node() & (P::GetNumTreeRCArity() - 1);
+  }
+
+  void print() {
+    printf("layer %2d, node %08x, deps ready %ld, buf %p",
+           idx.layer(), idx.node(), dependencies_ready, buf);
+  }
+};
+
+template <class buffer_t>
+class buffers_t {
+
+  std::vector<buffer_t*> buffers;
+  size_t num_elements;
+  size_t num_buffers;
+
+public:
+  buffers_t(size_t _num_elements) {
+    num_buffers = 0;
+    num_elements = _num_elements;
+  }
+
+  ~buffers_t() {
+    while (buffers.size() != 0) {
+      delete buffers.back();
+      buffers.pop_back();
+    }
+  }
+
+  buffer_t* get() {
+    buffer_t* buf;
+    if (buffers.size() == 0) {
+      buf = new buffer_t(num_elements);
+      num_buffers++;
+    } else {
+      buf = buffers.back();
+      buffers.pop_back();
+    }
+    return buf;
+  }
+
+  void put(buffer_t* buf) {
+    buffers.push_back(buf);
+  }
+
+  size_t size() {
+    return buffers.size();
+  }
+};
+
+void indent(size_t levels) {
+  for (size_t i = 0; i < levels; i++) {
+    printf(" ");
+  }
+}
+
+// This doesn't need to consider striding of sectors within a page except
+// for the storage used.
+template <class P, class buffer_t>
+class scheduler_t {
+public:
+  typedef std::vector<work_item_t<P, buffer_t>> work_stack_t;
+  typedef std::map<node_id_t<P>, work_item_t<P, buffer_t>> work_map_t;
+  typedef std::function<void(work_item_t<P, buffer_t>&)> hash_cb_t;
+
+protected:
+  size_t initial_nodes;
+  work_stack_t stack;
+  work_map_t wip;
+  buffers_t<buffer_t>& bufs;
+  size_t hash_count;
+  size_t arity;
+
+public:
+  scheduler_t(size_t _initial_nodes, size_t _arity,
+              buffers_t<buffer_t>& _bufs)
+    : initial_nodes(_initial_nodes), bufs(_bufs)
+  {
+    arity = _arity;
+
+    reset();
+  }
+
+  ~scheduler_t() {
+  }
+
+  void reset() {
+    hash_count = 0;
+
+    wip.clear();
+    stack.clear();
+
+    // Insert all of the initial work for the bottom layer of the tree
+    for (size_t i = 0; i < initial_nodes; i++) {
+      // Start with layer one since the hash represents the resulting layer,
+      // not the input layer
+      stack.emplace_back(node_id_t<P>
(1, initial_nodes - i - 1), true);
+    }
+  }
+
+  // Returns true if there is more work to be done
+  bool next(hash_cb_t hash_cb, work_item_t<P, buffer_t>* work) {
+    if (stack.size() == 0) {
+      return false;
+    }
+
+    // Pop the element
+    work_item_t<P, buffer_t> w = stack.back();
+    stack.pop_back();
+
+    // We need a buffer whether it's a leaf or an internal node
+    w.buf = bufs.get();
+    if (work) {
+      *work = w;
+    } else {
+      // Perform a hash
+      hash_cb(w);
+    }
+
+    hash_count++;
+
+    // Return the input buffers to the pool
+    if (!w.is_leaf) {
+      for (size_t i = 0; i < P::GetNumTreeRCArity(); i++) {
+        bufs.put(w.inputs[i]);
+      }
+    }
+
+    // Compute the location of the parent node
+    // Map them to the output node in the next layer
+    node_id_t<P>
next_layer_id(w.idx.layer() + 1, w.idx.node() / arity);
+
+    // Record the result
+    if (wip.find(next_layer_id) == wip.end()) {
+      wip.emplace(next_layer_id, work_item_t<P, buffer_t>(next_layer_id, false));
+    }
+    work_item_t<P, buffer_t>& dependant_work = wip.at(next_layer_id);
+    dependant_work.inputs[w.leaf_num()] = w.buf;
+
+    dependant_work.dependencies_ready++;
+    if (dependant_work.dependencies_ready == arity) {
+      // Move from wip into the queue
+      stack.push_back(dependant_work);
+      wip.erase(next_layer_id);
+    }
+
+    return stack.size() != 0;
+  }
+
+  bool is_done() {
+    if (wip.size() != 1) {
+      printf("ERROR planner.cpp: expected wip.size() to be 1\n");
+      assert(false);
+    }
+    work_item_t<P, buffer_t>& work = wip.begin()->second;
+    if (work.inputs[0] == nullptr) {
+      printf("ERROR planner.cpp: expected work.inputs[0] != nullptr\n");
+      assert(false);
+    }
+    if (work.inputs[1] != nullptr) {
+      printf("ERROR planner.cpp: expected work.inputs[1] == nullptr\n");
+      assert(false);
+    }
+    return true;
+  }
+
+  buffer_t* run(hash_cb_t hash_cb) {
+    while(next(hash_cb, nullptr)) {}
+    work_item_t<P, buffer_t>& work = wip.begin()->second;
+    return work.inputs[0];
+  }
+};
diff --git a/extern/supraseal/poseidon/constants/constants_11 b/extern/supraseal/poseidon/constants/constants_11
new file mode 100644
index 000000000..40cd016c1
Binary files /dev/null and b/extern/supraseal/poseidon/constants/constants_11 differ
diff --git a/extern/supraseal/poseidon/constants/constants_16 b/extern/supraseal/poseidon/constants/constants_16
new file mode 100644
index 000000000..902258a1d
Binary files /dev/null and b/extern/supraseal/poseidon/constants/constants_16 differ
diff --git a/extern/supraseal/poseidon/constants/constants_2 b/extern/supraseal/poseidon/constants/constants_2
new file mode 100644
index 000000000..93349c571
Binary files /dev/null and b/extern/supraseal/poseidon/constants/constants_2 differ
diff --git a/extern/supraseal/poseidon/constants/constants_24 b/extern/supraseal/poseidon/constants/constants_24
new file mode 100644
index 000000000..56727726d
Binary files /dev/null and b/extern/supraseal/poseidon/constants/constants_24 differ
diff --git a/extern/supraseal/poseidon/constants/constants_36 b/extern/supraseal/poseidon/constants/constants_36
new file mode 100644
index 000000000..9470850e7
Binary files /dev/null and b/extern/supraseal/poseidon/constants/constants_36 differ
diff --git a/extern/supraseal/poseidon/constants/constants_4 b/extern/supraseal/poseidon/constants/constants_4
new file mode 100644
index 000000000..c2378654d
Binary files /dev/null and b/extern/supraseal/poseidon/constants/constants_4 differ
diff --git a/extern/supraseal/poseidon/constants/constants_8 b/extern/supraseal/poseidon/constants/constants_8
new file mode 100644
index 000000000..8160f78bd
Binary files /dev/null and b/extern/supraseal/poseidon/constants/constants_8 differ
diff --git a/extern/supraseal/poseidon/cuda/poseidon.cu b/extern/supraseal/poseidon/cuda/poseidon.cu
new file mode 100644
index 000000000..4913ba92f
--- /dev/null
+++ b/extern/supraseal/poseidon/cuda/poseidon.cu
@@ -0,0 +1,179 @@
+// Copyright Supranational LLC
+
+#include
+#include
+
+struct kernel_params_t {
+  int t;
+  int partial_rounds;
+  int half_full_rounds;
+  fr_t* round_constants;
+  fr_t* mds_matrix;
+  fr_t* pre_sparse_matrix;
+  fr_t* sparse_matrices;
+};
+
+#include "../poseidon.hpp"
+#ifndef __CUDA_ARCH__
+#include "../poseidon.cpp"
+#endif
+
+// CUDA doesn't seem to like templatized kernel arguments so encapsulate
+// it in a struct.
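+// (Background note: CUDA kernel parameters are passed by value, so wrapping a
+//  fixed-size array of device pointers in a plain struct lets one launch
+//  argument carry all ARITY input pointers, which is what in_ptrs_d below does.)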
+template +struct in_ptrs_d { + fr_t* ptrs[ARITY]; +}; + +#include "poseidon_kernels.cu" + +template +struct PoseidonInternal { + static const size_t DOMAIN_TAG = 1; + static const size_t ARITY = ARITY_DT - DOMAIN_TAG; + + static void hash_batch_ptrs(kernel_params_t& params, fr_t& domain_tag, + //fr_t* out_d, fr_t* in_d[ARITY], fr_t* aux_d, + fr_t* out_d, in_ptrs_d in_d, fr_t* aux_d, + size_t num_hashes, size_t stride, + const cudaStream_t& stream, + const bool first_tree_c, const bool first_tree_r, + const bool to_mont, const bool from_mont, const bool bswap, + const bool multi_in_ptr = true) { + // block size for kernels 1 and 3 where we launch one thread per element + const int block_size_13 = (256 / ARITY_DT) * ARITY_DT; + const int hashes_per_block_13 = block_size_13 / ARITY_DT; + // Block size for kernels 2 and 4 where we launch one thread per ARITY_DT elements + const int block_size_24 = 128; + + int thread_count_13 = num_hashes * ARITY_DT; + int block_count_13 = (thread_count_13 + block_size_13 - 1) / block_size_13; + int block_count_24 = (num_hashes + block_size_24 - 1) / block_size_24; + + // printf("threads_13 %d, threads 24 %d\n", + // block_size_13 * block_count_13, + // block_size_24 * block_count_24); + + assert (aux_d != in_d.ptrs[0]); + poseidon_hash_1_0<<>> + (in_d, aux_d, + domain_tag, + params, + num_hashes, stride, + to_mont, bswap, + first_tree_c, first_tree_r, multi_in_ptr); + + poseidon_hash_2<<>> + (aux_d, + params, + ARITY_DT * (params.half_full_rounds + 1), + params.half_full_rounds, + num_hashes); + + poseidon_hash_3<<>> + (aux_d, + params, + ARITY_DT * (params.half_full_rounds + 1) + params.partial_rounds, + params.half_full_rounds + params.partial_rounds, + thread_count_13); + + poseidon_hash_4<<>> + (aux_d, out_d, + params.mds_matrix, + num_hashes, from_mont); + } +}; + +template struct PoseidonInternal<12>; +template struct PoseidonInternal<9>; +template struct PoseidonInternal<3>; + +#ifndef __CUDA_ARCH__ +template +class PoseidonCuda : public Poseidon { + static const size_t DOMAIN_TAG = 1; + static const size_t ARITY = ARITY_DT - DOMAIN_TAG; + + gpu_ptr_t constants_d; + kernel_params_t kernel_params; + const gpu_t& gpu; + +public: + PoseidonCuda(const gpu_t& _gpu) : Poseidon(ARITY), gpu(_gpu) { + select_gpu(gpu); + constants_d = gpu_ptr_t{(fr_t*)gpu.Dmalloc(constants_size_)}; + fr_t* constants_ptr = &constants_d[0]; + gpu.HtoD(constants_ptr, constants_file_, constants_size_ / sizeof(fr_t)); + gpu.sync(); + + AssignPointers(constants_ptr, + &kernel_params.round_constants, &kernel_params.mds_matrix, + &kernel_params.pre_sparse_matrix, &kernel_params.sparse_matrices); + kernel_params.t = t_; + kernel_params.partial_rounds = partial_rounds_; + kernel_params.half_full_rounds = half_full_rounds_; + } + + void hash_batch(fr_t* out, fr_t* in, + size_t count, size_t stride, + const bool first_tree_c, const bool first_tree_r, + const bool to_mont, const bool from_mont, const bool bswap) { + select_gpu(gpu); + stream_t& stream = gpu[0]; + + size_t batch_count = ((count + stride - 1) / stride); + size_t elements_per_arity = batch_count * stride; + size_t elements_to_xfer = ARITY_DT * elements_per_arity; + + dev_ptr_t in_d(ARITY * elements_per_arity); + dev_ptr_t out_d(count); + dev_ptr_t aux_d(ARITY_DT * elements_per_arity); + + // printf("elements_htod %ld element[0] %08x element[128] %08x\n", + // ARITY * elements_per_arity, ((uint32_t*)&in[0])[0], ((uint32_t*)&in[128])[0]); + stream.HtoD(&in_d[0], in, ARITY * elements_per_arity); + hash_batch_device(&out_d[0], 
&in_d[0], &aux_d[0], + count, stride, + stream, first_tree_c, first_tree_r, + to_mont, from_mont, bswap); + + stream.DtoH(out, &out_d[0], count); + stream.sync(); + } + + void hash_batch_device(fr_t* out_d, fr_t* in_d, fr_t* aux_d, + size_t count, size_t stride, + stream_t& stream, const bool first_tree_c, const bool first_tree_r, + const bool to_mont, const bool from_mont, const bool bswap) { + select_gpu(gpu); + in_ptrs_d in_ptrs_d; + memset(&in_ptrs_d, 0, sizeof(in_ptrs_d)); + in_ptrs_d.ptrs[0] = in_d; + hash_batch_device_ptrs(out_d, in_ptrs_d, aux_d, + count, stride, + stream, first_tree_c, first_tree_r, + to_mont, from_mont, bswap, false); + } + + // count - number of hash results to produce + // The following are only used when first == true: + // stride - number of elements between subsequent inputs to a hash + void hash_batch_device_ptrs(fr_t* out_d, in_ptrs_d in_d, fr_t* aux_d, + size_t count, size_t stride, + stream_t& stream, const bool first_tree_c, const bool first_tree_r, + const bool to_mont, const bool from_mont, + const bool bswap, const bool multi_in_ptrs = true) { + select_gpu(gpu); + assert(count % stride == 0); + PoseidonInternal::hash_batch_ptrs(kernel_params, domain_tag_, + &out_d[0], in_d, &aux_d[0], + count, stride, + stream, first_tree_c, first_tree_r, + to_mont, from_mont, bswap, + multi_in_ptrs); + } +}; + +#endif diff --git a/extern/supraseal/poseidon/cuda/poseidon_kernels.cu b/extern/supraseal/poseidon/cuda/poseidon_kernels.cu new file mode 100644 index 000000000..8695ba9b3 --- /dev/null +++ b/extern/supraseal/poseidon/cuda/poseidon_kernels.cu @@ -0,0 +1,377 @@ +// Copyright Supranational LLC + +#include + + +#ifdef __CUDA_ARCH__ + +extern __shared__ fr_t scratchpad[]; + +__device__ __forceinline__ +fr_t pow_5(const fr_t& element) { + fr_t tmp = sqr(element); + tmp = sqr(tmp); + return element * tmp; +} + +__device__ __forceinline__ +void quintic_s_box(fr_t& element, const fr_t& round_constant) { + + element = pow_5(element); + element += round_constant; +} + +__device__ __forceinline__ +void partial_quintic_s_box(fr_t& element) { + + element = pow_5(element); +} + +__device__ __forceinline__ +void add_full_round_constants(fr_t& element, const fr_t& round_constant) { + + element += round_constant; +} + +__device__ __forceinline__ +void matrix_mul(fr_t& element, const fr_t* matrix, const int t, + const int thread_pos, const int shared_pos) { + + scratchpad[threadIdx.x] = element; + __syncthreads(); + + element = fr_t::dot_product(&scratchpad[shared_pos], &matrix[thread_pos], t, t); + __syncthreads(); +} + +__device__ __forceinline__ +fr_t last_matrix_mul(const fr_t* elements, const fr_t* matrix, const int t) { + + return fr_t::dot_product(elements, &matrix[1], t, t); +} + +__device__ __forceinline__ +void scalar_product(fr_t* elements, const fr_t* sparse_matrix, + const int t) { + + elements[0] *= sparse_matrix[0]; + elements[0] += fr_t::dot_product(&elements[1], &sparse_matrix[1], t-1); +} + +__device__ __forceinline__ +void sparse_matrix_mul(fr_t* elements, const fr_t* sparse_matrix, + const int t) { + + fr_t element0 = elements[0]; + + scalar_product(elements, sparse_matrix, t); + + #pragma unroll + for (int i = 1; i < t; i++) { + elements[i] += element0 * sparse_matrix[t + i - 1]; + } +} + +__device__ __forceinline__ +void round_matrix_mul(fr_t& element, const kernel_params_t constants, + const int current_round, const int thread_pos, + const int shared_pos) { + + if (current_round == constants.half_full_rounds - 1) { + matrix_mul(element, 
constants.pre_sparse_matrix, constants.t, + thread_pos, shared_pos); + } + else { + matrix_mul(element, constants.mds_matrix, constants.t, thread_pos, + shared_pos); + } +} + +__device__ __forceinline__ +void full_round(fr_t& element, const kernel_params_t constants, + int& rk_offset, int& current_round, const int thread_pos, + const int shared_pos) { + + + quintic_s_box(element, constants.round_constants[rk_offset]); + rk_offset += constants.t; + + round_matrix_mul(element, constants, current_round, thread_pos, shared_pos); + current_round++; +} + +__device__ __forceinline__ +void partial_round(fr_t* elements, const int t, + const kernel_params_t constants, + int& rk_offset, int& current_round) { + + quintic_s_box(elements[0], constants.round_constants[rk_offset]); + rk_offset += 1; + + sparse_matrix_mul(elements, constants.sparse_matrices + + (t * 2 - 1) * + (current_round - constants.half_full_rounds), t); + current_round++; +} + +__device__ __forceinline__ +uint32_t bswap(uint32_t a) +{ + uint32_t ret; + asm("prmt.b32 %0, %1, %1, 0x0123;" : "=r"(ret) : "r"(a)); + return ret; +} + +__device__ __forceinline__ +void bswap(fr_t& a) +{ + for (int i = 0; i < a.len(); i++) { + a[i] = bswap(a[i]); + } +} + +#endif + +// Perform first 4 full rounds +// in_ptrs - input data +// aux_ptr - aux buffer to store results +// constants - constants related to configuration & application +// mont - if true convert field elements to montgomery form +// first - if true this is the first operation on data +// multi_in_ptr - if true multiple input pointers are used +// Launch parameters +// One thread per element (including domain tag) +// in_ptr layout +// Contains input field elements with one empty element for the +// domain tag before each 'arity' set of inputs. +// dt0 fr0 fr1 fr2 fr3 fr4 fr5 fr6 fr7 dt1 etc +// If multi_in_ptr == false then all field elements are in a contiguous buffer. +// If multi_in_ptr == true then ARITY pointers are provided, one per branch with data +// layed out as: +// in_ptr[0] = s0n0 s1n0 s2n0 s3n0 ... +// in_ptr[1] = s0n1 s1n1 s2n1 s3n1 ... +// in_ptr[2] = s0n2 s1n2 s2n2 s3n2 ... +// ... +// in_ptr[7] = s0n7 s1n7 s2n7 s3n7 ... +// +// aux_ptr +// Will contain the hashed outputs in the same layout as in_ptr and should contain +// space for num_hashes * ARITY_DT elements. +// +// num_hashes - number of inputs to hash +// stride - number of elements between subsequent inputs to a hash +template __global__ +void poseidon_hash_1_0(in_ptrs_d in_ptrs, fr_t* aux_ptr, const fr_t domain_tag, + const kernel_params_t constants, + const int num_hashes, const int stride, + const bool to_mont, const bool do_bswap, + const bool first_tree_c, const bool first_tree_r, + const bool multi_in_ptr) { +#ifdef __CUDA_ARCH__ + const int ARITY = ARITY_DT - 1; + int current_round = 0; + int rk_offset = 0; + + int tid = threadIdx.x + blockIdx.x * blockDim.x; + + if (tid >= num_hashes * ARITY_DT) { + return; + } + + // Position in shared memory + int shared_pos = (threadIdx.x / ARITY_DT) * ARITY_DT; + // Index into set of t elements + //int idx = blockIdx.x * (blockDim.x / ARITY_DT) + threadIdx.x / ARITY_DT; + //int idx = tid / ARITY_DT; + + // For PC2, traversal is + // thr0 - dt0 + // thr1 - s0n0l0 + // thr2 - s0n0l1 + // thr3 - s0n0l2 + // ... 
+ // thr11 - s0n0l10 + // thr12 - s0n1l0 + // thr13 - s0n1l2 + + int num_batches = (num_hashes + stride - 1) / stride; + int hash_num = tid / ARITY_DT; + // Position within set of t elements + int hash_input = tid % ARITY_DT; + + int node = hash_num % num_batches; + int sector = hash_num / num_batches; + fr_t element; + + if (hash_input == 0) { + element = domain_tag; + } + else { + if (multi_in_ptr) { + // This is a bit complicated due to the pattern of the data. When num_hashes + // is equal to batch size then the pattern is: + // in_ptr[0] = s0n0 s1n0 s2n0 s3n0 ... + // in_ptr[1] = s0n1 s1n1 s2n1 s3n1 ... + // in_ptr[2] = s0n2 s1n2 s2n2 s3n2 ... + // in_ptr[7] = s0n7 s1n7 s2n7 s3n7 ... + // When num_hashes is a multiple of stride, say 2x: + // in_ptr[0] = s0n0 s0n1 + // in_ptr[1] = s0n2 s0n3 + // in_ptr[2] = s0n4 s0n5 + // in_ptr[3] = s0n6 s0n7 + // in_ptr[4] = s1n0 s1n1 + // in_ptr[7] = s1n6 s1n7 + // ie, two consecutive elements at a time. This pattern is repeated many times + // based on batch size. + + int elements_per_hash_per_ptr = num_hashes / stride; + int element_idx = hash_num * (ARITY_DT - 1) + (hash_input - 1); + int element_batch_idx = element_idx / elements_per_hash_per_ptr; + int element_batch_off = element_idx % elements_per_hash_per_ptr; + int ptr_num = element_batch_idx % ARITY; + int ptr_idx = element_idx / (ARITY * elements_per_hash_per_ptr) * elements_per_hash_per_ptr + element_batch_off; + element = in_ptrs.ptrs[ptr_num][ptr_idx]; + } else { + fr_t* in_ptr = in_ptrs.ptrs[0]; + if (first_tree_r || first_tree_c) { + int first_element; + int element_index; + if (first_tree_c) { + first_element = node * stride + sector; + element_index = first_element + (hash_input - 1) * stride * num_batches; + } else { + first_element = node * ARITY * stride + sector; + element_index = first_element + (hash_input - 1) * stride; + } + element = in_ptr[element_index]; + if (do_bswap) { + bswap(element); + } + } else { + // Access element from a packed array (no domain tag) + element = in_ptr[hash_num * (ARITY_DT - 1) + (hash_input - 1)]; + } + } + if (to_mont) { + element.to(); + } + } + + rk_offset += hash_input; + + add_full_round_constants(element, constants.round_constants[rk_offset]); + rk_offset += ARITY_DT; + + for (int i = 0; i < constants.half_full_rounds; i++) { + full_round(element, constants, rk_offset, current_round, hash_input, + shared_pos); + } + + // When first is true this unstrides the sectors from pc1, leading to + // s0n0 s0n1 ... s1n0 s1n1 ... s2n0 s2n1 ... 
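+  // (Worked example of the unstriding, assuming 2 sectors and 4-node batches:
+  //  pc1 order s0n0 s1n0 s0n1 s1n1 s0n2 s1n2 s0n3 s1n3 becomes
+  //  s0n0 s0n1 s0n2 s0n3 s1n0 s1n1 s1n2 s1n3, i.e. hash results are grouped
+  //  per sector before being written back to aux_ptr.)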
+ + __syncthreads(); + aux_ptr[hash_num * ARITY_DT + hash_input] = element; +#endif +} + +// Perform partial rounds +// Data is in aux_ptr from poseidon_hash_1 +// rk_offset - 5 * t +// current_round - 4 +// Launch params +// One thread per t elements +template __global__ +void poseidon_hash_2(fr_t* aux_ptr, const kernel_params_t constants, + int rk_offset, int current_round, const int batch_size) { + +#ifdef __CUDA_ARCH__ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + + if (idx >= batch_size) { + return; + } + + aux_ptr += idx * ARITY_DT; + + fr_t elements[ARITY_DT]; + + for (int i = 0; i < ARITY_DT; i++) { + elements[i] = aux_ptr[i]; + } + + for (int i = 0; i < constants.partial_rounds; i++) { + partial_round(elements, ARITY_DT, constants, rk_offset, current_round); + } + + for (int i = 0; i < ARITY_DT; i++) { + aux_ptr[i] = elements[i]; + } +#endif +} + +// Perform 3 of the final 4 full rounds +// rk_offset - 5 * t + number of partial rounds for this config from partial_rounds_map +// current_round - 4 + number of partial rounds for this config from partial_rounds_map +// Launch parameters +// One thread per element (including domain tag) +template __global__ +void poseidon_hash_3(fr_t* aux_ptr, const kernel_params_t constants, + int rk_offset, int current_round, const int batch_size) { +#ifdef __CUDA_ARCH__ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + + if (idx >= batch_size) { + return; + } + + int thread_pos = threadIdx.x % ARITY_DT; + int shared_pos = (threadIdx.x / ARITY_DT) * ARITY_DT; + idx = blockIdx.x * (blockDim.x / ARITY_DT) + threadIdx.x / ARITY_DT; + + rk_offset += thread_pos; + + fr_t element = aux_ptr[idx * ARITY_DT + thread_pos]; + + for (int i = 0; i < constants.half_full_rounds - 1; i++) { + full_round(element, constants, rk_offset, current_round, thread_pos, + shared_pos); + } + + partial_quintic_s_box(element); + + aux_ptr[idx * ARITY_DT + thread_pos] = element; +#endif +} + +// Perform last of the final 4 full rounds +// Data is in aux_ptr from poseidon_hash_1 +// Output is written to out_ptr +// Launch params +// One thread per t elements +template __global__ +void poseidon_hash_4(const fr_t* aux_ptr, fr_t* out_ptr, const fr_t* mds_matrix, + const int batch_size, const bool from_mont) { +#ifdef __CUDA_ARCH__ + int idx = threadIdx.x + blockIdx.x * blockDim.x; + + if (idx >= batch_size) { + return; + } + + aux_ptr += idx * ARITY_DT; + // fr_t elements[t]; + + // for (int i = 0; i < t; i++) { + // elements[i] = aux_ptr[i]; + // } + + // This writes state[1] into out_ptr + // No dt slots + fr_t out = last_matrix_mul(aux_ptr, mds_matrix, ARITY_DT); + if (from_mont) { + out.from(); + } + out_ptr[idx] = out; +#endif +} diff --git a/extern/supraseal/poseidon/poseidon.cpp b/extern/supraseal/poseidon/poseidon.cpp new file mode 100644 index 000000000..21220c4b2 --- /dev/null +++ b/extern/supraseal/poseidon/poseidon.cpp @@ -0,0 +1,205 @@ +// Copyright Supranational LLC + +#ifndef __POSEIDON_CPP__ +#define __POSEIDON_CPP__ + +#include +#include +#include +#include "poseidon.hpp" + +#include "../obj/constants_2.h" +#include "../obj/constants_4.h" +#include "../obj/constants_8.h" +#include "../obj/constants_11.h" +#include "../obj/constants_16.h" +#include "../obj/constants_24.h" +#include "../obj/constants_36.h" + +Poseidon::Poseidon(const int arity) : + arity_(arity), + half_full_rounds_(4), + t_(arity + 1), + domain_tag_((1 << arity) - 1) { + + const std::map partial_rounds_map = { + {2, 55}, + {4, 56}, + {8, 57}, + {11, 57}, + {16, 59}, + {24, 59}, + {36, 60} + 
}; + std::map::const_iterator map_res = partial_rounds_map.find(arity); + partial_rounds_ = map_res->second; + + switch (arity) { + case 2: + constants_file_ = (fr_t*)poseidon_constants_constants_2; + constants_size_ = poseidon_constants_constants_2_len; + break; + case 4: + constants_file_ = (fr_t*)poseidon_constants_constants_4; + constants_size_ = poseidon_constants_constants_4_len; + break; + case 8: + constants_file_ = (fr_t*)poseidon_constants_constants_8; + constants_size_ = poseidon_constants_constants_8_len; + break; + case 11: + constants_file_ = (fr_t*)poseidon_constants_constants_11; + constants_size_ = poseidon_constants_constants_11_len; + break; + case 16: + constants_file_ = (fr_t*)poseidon_constants_constants_16; + constants_size_ = poseidon_constants_constants_16_len; + break; + case 24: + constants_file_ = (fr_t*)poseidon_constants_constants_24; + constants_size_ = poseidon_constants_constants_24_len; + break; + case 36: + constants_file_ = (fr_t*)poseidon_constants_constants_36; + constants_size_ = poseidon_constants_constants_36_len; + break; + default: + printf("Unsupported poseidon arity %d\n", arity); + exit(1); + } + + // Assign constants pointers to location in buffer + // round_constants_ = constants_file_; + // mds_matrix_ = round_constants_ + + // (t_ * half_full_rounds_ * 2) + + // partial_rounds_; + // pre_sparse_matrix_ = mds_matrix_ + (t_ * t_); + // sparse_matrices_ = pre_sparse_matrix_ + (t_ * t_); + AssignPointers(constants_file_, + &round_constants_, &mds_matrix_, + &pre_sparse_matrix_, &sparse_matrices_); +} + +Poseidon::~Poseidon() { +} + +void Poseidon::AssignPointers(fr_t* constants_file, + fr_t** round_constants, fr_t** mds_matrix, + fr_t** pre_sparse_matrix, fr_t** sparse_matrices) { + *round_constants = constants_file; + *mds_matrix = *round_constants + + (t_ * half_full_rounds_ * 2) + + partial_rounds_; + *pre_sparse_matrix = *mds_matrix + (t_ * t_); + *sparse_matrices = *pre_sparse_matrix + (t_ * t_); +} + +void Poseidon::Hash(uint8_t* out, const uint8_t* in) { + fr_t elements[t_]; + + elements[0] = domain_tag_; + + for (int i = 0; i < t_ - 1; ++i) { + elements[i + 1].to(in + (i * 32), 32, true); + } + + for (int i = 0; i < t_; ++i) { + elements[i] += round_constants_[i]; + } + + int rk_offset = t_; + int current_round = 0; + + for (int i = 0; i < half_full_rounds_; ++i) { + FullRound(elements, rk_offset, current_round); + } + + for (int i = 0; i < partial_rounds_; ++i) { + PartialRound(elements, rk_offset, current_round); + } + + for (int i = 0; i < half_full_rounds_ - 1; ++i) { + FullRound(elements, rk_offset, current_round); + } + + LastFullRound(elements, mds_matrix_); + + elements[1].to_scalar(*((fr_t::pow_t*)out)); +} + +void Poseidon::QuinticSBox(fr_t& element, const fr_t& round_constant) { + element ^= 5; + element += round_constant; +} + +void Poseidon::MatrixMul(fr_t* elements, const fr_t* matrix) { + fr_t tmp[t_]; + + for (int i = 0; i < t_; ++i) { + tmp[i] = elements[0] * matrix[i]; + + for (int j = 1; j < t_; j++) { + tmp[i] += elements[j] * matrix[j * t_ + i]; + } + } + + for (int i = 0; i < t_; ++i) { + elements[i] = tmp[i]; + } +} + +void Poseidon::SparseMatrixMul(fr_t* elements, const fr_t* sparse_matrix) { + fr_t element0 = elements[0]; + + elements[0] *= sparse_matrix[0]; + for (int i = 1; i < t_; ++i) { + elements[0] += elements[i] * sparse_matrix[i]; + } + + for (int i = 1; i < t_; ++i) { + elements[i] += element0 * sparse_matrix[t_ + i - 1]; + } +} + +void Poseidon::RoundMatrixMul(fr_t* elements, const int current_round) { 
+ if (current_round == 3) { + MatrixMul(elements, pre_sparse_matrix_); + } + else if ((current_round > 3) && + (current_round < half_full_rounds_ + partial_rounds_)) { + int index = current_round - half_full_rounds_; + SparseMatrixMul(elements, sparse_matrices_ + (t_ * 2 - 1) * index); + } + else { + MatrixMul(elements, mds_matrix_); + } +} + +void Poseidon::FullRound(fr_t* elements, int& rk_offset, int& current_round) { + for (int i = 0; i < t_; ++i) { + QuinticSBox(elements[i], round_constants_[rk_offset + i]); + } + rk_offset += t_; + + RoundMatrixMul(elements, current_round); + current_round++; +} + +void Poseidon::LastFullRound(fr_t* elements, const fr_t* mds_matrix) { + for (int i = 0; i < t_; ++i) { + elements[i] ^= 5; + } + + MatrixMul(elements, mds_matrix); +} + +void Poseidon::PartialRound(fr_t* elements, int& rk_offset, + int& current_round) { + QuinticSBox(elements[0], round_constants_[rk_offset]); + rk_offset += 1; + + RoundMatrixMul(elements, current_round); + current_round++; +} + +#endif diff --git a/extern/supraseal/poseidon/poseidon.hpp b/extern/supraseal/poseidon/poseidon.hpp new file mode 100644 index 000000000..412dfd947 --- /dev/null +++ b/extern/supraseal/poseidon/poseidon.hpp @@ -0,0 +1,62 @@ +// Copyright Supranational LLC + +// Poseidon for Filecoin +// Primary usage is in regenerating truncated portions of tree r + +#ifndef __POSEIDON_HPP__ +#define __POSEIDON_HPP__ + +#include +#include +#include + +class Poseidon { + public: + static constexpr vec256 BLS12_381_r = { + TO_LIMB_T(0xffffffff00000001), TO_LIMB_T(0x53bda402fffe5bfe), + TO_LIMB_T(0x3339d80809a1d805), TO_LIMB_T(0x73eda753299d7d48) + }; + static constexpr vec256 BLS12_381_rRR = { + TO_LIMB_T(0xc999e990f3f29c6d), TO_LIMB_T(0x2b6cedcb87925c23), + TO_LIMB_T(0x05d314967254398f), TO_LIMB_T(0x0748d9d99f59ff11) + }; + static constexpr vec256 BLS12_381_rONE = { + TO_LIMB_T(0x00000001fffffffe), TO_LIMB_T(0x5884b7fa00034802), + TO_LIMB_T(0x998c4fefecbc4ff5), TO_LIMB_T(0x1824b159acc5056f) + }; + // This conflicts with sppark fr_t + // typedef blst_256_t fr_t; + + Poseidon(const int arity); + ~Poseidon(); + void Hash(uint8_t* out, const uint8_t* in); + + protected: + void QuinticSBox(fr_t& element, const fr_t& round_constant); + void MatrixMul(fr_t* elements, const fr_t* matrix); + void SparseMatrixMul(fr_t* elements, const fr_t* sparse_matrix); + void RoundMatrixMul(fr_t* elements, const int current_round); + void FullRound(fr_t* elements, int& rk_offset, int& current_round); + void LastFullRound(fr_t* elements, const fr_t* mds_matrix); + void PartialRound(fr_t* elements, int& rk_offset, int& current_round); + + void AssignPointers(fr_t* constants_file, + fr_t** round_constants, fr_t** mds_matrix, + fr_t** pre_sparse_matrix, fr_t** sparse_matrices); + + int arity_; + int partial_rounds_; + int half_full_rounds_; + int t_; + + fr_t domain_tag_; + + fr_t* constants_file_; + size_t constants_size_; + fr_t* round_constants_; + fr_t* mds_matrix_; + fr_t* pre_sparse_matrix_; + fr_t* sparse_matrices_; +}; +#endif // __POSEIDON_HPP__ diff --git a/extern/supraseal/sealing/constants.hpp b/extern/supraseal/sealing/constants.hpp new file mode 100644 index 000000000..5223182c4 --- /dev/null +++ b/extern/supraseal/sealing/constants.hpp @@ -0,0 +1,291 @@ +// Copyright Supranational LLC + +#ifndef __CONSTANTS_HPP__ +#define __CONSTANTS_HPP__ + +#include // size_t +#include // uint* +#include // log2 +#include +#include + +/////////////////////////////////////////////////////////// +// Graph constants 
+/////////////////////////////////////////////////////////// + +// Only 512MB and 32GB are broadly tested +enum SectorSizeLg { + Sector2KB = 11, + Sector4KB = 12, + Sector16KB = 14, + Sector32KB = 15, + Sector8MB = 23, + Sector16MB = 24, + Sector512MB = 29, + Sector1GB = 30, + Sector32GB = 35, + Sector64GB = 36 +}; + +const size_t NODE_SIZE_LG = 5; // In Bytes. SHA-256 digest size +const size_t NODE_SIZE = (1 << NODE_SIZE_LG); // In Bytes. SHA-256 digest size +const size_t NODE_WORDS = NODE_SIZE / sizeof(uint32_t); + +const size_t PARENT_COUNT_BASE = 6; // Number of parents from same layer +const size_t PARENT_COUNT_EXP = 8; // Number of parents from previous layer +const size_t PARENT_COUNT = PARENT_COUNT_BASE + PARENT_COUNT_EXP; +const size_t PARENT_SIZE = sizeof(uint32_t); + +const size_t NODE_0_REPEAT = 1; +const size_t NODE_0_BLOCKS = 2; +const size_t LAYER_1_REPEAT = 3; +const size_t LAYERS_GT_1_REPEAT = 7; +const size_t NODE_GT_0_BLOCKS = 20; + +// Full padding block for the hash buffer of node 0 in each layer +const uint8_t NODE_0_PADDING[] __attribute__ ((aligned (32))) = { + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00 +}; + +// Half padding block for the hash buffer of non node 0 in each layer +const uint8_t NODE_PADDING_X2[] __attribute__ ((aligned (32))) = { + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x27, 0x00, 0x00 +}; + +// Smallest unit of memory we are working with. 
A page is typically 4KB +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +const size_t NODES_PER_HASHER = 2; // Number of nodes to calculate per hashing thread + +///////////////////////////////////////////////////////// +// Constants solely for testing, these will be inputs +///////////////////////////////////////////////////////// + +// ticket comes from on-chain randomness +const uint8_t TICKET[] __attribute__ ((aligned (32))) = { 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 }; + +// porep_seed is the config porep_id +// Mostly 0's with a single byte indicating the sector size and api version +// See porep_id() in rust-filecoin-proofs-api/src/registry.rs +// const uint8_t SEED[] = { 99, 99, 99, 99, 99, 99, 99, 99, +// 99, 99, 99, 99, 99, 99, 99, 99, +// 99, 99, 99, 99, 99, 99, 99, 99, +// 99, 99, 99, 99, 99, 99, 99, 99 }; +const uint8_t SEED[] __attribute__ ((aligned (32))) = { 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1 }; + +// Default ordering for atomics +static const std::memory_order DEFAULT_MEMORY_ORDER = std::memory_order_seq_cst; +//static const std::memory_order DEFAULT_MEMORY_ORDER = std::memory_order_relaxed; + + +///////////////////////////////////////////////////////// +// Constants for PC1 buffer sizing +///////////////////////////////////////////////////////// + +// Operations between threads are batched for efficient coordination. +// BATCH_SIZE must be a multiple of PARENT_COUNT or PARENT_COUNT - 1 +// Parent pointers contain pointers to all 14 parents +const size_t PARENT_PTR_BATCH_SIZE = PARENT_COUNT; +// We only need 13 nodes from disk/cache since 1 was always just hashed +const size_t PAGE_BATCH_SIZE = PARENT_COUNT - 1; + +// Parents buffer stores base and expander parent values that are not located +// in the node buffer. The intention is the parents buffer is filled with +// values read from disk. The first parent is not required since it is the +// previous node and we know for sure it is in the node buffer. +const size_t PARENT_BUFFER_BATCHES = 1<<18; +const size_t PARENT_BUFFER_NODES = PARENT_BUFFER_BATCHES * PAGE_BATCH_SIZE; + +// Number of hashing buffers allocated in memory. +// Once hit then the buffer wraps around to the beginning +// This needs to be tuned based on available RAM and timing required to keep +// hashing threads and reading threads in sync. +// Should be sized parent buffer + desired cache nodes +const size_t NODE_BUFFER_BATCHES = PARENT_BUFFER_BATCHES * 2; +const size_t NODE_BUFFER_NODES = NODE_BUFFER_BATCHES; +const size_t NODE_BUFFER_SYNC_LG_BATCH_SIZE = 2; +const size_t NODE_BUFFER_SYNC_BATCH_SIZE = 1<> NODE_BUFFER_SYNC_LG_BATCH_SIZE) + +// The coordinator will create a contiguous block of memory where it +// copies the data from the parent pointers so the hashers can simply +// walk through it. There will be COORD_BATCH_COUNT batches, each of +// size COORD_BATCH_SIZE. Further we guarantee all of these nodes will +// be present in the node buffer so we won't bother with +// reference/consumed counts. 
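+// (With the defaults below: 4 batches x 256 nodes = 1024 coordinator nodes
+//  in flight at a time.)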
+static const size_t COORD_BATCH_SIZE = 256; +static const size_t COORD_BATCH_COUNT = 4; +static const size_t COORD_BATCH_NODE_COUNT = COORD_BATCH_SIZE * COORD_BATCH_COUNT; + +///////////////////////////////////////////////////////// +// Constants for C1 +///////////////////////////////////////////////////////// + +const size_t LABEL_PARENTS = 37; +const size_t LAYER_ONE_REPEAT_SEQ = 6; +const size_t LAYER_ONE_FINAL_SEQ = LABEL_PARENTS % + (LAYER_ONE_REPEAT_SEQ * + PARENT_COUNT_BASE); +const size_t LAYER_N_REPEAT_SEQ = 2; +const size_t LAYER_N_FINAL_SEQ = LABEL_PARENTS % + (LAYER_N_REPEAT_SEQ * PARENT_COUNT); + +const uint32_t SINGLE_PROOF_DATA = 0; // storage-proofs-core/src/merkle/proof.rs + +// node_t is here instead of data_structures because it is used to compute constants +// in various places and it is comprised of only primitive types (and so doesn't +// pull in a bunch of other includes). +struct node_t { + uint32_t limbs[NODE_WORDS]; + void reverse_l() { + for (size_t i = 0; i < NODE_WORDS; i++) { + limbs[i] = htonl(limbs[i]); + } + } + void reverse_s() { + for (size_t i = 0; i < NODE_WORDS * 2; i++) { + ((uint16_t*)limbs)[i] = htons(((uint16_t*)limbs)[i]); + } + } +}; + +#include "sector_parameters.hpp" + +///////////////////////////////////////////////////////// +// Templated constants for number of parallel sectors +///////////////////////////////////////////////////////// + +// Number of sectors being processed within the buffers. +// Used to determine stride between parents in node_buffer +// A good value here would be a multiple that fits in a 4KB page +// 128: 32B * 128 = 4096 Page consumed by single node index +// 64: 32B * 64 * 2 = 4096 Page consumed by two node indices +// 32: 32B * 32 * 4 = 4096 Page consumed by four node indices +// 16: 32B * 16 * 8 = 4096 Page consumed by eight node indices +// What we are trying to accomplish with this is to improve the efficiency of +// random reads. Typcially when reading distant base parents and all +// expander parents an entire page needs to be read to get only a single 32B +// node. If this is done across many sealing threads, then the read +// efficiency is not good. With the interleaved approach the goal is for all +// 4KB page data to be useful. This can reduce the number of system level +// read operations by the interleaved node factor. +// Must evenly fit in the page! 
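+// (Worked example of the layout above, assuming 4KB pages and 32B nodes:
+//  with 64 parallel sectors a page holds 4096 / (64 * 32) = 2 node indices,
+//  so NODES_PER_PAGE below evaluates to 2.)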
+template <size_t SECTORS, class sector_parameters_t>
+class sealing_config_t : public sector_parameters_t {
+public:
+  static const size_t PARALLEL_SECTORS = SECTORS;
+  // Number of nodes stored per page (packed)
+  static const size_t NODES_PER_PAGE = PAGE_SIZE / (PARALLEL_SECTORS * NODE_SIZE);
+
+  // // Sector parameters
+  // static const sector_parameters_t P;
+};
+
+#ifdef RUNTIME_SECTOR_SIZE
+typedef sealing_config_t<128, sector_parameters2KB > sealing_config_128_2KB_t;
+typedef sealing_config_t<128, sector_parameters4KB > sealing_config_128_4KB_t;
+typedef sealing_config_t<128, sector_parameters16KB > sealing_config_128_16KB_t;
+typedef sealing_config_t<128, sector_parameters32KB > sealing_config_128_32KB_t;
+typedef sealing_config_t<128, sector_parameters8MB > sealing_config_128_8MB_t;
+typedef sealing_config_t<128, sector_parameters16MB > sealing_config_128_16MB_t;
+typedef sealing_config_t<128, sector_parameters1GB > sealing_config_128_1GB_t;
+typedef sealing_config_t<128, sector_parameters64GB > sealing_config_128_64GB_t;
+typedef sealing_config_t< 64, sector_parameters2KB > sealing_config_64_2KB_t;
+typedef sealing_config_t< 64, sector_parameters4KB > sealing_config_64_4KB_t;
+typedef sealing_config_t< 64, sector_parameters16KB > sealing_config_64_16KB_t;
+typedef sealing_config_t< 64, sector_parameters32KB > sealing_config_64_32KB_t;
+typedef sealing_config_t< 64, sector_parameters8MB > sealing_config_64_8MB_t;
+typedef sealing_config_t< 64, sector_parameters16MB > sealing_config_64_16MB_t;
+typedef sealing_config_t< 64, sector_parameters1GB > sealing_config_64_1GB_t;
+typedef sealing_config_t< 64, sector_parameters64GB > sealing_config_64_64GB_t;
+typedef sealing_config_t< 32, sector_parameters2KB > sealing_config_32_2KB_t;
+typedef sealing_config_t< 32, sector_parameters4KB > sealing_config_32_4KB_t;
+typedef sealing_config_t< 32, sector_parameters16KB > sealing_config_32_16KB_t;
+typedef sealing_config_t< 32, sector_parameters32KB > sealing_config_32_32KB_t;
+typedef sealing_config_t< 32, sector_parameters8MB > sealing_config_32_8MB_t;
+typedef sealing_config_t< 32, sector_parameters16MB > sealing_config_32_16MB_t;
+typedef sealing_config_t< 32, sector_parameters1GB > sealing_config_32_1GB_t;
+typedef sealing_config_t< 32, sector_parameters64GB > sealing_config_32_64GB_t;
+typedef sealing_config_t< 16, sector_parameters2KB > sealing_config_16_2KB_t;
+typedef sealing_config_t< 16, sector_parameters4KB > sealing_config_16_4KB_t;
+typedef sealing_config_t< 16, sector_parameters16KB > sealing_config_16_16KB_t;
+typedef sealing_config_t< 16, sector_parameters32KB > sealing_config_16_32KB_t;
+typedef sealing_config_t< 16, sector_parameters8MB > sealing_config_16_8MB_t;
+typedef sealing_config_t< 16, sector_parameters16MB > sealing_config_16_16MB_t;
+typedef sealing_config_t< 16, sector_parameters1GB > sealing_config_16_1GB_t;
+typedef sealing_config_t< 16, sector_parameters64GB > sealing_config_16_64GB_t;
+typedef sealing_config_t< 8, sector_parameters2KB > sealing_config_8_2KB_t;
+typedef sealing_config_t< 8, sector_parameters4KB > sealing_config_8_4KB_t;
+typedef sealing_config_t< 8, sector_parameters16KB > sealing_config_8_16KB_t;
+typedef sealing_config_t< 8, sector_parameters32KB > sealing_config_8_32KB_t;
+typedef sealing_config_t< 8, sector_parameters8MB > sealing_config_8_8MB_t;
+typedef sealing_config_t< 8, sector_parameters16MB > sealing_config_8_16MB_t;
+typedef sealing_config_t< 8, sector_parameters1GB > sealing_config_8_1GB_t;
+typedef sealing_config_t< 8, sector_parameters64GB > sealing_config_8_64GB_t;
+typedef sealing_config_t< 4, sector_parameters2KB > sealing_config_4_2KB_t; +typedef sealing_config_t< 4, sector_parameters4KB > sealing_config_4_4KB_t; +typedef sealing_config_t< 4, sector_parameters16KB > sealing_config_4_16KB_t; +typedef sealing_config_t< 4, sector_parameters32KB > sealing_config_4_32KB_t; +typedef sealing_config_t< 4, sector_parameters8MB > sealing_config_4_8MB_t; +typedef sealing_config_t< 4, sector_parameters16MB > sealing_config_4_16MB_t; +typedef sealing_config_t< 4, sector_parameters1GB > sealing_config_4_1GB_t; +typedef sealing_config_t< 4, sector_parameters64GB > sealing_config_4_64GB_t; +typedef sealing_config_t< 2, sector_parameters2KB > sealing_config_2_2KB_t; +typedef sealing_config_t< 2, sector_parameters4KB > sealing_config_2_4KB_t; +typedef sealing_config_t< 2, sector_parameters16KB > sealing_config_2_16KB_t; +typedef sealing_config_t< 2, sector_parameters32KB > sealing_config_2_32KB_t; +typedef sealing_config_t< 2, sector_parameters8MB > sealing_config_2_8MB_t; +typedef sealing_config_t< 2, sector_parameters16MB > sealing_config_2_16MB_t; +typedef sealing_config_t< 2, sector_parameters1GB > sealing_config_2_1GB_t; +typedef sealing_config_t< 2, sector_parameters64GB > sealing_config_2_64GB_t; +typedef sealing_config_t< 1, sector_parameters2KB > sealing_config_1_2KB_t; +typedef sealing_config_t< 1, sector_parameters4KB > sealing_config_1_4KB_t; +typedef sealing_config_t< 1, sector_parameters16KB > sealing_config_1_16KB_t; +typedef sealing_config_t< 1, sector_parameters32KB > sealing_config_1_32KB_t; +typedef sealing_config_t< 1, sector_parameters8MB > sealing_config_1_8MB_t; +typedef sealing_config_t< 1, sector_parameters16MB > sealing_config_1_16MB_t; +typedef sealing_config_t< 1, sector_parameters1GB > sealing_config_1_1GB_t; +typedef sealing_config_t< 1, sector_parameters64GB > sealing_config_1_64GB_t; +#endif +typedef sealing_config_t<128, sector_parameters512MB> sealing_config_128_512MB_t; +typedef sealing_config_t<128, sector_parameters32GB > sealing_config_128_32GB_t; +typedef sealing_config_t< 64, sector_parameters512MB> sealing_config_64_512MB_t; +typedef sealing_config_t< 64, sector_parameters32GB > sealing_config_64_32GB_t; +typedef sealing_config_t< 32, sector_parameters512MB> sealing_config_32_512MB_t; +typedef sealing_config_t< 32, sector_parameters32GB > sealing_config_32_32GB_t; +typedef sealing_config_t< 16, sector_parameters512MB> sealing_config_16_512MB_t; +typedef sealing_config_t< 16, sector_parameters32GB > sealing_config_16_32GB_t; +typedef sealing_config_t< 8, sector_parameters512MB> sealing_config_8_512MB_t; +typedef sealing_config_t< 8, sector_parameters32GB > sealing_config_8_32GB_t; +typedef sealing_config_t< 4, sector_parameters512MB> sealing_config_4_512MB_t; +typedef sealing_config_t< 4, sector_parameters32GB > sealing_config_4_32GB_t; +typedef sealing_config_t< 2, sector_parameters512MB> sealing_config_2_512MB_t; +typedef sealing_config_t< 2, sector_parameters32GB > sealing_config_2_32GB_t; +typedef sealing_config_t< 1, sector_parameters512MB> sealing_config_1_512MB_t; +typedef sealing_config_t< 1, sector_parameters32GB > sealing_config_1_32GB_t; + +#endif // __CONSTANTS_HPP__ diff --git a/extern/supraseal/sealing/data_structures.hpp b/extern/supraseal/sealing/data_structures.hpp new file mode 100644 index 000000000..2c746932d --- /dev/null +++ b/extern/supraseal/sealing/data_structures.hpp @@ -0,0 +1,156 @@ +// Copyright Supranational LLC + +#ifndef __DATA_STRUCTURES_HPP__ +#define __DATA_STRUCTURES_HPP__ + +#include 
+#include "constants.hpp" +#include "../nvme/ring_t.hpp" +#include "../nvme/nvme_io_tracker_t.hpp" + +// One node worth of parallel sectors +template +struct parallel_node_t { + node_t sectors[C::PARALLEL_SECTORS]; +}; + +// One page of nodes. In the end page will be a contiguous block of memory +// so it's easy to access uint32_t's starting at &nodes[0]. +template +struct page_t { + parallel_node_t parallel_nodes[C::NODES_PER_PAGE]; +}; + +// Buffer for replica IDs for each sector +// First 32B = replica_id +// Second 32B = current_layer || current_node || 0's (20B) +// Third 32B = padding (0x80) || 0's (31B) Only for node 0 +// Fourth 32B = 0's || length (512b = 64B = 0x200) Only for node 0 +// Final element is padding for all nodes > 0 +// This structure gets replicated per hasher rather than a single instance +// for all parallel sectors. This is because the coordinator creates a +// packed buffer when staging data for parents that contains only the parents +// needed for that hasher, not all parallel sectors. This means the offsets between +// parents is not the same as for all PARALLEL_SECTORS. +struct replica_id_buffer_t { + uint32_t ids[NODES_PER_HASHER][NODE_WORDS]; + uint32_t cur_loc[NODES_PER_HASHER][NODE_WORDS]; + uint32_t pad_0[NODES_PER_HASHER][NODE_WORDS]; + uint32_t pad_1[NODES_PER_HASHER][NODE_WORDS]; + uint32_t padding[NODES_PER_HASHER][NODE_WORDS]; +}; + +template +struct batch_t { + // Note BATCH_SIZE does not add to sizeof(batch_t) + static const int BATCH_SIZE = sz; + T batch[sz]; +}; + +// Type to store pointers to parent pages +// +// **Handling of very recent nodes** +// +// The coordinator will create a local copy of parents for the hasher to +// access. For parents that are far from the head it's fine to just copy +// the data. For local parents the data might be exist at the time the +// coordinator is setting up the local buffer so we still need to pass the +// parent pointer to the hasher. +// +// Storage core +// - Sets up parent pointers as usual +// - For nodes there are local do not record reference counts +// +// Coordinator +// - Copies data into local buffer for hashers to use +// - For nodes that are local it does not copy the data. Instead it +// passes pointer to the hashers in a side struct +// +// Hashers +// - Set up parent pointers into local buffer or from side struct + +template +struct parent_ptr_t { + // Pointer to the parent in the node buffer or parent buffer + parallel_node_t* ptr; +}; +// The parent pointers must be contiguous so synchronization data is +// stored in a separate struct +struct parent_ptr_sync_t { + static const uint32_t NOT_NODE_BUFFER = (uint32_t)-1; + static const uint32_t LOCAL_NODE = (uint32_t)-2; + // When a parent pointer points into the node buffer record the + // node buffer index + uint32_t node_buffer_idx; + inline bool is_node_buffer() { + return node_buffer_idx != NOT_NODE_BUFFER && node_buffer_idx != LOCAL_NODE; + } +}; + +// Structure to iterate over node and layer +// To disambiguate nodes on the various layers we combine the layer and +// node into a single id. In this way all nodes are unique and can be +// added, subtracted, etc. This is useful for managing the cache across +// layers. 
+template +class node_id_t { + uint64_t _id; + +public: + node_id_t() noexcept { + _id = 0; + } + node_id_t(uint64_t node) { + _id = node; + } + node_id_t(uint32_t layer, uint32_t node) { + _id = ((uint64_t)layer << C::GetNodeBits()) | node; + } + + uint64_t id() { + return _id; + } + uint32_t node() { + return _id & C::GetNodeMask(); + } + uint32_t layer() { + return _id >> C::GetNodeBits(); + } + bool operator<(const node_id_t x) const { + return _id < x._id; + } + void operator++(int) { + _id++; + } + void operator--(int) { + _id--; + } + void operator+=(uint64_t x) { + _id += x; + } + operator uint64_t() { + return _id; + } +}; + +struct node_io_t { + enum type_e { + READ = 0, + WRITE, + NOP + }; + + // Node to read/write + uint64_t node; + // Read or write + type_e type; + + // Used for callbacks to signal when data is valid + ring_buffer_valid_t* valid; + + // Used for SPDK calls + nvme_io_tracker_t tracker; +}; + + +#endif diff --git a/extern/supraseal/sealing/sector_parameters.hpp b/extern/supraseal/sealing/sector_parameters.hpp new file mode 100644 index 000000000..86ac08272 --- /dev/null +++ b/extern/supraseal/sealing/sector_parameters.hpp @@ -0,0 +1,110 @@ +// Copyright Supranational LLC + +// Runtime sector parameters +// Mostly derived from filecoin-proofs/src/constants.rs + +#ifndef __SECTOR_PARAMETERS_HPP__ +#define __SECTOR_PARAMETERS_HPP__ + +#include +#include +#include + +template +class SectorParameters { + public: + static constexpr size_t ONE_KB = 1024; + static constexpr size_t ONE_MB = ONE_KB * 1024; + static constexpr size_t ONE_GB = ONE_MB * 1024; + + static constexpr size_t GetSectorSizeLg() { return sector_size_lg_; } + static constexpr size_t GetSectorSize() { return sector_size_; } + static constexpr size_t GetNodeBits() { return node_bits_; } + static constexpr size_t GetNodeMask() { return node_mask_; } + static constexpr size_t GetNumChallenges() { return num_challenges_; } + static constexpr size_t GetNumPartitions() { return num_partitions_; } + static constexpr size_t GetNumLayers() { return num_layers_; } + static constexpr size_t GetNumNodes() { return num_leaves_; } + static constexpr size_t GetNumLeaves() { return num_leaves_; } + static constexpr size_t GetNumTreeDArity() { return tree_d_arity_; } + static constexpr size_t GetNumTreeDLevels() { return tree_d_levels_; } + static constexpr size_t GetNumTreeRLabels() { return tree_r_labels_; } + static constexpr size_t GetChallengeStartMask() { return ~(tree_r_labels_ - 1); } + static constexpr size_t GetNumTreeRCFiles() { return tree_rc_files_; } + static constexpr size_t GetNumTreeRCLgArity() { return tree_rc_lg_base_arity_; } + static constexpr size_t GetNumTreeRCArity() { return tree_rc_base_arity_; } + static constexpr size_t GetNumTreeRCArityDT() { return tree_rc_base_arity_ + 1; } + static constexpr size_t GetNumTreeRCConfig() { return tree_rc_config_; } + static constexpr size_t GetNumTreeRDiscardRows() { return tree_r_discard_rows_; } + static constexpr size_t GetNumTreeRCLevels() { + // FIXME - this won't work for non-uniform trees + size_t tree_rc_levels = log2(num_leaves_) / log2(tree_rc_base_arity_); + assert(tree_rc_top_arity_ == 0); + assert(tree_rc_sub_arity_ != 2); + + return tree_rc_levels; + } + + private: + static const size_t sector_size_lg_ = 63 - __builtin_clzll((uint64_t)sector_size_); + static const size_t node_bits_ = sector_size_lg_ - NODE_SIZE_LG; + static const size_t node_mask_ = (1UL << node_bits_) - 1; + static const size_t num_challenges_ = sector_size_ <= ONE_GB ? 
2 : 180; + static const size_t num_partitions_ = sector_size_ <= ONE_GB ? 1 : 10; + static const size_t num_layers_ = sector_size_ <= ONE_GB ? 2 : 11; + static const size_t num_leaves_ = sector_size_ / sizeof(node_t); + static const size_t tree_d_arity_ = 2; + static const size_t tree_d_levels_ = log2(num_leaves_); + static const size_t tree_rc_config_ = ((sector_size_== 2 * ONE_KB || sector_size_== 8 * ONE_MB || sector_size_== 512 * ONE_MB) ? 0 : + (sector_size_== 32 * ONE_KB || sector_size_== 64 * ONE_GB) ? 2 : + 1); + static const size_t tree_rc_lg_base_arity_ = 3; + static const size_t tree_rc_base_arity_ = 1 << tree_rc_lg_base_arity_; + static const size_t tree_rc_sub_arity_ = ((sector_size_== 2 * ONE_KB || sector_size_== 8 * ONE_MB || sector_size_== 512 * ONE_MB) ? 0 : + (sector_size_== 4 * ONE_KB || sector_size_== 16 * ONE_MB || sector_size_== 1 * ONE_GB) ? 2 : + 8); + static const size_t tree_rc_top_arity_ = ((sector_size_== 32 * ONE_KB || sector_size_== 64 * ONE_GB) ? 2 : 0); + static const size_t tree_r_discard_rows_ = sector_size_ <= (32 * ONE_KB) ? 1 : 2; + static const size_t tree_rc_files_ = ((tree_rc_top_arity_ == 0) && (tree_rc_sub_arity_ == 0) ? 1 : + tree_rc_top_arity_ > 0 ? tree_rc_top_arity_ * tree_rc_sub_arity_ : + tree_rc_sub_arity_); + static const size_t tree_r_labels_ = pow(tree_rc_base_arity_, tree_r_discard_rows_ + 1); +}; + +#ifdef RUNTIME_SECTOR_SIZE + +template class SectorParameters<1UL << Sector2KB>; +template class SectorParameters<1UL << Sector4KB>; +template class SectorParameters<1UL << Sector16KB>; +template class SectorParameters<1UL << Sector32KB>; + +template class SectorParameters<1UL << Sector8MB>; +template class SectorParameters<1UL << Sector16MB>; + +template class SectorParameters<1UL << Sector1GB>; +template class SectorParameters<1UL << Sector64GB>; + +#endif + +template class SectorParameters<1UL << Sector512MB>; +template class SectorParameters<1UL << Sector32GB>; + +#ifdef RUNTIME_SECTOR_SIZE + +typedef SectorParameters<1UL << Sector2KB> sector_parameters2KB; +typedef SectorParameters<1UL << Sector4KB> sector_parameters4KB; +typedef SectorParameters<1UL << Sector16KB> sector_parameters16KB; +typedef SectorParameters<1UL << Sector32KB> sector_parameters32KB; + +typedef SectorParameters<1UL << Sector8MB> sector_parameters8MB; +typedef SectorParameters<1UL << Sector16MB> sector_parameters16MB; + +typedef SectorParameters<1UL << Sector1GB> sector_parameters1GB; +typedef SectorParameters<1UL << Sector64GB> sector_parameters64GB; + +#endif + +typedef SectorParameters<1UL << Sector512MB> sector_parameters512MB; +typedef SectorParameters<1UL << Sector32GB> sector_parameters32GB; + +#endif // __SECTOR_PARAMETERS_HPP__ diff --git a/extern/supraseal/sealing/supra_seal.cpp b/extern/supraseal/sealing/supra_seal.cpp new file mode 100644 index 000000000..d90c5b57d --- /dev/null +++ b/extern/supraseal/sealing/supra_seal.cpp @@ -0,0 +1,523 @@ +// Copyright Supranational LLC + +#include +#include +#include // file read +#include // printing +#include +#include // htonl + +// Enable profiling +//#define PROFILE + +// Enable data collection in the orchestrator using the timestamp counter +//#define TSC + +// Enable data collection in the hasher using the timestamp counter +//#define HASHER_TSC + +// Enable more general statistics collection +//#define STATS + +// Disable reading parents from disk (will not produce the correct result) +//#define NO_DISK_READS + +// Print a message if the orchestrator is stalled for too long +//#define PRINT_STALLS + +// Verify 
that hashed result matches a known good sealing +//#define VERIFY_HASH_RESULT + +#include "../sealing/constants.hpp" +#include "../nvme/streaming_node_reader_nvme.hpp" + +#include "../c1/c1.hpp" +#include "../pc1/pc1.hpp" +#include "../pc2/pc2.hpp" +#include "../util/util.hpp" +#include "../util/sector_util.cpp" + +#include "../util/debug_helpers.cpp" + +// Simplify calling the various functions for different +// sector configurations +#define COMMA , +#define SECTOR_CALL_TABLE(FUNC) \ + switch (num_sectors) { \ + case 128: \ + FUNC(sealing_config_t<128 COMMA decltype(params)>); \ + break; \ + case 64: \ + FUNC(sealing_config_t<64 COMMA decltype(params)>); \ + break; \ + case 32: \ + FUNC(sealing_config_t<32 COMMA decltype(params)>); \ + break; \ + case 16: \ + FUNC(sealing_config_t<16 COMMA decltype(params)>); \ + break; \ + case 8: \ + FUNC(sealing_config_t<8 COMMA decltype(params)>); \ + break; \ + case 4: \ + FUNC(sealing_config_t<4 COMMA decltype(params)>); \ + break; \ + case 2: \ + FUNC(sealing_config_t<2 COMMA decltype(params)>); \ + break; \ + } + +class sealing_ctx_t { +public: + nvme_controllers_t* controllers; + topology_t* topology; + + template + sealing_ctx_t(P& params, const char* filename) { + init

<P>(filename);
+  }
+
+  template <class P>
+  void init(const char* filename) {
+    printf("Initializing spdk using config %s\n", filename);
+
+    topology = new topology_t(filename);
+
+    // Initialize SPDK
+    struct spdk_env_opts opts;
+    spdk_env_opts_init(&opts);
+    opts.name = "nvme";
+    int rc = spdk_env_init(&opts);
+    if (rc < 0) {
+      fprintf(stderr, "Unable to initialize SPDK env\n");
+      exit(1);
+    }
+
+    controllers = new nvme_controllers_t(topology->get_allowed_nvme());
+    controllers->init(4); // qpairs
+    controllers->sort();
+
+    if (controllers->size() < topology->get_allowed_nvme().size()) {
+      printf("Unable to attach to all specified NVMe drives. Ensure spdk scripts/setup.sh"
+             " was run and the drive list is up-to-date in .cfg\n");
+      exit(1);
+    }
+
+    print_parameters<P>

(); + //print_temps(); + } + + void print_temps() { + std::thread t = std::thread([this]() { + while (true) { + controllers->print_temperatures(); + sleep(5 * 60); + } + }); + t.detach(); + } +}; + +static sealing_ctx_t* sealing_ctx = nullptr; + +static std::mutex ctx_mtx; + +static void init_ctx(size_t sector_size) { + std::unique_lock lck(ctx_mtx); + if (sealing_ctx == nullptr) { + SECTOR_PARAMS_TABLE(sealing_ctx = new sealing_ctx_t(params, "supra_seal.cfg")); + } +} + +extern "C" +void supra_seal_init(size_t sector_size, const char* config_file) { + printf("INIT called %s\n", config_file); + std::unique_lock lck(ctx_mtx); + if (sealing_ctx == nullptr) { + SECTOR_PARAMS_TABLE(sealing_ctx = new sealing_ctx_t(params, config_file)); + } +} + +extern "C" +size_t get_nvme_health_info(nvme_health_info* health_infos, size_t max_controllers, size_t sector_size) { + init_ctx(sector_size); + + if (!sealing_ctx || !sealing_ctx->controllers) { + return 0; + } + + auto health_data = (*sealing_ctx->controllers).get_health_info(); + size_t count = std::min(health_data.size(), max_controllers); + + // Copy the health info into the provided array + for (size_t i = 0; i < count; i++) { + health_infos[i] = health_data[i]; + } + + return count; +} + +extern "C" +int pc1(uint64_t block_offset, size_t num_sectors, + const uint8_t* replica_ids, const char* parents_filename, + size_t sector_size) { + init_ctx(sector_size); +#ifndef __CUDA_ARCH__ +#define CALL_PC1(C) \ + do_pc1(sealing_ctx->controllers, \ + *sealing_ctx->topology, \ + block_offset, \ + (const uint32_t*)replica_ids, \ + parents_filename); + SECTOR_PARAMS_TABLE(SECTOR_CALL_TABLE(CALL_PC1)); +#undef CALL_PC1 +#endif + return 0; +} + +extern "C" +int pc2_cleanup(size_t num_sectors, const char* output_dir, + size_t sector_size) { +#define CALL_PC2_CLEANUP(C) \ + do_pc2_cleanup(output_dir); + + SECTOR_PARAMS_TABLE(SECTOR_CALL_TABLE(CALL_PC2_CLEANUP)); +#undef CALL_PC2_CLEANUP + return 0; +} + +extern "C" +int pc2(size_t block_offset, size_t num_sectors, const char* output_dir, + const char** data_filenames, size_t sector_size) { + init_ctx(sector_size); + +#define CALL_PC2(C) \ + do_pc2(*sealing_ctx->topology, \ + *sealing_ctx->controllers, \ + block_offset, \ + data_filenames, output_dir); + SECTOR_PARAMS_TABLE(SECTOR_CALL_TABLE(CALL_PC2)); +#undef CALL_PC2 + return 0; +} + +extern "C" +int c1(size_t block_offset, size_t num_sectors, size_t sector_slot, + const uint8_t* replica_id, const uint8_t* seed, + const uint8_t* ticket, const char* cache_path, + const char* parents_filename, const char* replica_path, + size_t sector_size) { + size_t qpair = sealing_ctx->topology->c1_qpair; + int node_reader_core = sealing_ctx->topology->c1_reader; + const char* output_dir = cache_path; + + init_ctx(sector_size); + +#define CALL_C1(C) \ + { \ + streaming_node_reader_t reader(sealing_ctx->controllers, qpair, \ + block_offset, node_reader_core, \ + sealing_ctx->topology->c1_sleep_time); \ + return do_c1(reader, \ + num_sectors, sector_slot, \ + replica_id, seed, \ + ticket, cache_path, \ + parents_filename, replica_path, \ + output_dir); \ + } + + SECTOR_PARAMS_TABLE(SECTOR_CALL_TABLE(CALL_C1)); +#undef CALL_C1 + + return 0; +} + +template +int do_node_read(size_t sector_size, uint64_t node_to_read) { + // Read and print a hashed node + size_t pages_to_read = 1; + + init_ctx(sector_size); + + page_t *pages = (page_t *) + spdk_dma_zmalloc(sizeof(page_t) * pages_to_read, PAGE_SIZE, NULL); + assert (pages != nullptr); + + size_t ctrl_id; + size_t 
block_on_controller;
+  nvme_node_indexes(sealing_ctx->controllers->size(),
+                    node_to_read, ctrl_id, block_on_controller);
+
+  printf("Reading block %ld on controller %ld\n", block_on_controller, ctrl_id);
+
+  sequential_io_t sio((*sealing_ctx->controllers)[ctrl_id]);
+  SPDK_ERROR(sio.rw(true, pages_to_read, (uint8_t *)&pages[0], block_on_controller));
+
+  size_t node_in_page = node_to_read % C::NODES_PER_PAGE;
+  printf("Node %8lx, ctrl %ld, block %ld, node_in_page %ld\n",
+         node_to_read, ctrl_id, block_on_controller, node_in_page);
+
+  char prefix[32];
+  snprintf(prefix, 32, "Node %8lx: ", node_to_read);
+  print_node(&pages[0].parallel_nodes[node_in_page], 0, prefix, true);
+  return 0;
+}
+
+int node_read(size_t sector_size, size_t num_sectors, uint64_t node_to_read) {
+#define CALL_NR(C) \
+  do_node_read<C>(sector_size, node_to_read);
+  SECTOR_PARAMS_TABLE(SECTOR_CALL_TABLE(CALL_NR));
+#undef CALL_NR
+  return 0;
+}
+
+extern "C"
+size_t get_max_block_offset(size_t sector_size) {
+  init_ctx(sector_size);
+
+  if (sealing_ctx->controllers[0].size() == 0) {
+    return 0;
+  }
+  size_t min_block = (*sealing_ctx->controllers)[0].get_page_count(0);
+  for (size_t i = 1; i < (*sealing_ctx->controllers).size(); i++) {
+    size_t blocks = (*sealing_ctx->controllers)[i].get_page_count(0);
+    if (min_block > blocks) {
+      min_block = blocks;
+    }
+  }
+  return min_block;
+}
+
+extern "C"
+size_t get_slot_size(size_t num_sectors, size_t sector_size) {
+  size_t num_layers;
+  SECTOR_PARAMS_TABLE(num_layers = params.GetNumLayers());
+
+  // Number of nodes stored per page (packed)
+  size_t nodes_per_page = PAGE_SIZE / (num_sectors * NODE_SIZE);
+  // Number of pages per layer
+  size_t pages_per_layer = sector_size / NODE_SIZE / nodes_per_page;
+
+  // We want the number of sectors to be a power of two, 128 at maximum
+  if (!((num_sectors & (num_sectors - 1)) == 0 && num_sectors > 0 && num_sectors <= 128)) {
+    printf("Unsupported number of sectors %ld\n", num_sectors);
+    exit(1);
+  }
+
+  size_t num_controllers = sealing_ctx->controllers[0].size();
+  size_t pages_per_layer_per_controller =
+    ((pages_per_layer + num_controllers - 1) / num_controllers);
+  return pages_per_layer_per_controller * num_layers;
+}
+
+node_t* p_aux_open_read(const char* cache) {
+  node_t* p_aux_buf = nullptr;
+
+  const char* p_aux_template = "%s/p_aux";
+  const size_t MAX = 256;
+  char fname[MAX];
+  snprintf(fname, MAX, p_aux_template, cache);
+
+  int p_aux_fd = open(fname, O_RDONLY);
+  if (p_aux_fd == -1) {
+    printf("p_aux_open_read failed, unable to open %s\n", fname);
+    return nullptr;
+  }
+
+  p_aux_buf = (node_t*)mmap(NULL, sizeof(node_t) * 2, PROT_READ,
+                            MAP_SHARED, p_aux_fd, 0);
+  close(p_aux_fd);
+
+  if (p_aux_buf == MAP_FAILED) {
+    perror("mmap failed for p_aux file");
+    return nullptr;
+  }
+
+  return p_aux_buf;
+}
+
+void p_aux_close(node_t* p_aux_buf) {
+  munmap(p_aux_buf, sizeof(node_t) * 2);
+}
+
+bool p_aux_write(int index, size_t nodes, uint8_t* value, const char* cache) {
+  assert((index == 0) || (index == 1));
+  // Index 0 (comm_c) may write one node, or two when comm_r_last is written
+  // along with it; index 1 (comm_r_last) always writes exactly one node.
+  assert((index != 0) || (nodes == 1) || (nodes == 2));
+  assert((index != 1) || (nodes == 1));
+
+  const char* p_aux_template = "%s/p_aux";
+  const size_t MAX = 256;
+  char fname[MAX];
+  snprintf(fname, MAX, p_aux_template, cache);
+
+  int p_aux_fd = open(fname, O_RDWR);
+  if (p_aux_fd == -1) {
+    printf("p_aux_write failed, unable to open %s\n", fname);
+    return false;
+  }
+
+  node_t* p_aux_buf = (node_t*)mmap(NULL, sizeof(node_t) * 2, PROT_WRITE,
+                                    MAP_SHARED,
p_aux_fd, 0); + + if (p_aux_buf == MAP_FAILED) { + perror("mmap failed for p_aux file"); + return false; + } + + std::memcpy(&(p_aux_buf[index]), value, nodes * sizeof(node_t)); + + munmap(p_aux_buf, sizeof(node_t) * 2); + close(p_aux_fd); + return true; +} + +template +bool get_comm_from_tree(SectorParameters ¶ms, uint8_t* comm, const char* cache, + size_t num_files, const char* prefix) { + uint8_t* bufs[num_files]; + + size_t file_size =0; + + for (size_t l = 0; l < num_files; ++l) { + const size_t MAX = 256; + char fname[MAX]; + if (num_files == 1) { + snprintf(fname, MAX, prefix, cache); + } else { + snprintf(fname, MAX, prefix, cache, l); + } + + int tree_fd = open(fname, O_RDONLY); + if (tree_fd == -1) { + printf("Failed to open tree file %s\n", fname); + return false; + } + + assert (tree_fd != -1); + struct stat buf; + fstat(tree_fd, &buf); + bufs[l] = (uint8_t*)mmap(NULL, buf.st_size, PROT_READ, MAP_SHARED, + tree_fd, 0); + file_size = buf.st_size; + if (bufs[l] == MAP_FAILED) { + perror("mmap failed for tree file"); + return false; + } + close(tree_fd); + } + + if (num_files == 1) { + uint8_t* comm_addr = bufs[0] + (file_size - sizeof(node_t)); + std::memcpy(comm, comm_addr, sizeof(node_t)); + } else { + // Since files > 1, assume poseidon tree + size_t arity = params.GetNumTreeRCArity(); + node_t nodes[arity]; + + for (size_t l = 0; l < num_files; ++l) { + uint8_t* last_addr = bufs[l] + (file_size - sizeof(node_t)); + std::memcpy((uint8_t*)&(nodes[l]), last_addr, sizeof(node_t)); + } + + Poseidon poseidon_comm(arity); + poseidon_comm.Hash(comm, (uint8_t*)&(nodes[0])); + } + + for (size_t l = 0; l < num_files; ++l) { + munmap(bufs[l], file_size); + } + + return true ; +} + +extern "C" +bool get_comm_c_from_tree(uint8_t* comm_c, const char* cache_path, + size_t sector_size) { + SECTOR_PARAMS_TABLE(if (params.GetNumTreeRCFiles() == 1) { \ + return get_comm_from_tree(params, comm_c, cache_path, \ + params.GetNumTreeRCFiles(), \ + "%s/sc-02-data-tree-c.dat"); \ + } \ + return get_comm_from_tree(params, comm_c, cache_path, \ + params.GetNumTreeRCFiles(), \ + "%s/sc-02-data-tree-c-%ld.dat"); \ + ); +} + +extern "C" +bool get_comm_c(uint8_t* comm_c, const char* cache_path) { + node_t* p_aux_buf = p_aux_open_read(cache_path); + if (p_aux_buf == nullptr) return false; + + std::memcpy(comm_c, &(p_aux_buf[0]), sizeof(node_t)); + + p_aux_close(p_aux_buf); + return true; +} + +extern "C" +bool set_comm_c(uint8_t* comm_c, const char* cache_path) { + return p_aux_write(0, 1, comm_c, cache_path); +} + +extern "C" +bool get_comm_r_last_from_tree(uint8_t* comm_r_last, const char* cache_path, + size_t sector_size) { + SECTOR_PARAMS_TABLE(if (params.GetNumTreeRCFiles() == 1) { \ + return get_comm_from_tree(params, comm_r_last, cache_path, \ + params.GetNumTreeRCFiles(), \ + "%s/sc-02-data-tree-r-last.dat"); \ + } \ + return get_comm_from_tree(params, comm_r_last, cache_path, \ + params.GetNumTreeRCFiles(), \ + "%s/sc-02-data-tree-r-last-%ld.dat"); \ + ); +} + +extern "C" +bool get_comm_r_last(uint8_t* comm_r_last, const char* cache_path) { + node_t* p_aux_buf = p_aux_open_read(cache_path); + if (p_aux_buf == nullptr) return false; + + std::memcpy(comm_r_last, &(p_aux_buf[1]), sizeof(node_t)); + + p_aux_close(p_aux_buf); + return true; +} + +extern "C" +bool set_comm_r_last(uint8_t* comm_r_last, const char* cache_path) { + return p_aux_write(1, 1, comm_r_last, cache_path); +} + +extern "C" +bool get_comm_r(uint8_t* comm_r, const char* cache_path) { + node_t* p_aux_buf = p_aux_open_read(cache_path); + 
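+  // p_aux holds exactly two 32B nodes: [0] = comm_c and [1] = comm_r_last,
+  // so comm_r below is the arity-2 Poseidon hash over the whole buffer.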
if (p_aux_buf == nullptr) return false;
+
+  Poseidon poseidon_comm_r(2);
+  poseidon_comm_r.Hash(comm_r, (uint8_t*)p_aux_buf);
+
+  p_aux_close(p_aux_buf);
+
+  return true;
+}
+
+extern "C"
+bool get_comm_d(uint8_t* comm_d, const char* cache_path, size_t sector_size) {
+  SECTOR_PARAMS_TABLE(return get_comm_from_tree(params, comm_d, cache_path, 1, "%s/sc-02-data-tree-d.dat"));
+}
+
+extern "C"
+bool get_cc_comm_d(uint8_t* comm_d, size_t sector_size) {
+  SECTOR_PARAMS_TABLE(std::memcpy(comm_d, CC_TREE_D_NODE_VALUES[params.GetNumTreeDLevels()], \
+                                  sizeof(node_t)));
+
+  return true;
+}
+
+#undef SECTOR_PARAMS_TABLE
+#undef SECTOR_CALL_TABLE
+#undef COMMA
diff --git a/extern/supraseal/sealing/supra_seal.h b/extern/supraseal/sealing/supra_seal.h
new file mode 100644
index 000000000..714b05f9c
--- /dev/null
+++ b/extern/supraseal/sealing/supra_seal.h
@@ -0,0 +1,78 @@
+// Copyright Supranational LLC
+
+#ifndef __SUPRA_SEAL_H__
+#define __SUPRA_SEAL_H__
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+// Optional init function.
+// config_file - topology config file. Defaults to supra_seal.cfg
+void supra_seal_init(size_t sector_size, const char* config_file);
+
+// Perform pc1, storing the sealed layers starting at block_offset.
+int pc1(uint64_t block_offset, size_t num_sectors,
+        const uint8_t* replica_ids, const char* parents_filename,
+        size_t sector_size);
+
+// Perform pc2 for layers stored starting at block_offset.
+// 'data_filenames' may be NULL for all CC sectors, or point to
+// an array of length 'num_sectors' of pointers to filenames.
+// Any element in the array may be NULL for CC or contain a path
+// to a data file.
+int pc2(size_t block_offset, size_t num_sectors, const char* output_dir,
+        const char** data_filenames, size_t sector_size);
+
+// Delete files associated with pc2
+int pc2_cleanup(size_t num_sectors, const char* output_dir, size_t sector_size);
+
+int c1(size_t block_offset, size_t num_sectors, size_t sector_slot,
+       const uint8_t* replica_id, const uint8_t* seed,
+       const uint8_t* ticket, const char* cache_path,
+       const char* parents_filename, const char* replica_path,
+       size_t sector_size);
+
+// Returns the highest usable block offset, which is the minimum block
+// count across all attached NVMe drives. I.e., the usable blocks
+// are [0 .. max).
+size_t get_max_block_offset(size_t sector_size);
+
+// Returns the size in blocks required for the given num_sectors.
+size_t get_slot_size(size_t num_sectors, size_t sector_size);
+
+// 32 bytes of space for the comm_ values should be preallocated prior to call
+
+// Returns comm_c after calculating from tree file(s)
+bool get_comm_c_from_tree(uint8_t* comm_c, const char* cache_path, size_t sector_size);
+
+// Returns comm_c from p_aux file
+bool get_comm_c(uint8_t* comm_c, const char* cache_path);
+
+// Sets comm_c in the p_aux file
+bool set_comm_c(uint8_t* comm_c, const char* cache_path);
+
+// Returns comm_r_last after calculating from tree file(s)
+bool get_comm_r_last_from_tree(uint8_t* comm_r_last, const char* cache_path, size_t sector_size);
+
+// Returns comm_r_last from p_aux file
+bool get_comm_r_last(uint8_t* comm_r_last, const char* cache_path);
+
+// Sets comm_r_last in the p_aux file
+bool set_comm_r_last(uint8_t* comm_r_last, const char* cache_path);
+
+// Returns comm_r after calculating from p_aux file
+bool get_comm_r(uint8_t* comm_r, const char* cache_path);
+
+// Returns comm_d from the tree_d file (matches the definition in supra_seal.cpp)
+bool get_comm_d(uint8_t* comm_d, const char* cache_path, size_t sector_size);
+
+// Returns comm_d for a cc sector
+bool get_cc_comm_d(uint8_t* comm_d, size_t sector_size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/extern/supraseal/sealing/supra_seal.hpp b/extern/supraseal/sealing/supra_seal.hpp
new file mode 100644
index 000000000..250b48ae7
--- /dev/null
+++ b/extern/supraseal/sealing/supra_seal.hpp
@@ -0,0 +1,33 @@
+// Copyright Supranational LLC
+
+#ifndef __SUPRA_SEAL_HPP__
+#define __SUPRA_SEAL_HPP__
+
+#include <mutex>
+#include "../util/stats.hpp"
+#include "../sealing/constants.hpp"
+#include "../nvme/nvme.hpp"
+#include "../sealing/data_structures.hpp"
+
+// Forward declarations
+template <class C> class coordinator_t;
+template <class C> class node_rw_t;
+template <class C> class orchestrator_t;
+
+const size_t STATS_PERIOD = 1<<22;
+const size_t STATS_MASK = STATS_PERIOD - 1;
+
+extern std::mutex print_mtx;
+
+#include "../util/debug_helpers.hpp"
+#include "topology_t.hpp"
+#include "../pc1/system_buffers_t.hpp"
+#include "../pc1/parent_iter_t.hpp"
+#include "../pc1/orchestrator_t.hpp"
+#include "../pc1/node_rw_t.hpp"
+#include "../pc1/coordinator_t.hpp"
+#include "supra_seal.h"
+
+int node_read(size_t sector_size, size_t num_sectors, uint64_t node_to_read);
+
+#endif
diff --git a/extern/supraseal/sealing/topology_t.hpp b/extern/supraseal/sealing/topology_t.hpp
new file mode 100644
index 000000000..337142c0e
--- /dev/null
+++ b/extern/supraseal/sealing/topology_t.hpp
@@ -0,0 +1,223 @@
+// Copyright Supranational LLC
+
+#ifndef __TOPOLOGY_T_HPP__
+#define __TOPOLOGY_T_HPP__
+
+#include <iostream>
+#include <set>
+#include <map>
+#include <vector>
+#include <thread>
+#include "../sealing/constants.hpp"
+#include <libconfig.h++>
+
+// This is used to size buffers. There will be some waste in cases where there
+// are fewer hashers per coordinator, but it is minimal. This should be sized
+// according to the number of hashers that can run in a CCX.
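+// For illustration, a minimal config in the shape this class parses below
+// (libconfig syntax; the PCIe addresses, core numbers, hasher counts, and
+// sleep times are made-up values, and the pc2/c1 groups are elided):
+//
+//   spdk: { nvme = ["0000:85:00.0", "0000:86:00.0"]; };
+//   topology: {
+//     pc1: {
+//       hashers_per_core = 2;
+//       sector_configs: ( {
+//         sectors = 32;   // 16 hashers * NODES_PER_HASHER sectors
+//         coordinators = ( { core = 1;  hashers = 8; },
+//                          { core = 10; hashers = 8; } );
+//       } );
+//       reader = 0; writer = 0; orchestrator = 0;
+//       qpair_reader = 0; qpair_writer = 1;
+//       reader_sleep_time = 250; writer_sleep_time = 500;
+//     };
+//   };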
+const size_t MAX_HASHERS_PER_COORD = 14; + +//using namespace std; +using namespace libconfig; + +class topology_t { +public: + struct coordinator_t { + size_t hashers_per_core; + int core; + size_t num_hashers; + + coordinator_t(size_t _hashers_per_core, int _core, size_t _num_hashers) : + hashers_per_core(_hashers_per_core), core(_core), num_hashers(_num_hashers) + {} + + size_t num_sectors() const { + return num_hashers * NODES_PER_HASHER; + } + + int get_hasher_core(size_t i) const { + if (hashers_per_core == 1) { + return core + 1 + i; + } + + if (i & 0x1) { + // Odd hasher, it's on the hyperthread + return core + 1 + i / hashers_per_core + get_physical_cores(); + } + return core + 1 + i / hashers_per_core; + } + }; + + struct sector_config_t { + size_t hashers_per_core; + size_t sectors; + std::vector coordinators; + + size_t num_coordinators() const { + return coordinators.size(); + } + + int get_coordinator_core(size_t i) const { + return coordinators[i].core; + } + + size_t num_hashers() const { + size_t count = 0; + for (size_t i = 0; i < num_coordinators(); i++) { + count += coordinators[i].num_hashers; + } + return count; + } + + size_t num_sectors() const { + return num_hashers() * NODES_PER_HASHER; + } + + size_t num_hashing_cores() const { + return num_coordinators() + (num_hashers() + hashers_per_core - 1) / hashers_per_core; + } + }; + +public: + static int get_physical_cores() { + return std::thread::hardware_concurrency() / 2; + } + + size_t hashers_per_core; + std::set nvme_addrs; + std::map sector_configs; + + // Core numbers + int pc1_reader; + int pc1_writer; + int pc1_orchestrator; + int pc1_qpair_reader; + int pc1_qpair_writer; + int pc1_writer_sleep_time; + int pc1_reader_sleep_time; + int pc2_reader; + int pc2_hasher; + int pc2_hasher_cpu; + int pc2_writer; + int pc2_writer_cores; + int pc2_sleep_time; + int pc2_qpair; + int c1_reader; + int c1_sleep_time; + int c1_qpair; + + topology_t(const char* filename) { + Config cfg; + try { + cfg.readFile(filename); + } catch(const FileIOException &fioex) { + std::cerr << "Could not read config file " << filename << std::endl; + exit(1); + } catch(const ParseException &pex) { + std::cerr << "Parse error at " << pex.getFile() << ":" << pex.getLine() + << " - " << pex.getError() << std::endl; + exit(1); + } + const Setting& root = cfg.getRoot(); + + try { + const Setting& nvme = root["spdk"]["nvme"]; + for (int i = 0; i < nvme.getLength(); i++) { + std::string nvme_id = nvme[i]; + nvme_addrs.insert(nvme_id); + } + + hashers_per_core = (int)root["topology"]["pc1"]["hashers_per_core"]; + if (hashers_per_core != 1 && hashers_per_core != 2) { + printf("hashers_per_core must be 1 or 2, got %ld\n", hashers_per_core); + exit(1); + } + + const Setting& topology_pc1_topos = root["topology"]["pc1"]["sector_configs"]; + + for (int i = 0; i < topology_pc1_topos.getLength(); i++) { + Setting& coord = topology_pc1_topos[i]; + int sectors = coord["sectors"]; + + sector_config_t sector_config; + sector_config.sectors = sectors; + sector_config.hashers_per_core = hashers_per_core; + + std::cout << "sectors " << sectors << std::endl; + Setting& coordinators_cfg = coord["coordinators"]; + for (int j = 0; j < coordinators_cfg.getLength(); j++) { + int core = coordinators_cfg[j]["core"]; + int hashers = coordinators_cfg[j]["hashers"]; + assert ((size_t)hashers <= MAX_HASHERS_PER_COORD); + std::cout << " coord " << core << " hashers " << hashers << std::endl; + sector_config.coordinators.push_back(coordinator_t(hashers_per_core, core, 
hashers)); + } + sector_configs.insert(std::pair((size_t)sector_config.sectors, sector_config)); + } + + pc1_reader = root["topology"]["pc1"]["reader"]; + pc1_writer = root["topology"]["pc1"]["writer"]; + pc1_orchestrator = root["topology"]["pc1"]["orchestrator"]; + pc1_qpair_reader = root["topology"]["pc1"]["qpair_reader"]; + pc1_qpair_writer = root["topology"]["pc1"]["qpair_writer"]; + pc1_reader_sleep_time = root["topology"]["pc1"]["reader_sleep_time"]; + pc1_writer_sleep_time = root["topology"]["pc1"]["writer_sleep_time"]; + pc2_reader = root["topology"]["pc2"]["reader"]; + pc2_hasher = root["topology"]["pc2"]["hasher"]; + pc2_hasher_cpu = root["topology"]["pc2"]["hasher_cpu"]; + pc2_writer = root["topology"]["pc2"]["writer"]; + pc2_writer_cores = root["topology"]["pc2"]["writer_cores"]; + pc2_sleep_time = root["topology"]["pc2"]["sleep_time"]; + pc2_qpair = root["topology"]["pc2"]["qpair"]; + c1_reader = root["topology"]["c1"]["reader"]; + c1_sleep_time = root["topology"]["c1"]["sleep_time"]; + c1_qpair = root["topology"]["c1"]["qpair"]; + } catch(const SettingNotFoundException &nfex) { + // Ignore. + } + } + + std::set get_allowed_nvme() { + return nvme_addrs; + } + + sector_config_t* get_sector_config(size_t parallel_sectors) { + const auto& it = sector_configs.find(parallel_sectors); + if (it == sector_configs.end()) { + return nullptr; + } + return &it->second; + } + + void print(size_t parallel_sectors) { + sector_config_t* config = get_sector_config(parallel_sectors); + + printf("Num coordinators: %ld\n", config->num_coordinators()); + printf("Num hashers: %ld\n", config->num_hashers()); + printf("Num sectors: %ld\n", config->num_sectors()); + printf("Num hashing cores: %ld\n", config->num_hashing_cores()); + printf("core process0 HT process1\n"); + size_t sector = 0; + for (size_t i = 0; i < config->num_coordinators(); i++) { + printf("%2d coord%-2ld\n", config->coordinators[i].core, i); + for (size_t j = 0; j < config->coordinators[i].num_hashers; j++) { + if (hashers_per_core == 1 || j == config->coordinators[i].num_hashers - 1) { + printf("%2d %2ld,%2ld %2d\n", + config->coordinators[i].get_hasher_core(j), + sector, sector + 1, + config->coordinators[i].get_hasher_core(j) + get_physical_cores()); + sector += 2; + } else { + printf("%2d %2ld,%2ld %2d %2ld,%2ld\n", + config->coordinators[i].get_hasher_core(j), + sector, sector + 1, + config->coordinators[i].get_hasher_core(j + 1), + sector + 2, sector + 3); + j++; + sector += 4; + } + } + } + } +}; + +#endif diff --git a/extern/supraseal/sha/sha_ext_mbx2.S b/extern/supraseal/sha/sha_ext_mbx2.S new file mode 100644 index 000000000..142e83afd --- /dev/null +++ b/extern/supraseal/sha/sha_ext_mbx2.S @@ -0,0 +1,438 @@ +// Copyright Supranational LLC + +.intel_syntax noprefix + +.global sha_ext_mbx2 +.type sha_ext_mbx2, @function + +.SECTION .text + .ALIGN 32 + +.type sha_ext_mbx2, @function +sha_ext_mbx2: +.cfi_startproc + endbr64 + push rbp +.cfi_adjust_cfa_offset 8 +.cfi_offset rbp,-16 + mov rbp, rsp +.cfi_def_cfa_register rbp + push r12 +.cfi_offset r12,-24 + push r13 +.cfi_offset r13,-32 + sub rsp, 64 + and rsp, 0xFFFFFFFFFFFFFFF0 + movdqa xmm1, [FIRST_STATE+rip] + movdqa xmm2, [SECOND_STATE+rip] + movdqa xmm8, xmm1 + movdqa xmm9, xmm2 + lea r10, [CONSTANTS+rip] + mov r13d, 1 + mov r11, rsi + mov rsi, [rsi] + add rsi, 192 + cmp r9, 1 + jz shaloop + add rsi, 64 +shaloop: + movdqa [rsp], xmm1 + movdqa [rsp+0x10], xmm2 + movdqa [rsp+0x20], xmm8 + movdqa [rsp+0x30], xmm9 + mov r12, [r11] + add r12, rcx + movdqu xmm0, [r12] + movdqa 
xmm3, xmm0 + paddd xmm0, [r10] + sha256rnds2 xmm2, xmm1, xmm0 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + movdqu xmm0, [r12+0x20] + movdqa xmm10, xmm0 + paddd xmm0, [r10] + sha256rnds2 xmm9, xmm8, xmm0 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + movdqu xmm0, [r12+0x10] + movdqa xmm4, xmm0 + paddd xmm0, [r10+0x10] + sha256rnds2 xmm2, xmm1, xmm0 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm3, xmm4 + movdqu xmm0, [r12+0x30] + movdqa xmm11, xmm0 + paddd xmm0, [r10+0x10] + sha256rnds2 xmm9, xmm8, xmm0 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm10, xmm11 + mov r12, [r11+0x8] + add r12, rcx + sub r8, 1 + cmove r12, rsi + movdqu xmm0, [r12] + movdqa xmm5, xmm0 + paddd xmm0, [r10+0x20] + sha256rnds2 xmm2, xmm1, xmm0 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm4, xmm5 + movdqu xmm0, [r12+0x20] + movdqa xmm12, xmm0 + paddd xmm0, [r10+0x20] + sha256rnds2 xmm9, xmm8, xmm0 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm11, xmm12 + movdqu xmm0, [r12+0x10] + movdqa xmm6, xmm0 + paddd xmm0, [r10+0x30] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm6 + palignr xmm7, xmm5, 0x04 + paddd xmm3, xmm7 + sha256msg2 xmm3, xmm6 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm5, xmm6 + movdqu xmm0, [r12+0x30] + movdqa xmm13, xmm0 + paddd xmm0, [r10+0x30] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm13 + palignr xmm14, xmm12, 0x04 + paddd xmm10, xmm14 + sha256msg2 xmm10, xmm13 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm12, xmm13 + movdqa xmm0, xmm3 + paddd xmm0, [r10+0x40] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm3 + palignr xmm7, xmm6, 0x04 + paddd xmm4, xmm7 + sha256msg2 xmm4, xmm3 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm6, xmm3 + movdqa xmm0, xmm10 + paddd xmm0, [r10+0x40] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm10 + palignr xmm14, xmm13, 0x04 + paddd xmm11, xmm14 + sha256msg2 xmm11, xmm10 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm13, xmm10 + movdqa xmm0, xmm4 + paddd xmm0, [r10+0x50] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm4 + palignr xmm7, xmm3, 0x04 + paddd xmm5, xmm7 + sha256msg2 xmm5, xmm4 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm3, xmm4 + movdqa xmm0, xmm11 + paddd xmm0, [r10+0x50] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm11 + palignr xmm14, xmm10, 0x04 + paddd xmm12, xmm14 + sha256msg2 xmm12, xmm11 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm10, xmm11 + movdqa xmm0, xmm5 + paddd xmm0, [r10+0x60] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm5 + palignr xmm7, xmm4, 0x04 + paddd xmm6, xmm7 + sha256msg2 xmm6, xmm5 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm4, xmm5 + movdqa xmm0, xmm12 + paddd xmm0, [r10+0x60] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm12 + palignr xmm14, xmm11, 0x04 + paddd xmm13, xmm14 + sha256msg2 xmm13, xmm12 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm11, xmm12 + movdqa xmm0, xmm6 + paddd xmm0, [r10+0x70] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm6 + palignr xmm7, xmm5, 0x04 + paddd xmm3, xmm7 + sha256msg2 xmm3, xmm6 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm5, xmm6 + movdqa xmm0, xmm13 + paddd xmm0, [r10+0x70] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm13 + palignr xmm14, xmm12, 0x04 + paddd xmm10, xmm14 + sha256msg2 
xmm10, xmm13 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm12, xmm13 + movdqa xmm0, xmm3 + paddd xmm0, [r10+0x80] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm3 + palignr xmm7, xmm6, 0x04 + paddd xmm4, xmm7 + sha256msg2 xmm4, xmm3 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm6, xmm3 + movdqa xmm0, xmm10 + paddd xmm0, [r10+0x80] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm10 + palignr xmm14, xmm13, 0x04 + paddd xmm11, xmm14 + sha256msg2 xmm11, xmm10 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm13, xmm10 + movdqa xmm0, xmm4 + paddd xmm0, [r10+0x90] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm4 + palignr xmm7, xmm3, 0x04 + paddd xmm5, xmm7 + sha256msg2 xmm5, xmm4 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm3, xmm4 + movdqa xmm0, xmm11 + paddd xmm0, [r10+0x90] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm11 + palignr xmm14, xmm10, 0x04 + paddd xmm12, xmm14 + sha256msg2 xmm12, xmm11 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm10, xmm11 + movdqa xmm0, xmm5 + paddd xmm0, [r10+0xA0] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm5 + palignr xmm7, xmm4, 0x04 + paddd xmm6, xmm7 + sha256msg2 xmm6, xmm5 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm4, xmm5 + movdqa xmm0, xmm12 + paddd xmm0, [r10+0xA0] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm12 + palignr xmm14, xmm11, 0x04 + paddd xmm13, xmm14 + sha256msg2 xmm13, xmm12 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm11, xmm12 + movdqa xmm0, xmm6 + paddd xmm0, [r10+0xB0] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm6 + palignr xmm7, xmm5, 0x04 + paddd xmm3, xmm7 + sha256msg2 xmm3, xmm6 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm5, xmm6 + movdqa xmm0, xmm13 + paddd xmm0, [r10+0xB0] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm13 + palignr xmm14, xmm12, 0x04 + paddd xmm10, xmm14 + sha256msg2 xmm10, xmm13 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm12, xmm13 + movdqa xmm0, xmm3 + paddd xmm0, [r10+0xC0] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm3 + palignr xmm7, xmm6, 0x04 + paddd xmm4, xmm7 + sha256msg2 xmm4, xmm3 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + sha256msg1 xmm6, xmm3 + movdqa xmm0, xmm10 + paddd xmm0, [r10+0xC0] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm10 + palignr xmm14, xmm13, 0x04 + paddd xmm11, xmm14 + sha256msg2 xmm11, xmm10 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + sha256msg1 xmm13, xmm10 + movdqa xmm0, xmm4 + paddd xmm0, [r10+0xD0] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm4 + palignr xmm7, xmm3, 0x04 + paddd xmm5, xmm7 + sha256msg2 xmm5, xmm4 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + movdqa xmm0, xmm11 + paddd xmm0, [r10+0xD0] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm11 + palignr xmm14, xmm10, 0x04 + paddd xmm12, xmm14 + sha256msg2 xmm12, xmm11 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + movdqa xmm0, xmm5 + paddd xmm0, [r10+0xE0] + sha256rnds2 xmm2, xmm1, xmm0 + movdqa xmm7, xmm5 + palignr xmm7, xmm4, 0x04 + paddd xmm6, xmm7 + sha256msg2 xmm6, xmm5 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + movdqa xmm0, xmm12 + paddd xmm0, [r10+0xE0] + sha256rnds2 xmm9, xmm8, xmm0 + movdqa xmm14, xmm12 + palignr xmm14, xmm11, 0x04 + paddd xmm13, xmm14 + sha256msg2 xmm13, xmm12 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + movdqa xmm0, xmm6 + 
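+  // Last of the 16 round-constant blocks below: the table at [r10] holds all
+  // 64 SHA-256 K words, and each 16-byte block feeds a pair of sha256rnds2
+  // (4 rounds) in both message streams, which run in lockstep --
+  // xmm1/xmm2 for the first message and xmm8/xmm9 for the second,
+  // in the ABEF/CDGH register layout the SHA extensions require.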
paddd xmm0, [r10+0xF0] + sha256rnds2 xmm2, xmm1, xmm0 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm1, xmm2, xmm0 + movdqa xmm0, xmm13 + paddd xmm0, [r10+0xF0] + sha256rnds2 xmm9, xmm8, xmm0 + pshufd xmm0, xmm0, 0x0E + sha256rnds2 xmm8, xmm9, xmm0 + paddd xmm1, [rsp] + paddd xmm2, [rsp+0x10] + paddd xmm8, [rsp+0x20] + paddd xmm9, [rsp+0x30] + add r11, 16 + cmp r8, 0 + jz done + sub r13, 1 + jne shaloop + mov r13, r9 + mov r11, rdx + jmp shaloop + +done: + movdqa xmm3, xmm1 + movdqa xmm4, xmm8 + punpckhdq xmm1, xmm2 + punpckhdq xmm8, xmm9 + punpckldq xmm2, xmm3 + punpckldq xmm9, xmm4 + pshufd xmm1, xmm1, 0x72 + pshufd xmm2, xmm2, 0x27 + pshufd xmm8, xmm8, 0x72 + pshufd xmm9, xmm9, 0x27 + movdqa xmm15, [STATE_MASK+rip] + pand xmm2, xmm15 + pand xmm9, xmm15 + movdqu [rdi], xmm1 + movdqu [rdi+0x10], xmm2 + movdqu [rdi+0x20], xmm8 + movdqu [rdi+0x30], xmm9 + lea rsp, [rbp-16] + pop r13 + pop r12 +.cfi_restore r13 +.cfi_restore r12 +.cfi_def_cfa_register rsp + pop rbp +.cfi_adjust_cfa_offset -8 +.cfi_restore rbp + ret +.cfi_endproc +.size sha_ext_mbx2, . - sha_ext_mbx2 + +.SECTION .rodata + +.ALIGN 16 +FIRST_STATE: + .quad 0x510E527F9B05688C + .quad 0x6A09E667BB67AE85 + +SECOND_STATE: + .quad 0x1F83D9AB5BE0CD19 + .quad 0x3C6EF372A54FF53A + +STATE_MASK: + .quad 0xFFFFFFFFFFFFFFFF + .quad 0xFFFFFF3FFFFFFFFF + +CONSTANTS: + .byte 0x98, 0x2F, 0x8A, 0x42, 0x91, 0x44, 0x37, 0x71 + .byte 0xCF, 0xFB, 0xC0, 0xB5, 0xA5, 0xDB, 0xB5, 0xE9 + .byte 0x5B, 0xC2, 0x56, 0x39, 0xF1, 0x11, 0xF1, 0x59 + .byte 0xA4, 0x82, 0x3F, 0x92, 0xD5, 0x5E, 0x1C, 0xAB + .byte 0x98, 0xAA, 0x07, 0xD8, 0x01, 0x5B, 0x83, 0x12 + .byte 0xBE, 0x85, 0x31, 0x24, 0xC3, 0x7D, 0x0C, 0x55 + .byte 0x74, 0x5D, 0xBE, 0x72, 0xFE, 0xB1, 0xDE, 0x80 + .byte 0xA7, 0x06, 0xDC, 0x9B, 0x74, 0xF1, 0x9B, 0xC1 + .byte 0xC1, 0x69, 0x9B, 0xE4, 0x86, 0x47, 0xBE, 0xEF + .byte 0xC6, 0x9D, 0xC1, 0x0F, 0xCC, 0xA1, 0x0C, 0x24 + .byte 0x6F, 0x2C, 0xE9, 0x2D, 0xAA, 0x84, 0x74, 0x4A + .byte 0xDC, 0xA9, 0xB0, 0x5C, 0xDA, 0x88, 0xF9, 0x76 + .byte 0x52, 0x51, 0x3E, 0x98, 0x6D, 0xC6, 0x31, 0xA8 + .byte 0xC8, 0x27, 0x03, 0xB0, 0xC7, 0x7F, 0x59, 0xBF + .byte 0xF3, 0x0B, 0xE0, 0xC6, 0x47, 0x91, 0xA7, 0xD5 + .byte 0x51, 0x63, 0xCA, 0x06, 0x67, 0x29, 0x29, 0x14 + .byte 0x85, 0x0A, 0xB7, 0x27, 0x38, 0x21, 0x1B, 0x2E + .byte 0xFC, 0x6D, 0x2C, 0x4D, 0x13, 0x0D, 0x38, 0x53 + .byte 0x54, 0x73, 0x0A, 0x65, 0xBB, 0x0A, 0x6A, 0x76 + .byte 0x2E, 0xC9, 0xC2, 0x81, 0x85, 0x2C, 0x72, 0x92 + .byte 0xA1, 0xE8, 0xBF, 0xA2, 0x4B, 0x66, 0x1A, 0xA8 + .byte 0x70, 0x8B, 0x4B, 0xC2, 0xA3, 0x51, 0x6C, 0xC7 + .byte 0x19, 0xE8, 0x92, 0xD1, 0x24, 0x06, 0x99, 0xD6 + .byte 0x85, 0x35, 0x0E, 0xF4, 0x70, 0xA0, 0x6A, 0x10 + .byte 0x16, 0xC1, 0xA4, 0x19, 0x08, 0x6C, 0x37, 0x1E + .byte 0x4C, 0x77, 0x48, 0x27, 0xB5, 0xBC, 0xB0, 0x34 + .byte 0xB3, 0x0C, 0x1C, 0x39, 0x4A, 0xAA, 0xD8, 0x4E + .byte 0x4F, 0xCA, 0x9C, 0x5B, 0xF3, 0x6F, 0x2E, 0x68 + .byte 0xEE, 0x82, 0x8F, 0x74, 0x6F, 0x63, 0xA5, 0x78 + .byte 0x14, 0x78, 0xC8, 0x84, 0x08, 0x02, 0xC7, 0x8C + .byte 0xFA, 0xFF, 0xBE, 0x90, 0xEB, 0x6C, 0x50, 0xA4 + .byte 0xF7, 0xA3, 0xF9, 0xBE, 0xF2, 0x78, 0x71, 0xC6 + +.section .note.GNU-stack,"",@progbits +.section .note.gnu.property,"a",@note + .long 4,2f-1f,5 + .byte 0x47,0x4E,0x55,0 +1: .long 0xc0000002,4,3 +.align 8 +2: + +.att_syntax prefix diff --git a/extern/supraseal/sha/sha_functions.hpp b/extern/supraseal/sha/sha_functions.hpp new file mode 100644 index 000000000..8e52439b1 --- /dev/null +++ b/extern/supraseal/sha/sha_functions.hpp @@ -0,0 +1,39 @@ +// Copyright Supranational LLC + +#ifndef 
__SHA_FUNCTIONS_HPP__ +#define __SHA_FUNCTIONS_HPP__ + +#include // uint* + +#if (defined(__x86_64__) || defined(__x86_64) || defined(_M_X64)) && \ + defined(__SHA__) +# define blst_sha256_block blst_sha256_block_data_order_shaext +#elif defined(__aarch64__) && defined(__ARM_FEATURE_CRYPTO) +# define blst_sha256_block blst_sha256_block_armv8 +#else +# define blst_sha256_block blst_sha256_block_data_order +#endif + +const uint32_t SHA256_INITIAL_DIGEST[8] = { + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 +}; + +// Modified to be in ABEF, CDGH format for SHA Extensions +const uint32_t SHA256_INITIAL_DIGEST_MB[16] = { + 0x9b05688c, 0x510e527f, 0xbb67ae85, 0x6a09e667, + 0x5be0cd19, 0x1f83d9ab, 0xa54ff53a, 0x3c6ef372, + 0x9b05688c, 0x510e527f, 0xbb67ae85, 0x6a09e667, + 0x5be0cd19, 0x1f83d9ab, 0xa54ff53a, 0x3c6ef372 +}; + +extern "C" { + void blst_sha256_block(uint32_t* h, const void* in, size_t blocks); + void blst_sha256_emit(uint8_t* md, const uint32_t* h); + + void sha_ext_mbx2(uint32_t* digest, uint32_t** replica_id_buf, + uint32_t** data_buf, size_t offset, + size_t blocks, size_t repeat); +} + +#endif // __SHA_FUNCTIONS_HPP__ diff --git a/extern/supraseal/test.sh b/extern/supraseal/test.sh new file mode 100755 index 000000000..b0ef78672 --- /dev/null +++ b/extern/supraseal/test.sh @@ -0,0 +1,126 @@ +#!/bin/bash + +# Copyright Supranational LLC + +set -e +set -x + +# Cache files, assumed to be CC for the tests below. To generate, edit +# fil-proofs-tooling/src/bin/benchy/window_post.rs in rust-fil-proofs. Around +# line 129, change +# .map(|_| rand::random::()) +# to +# .map(|_| 0x0) +# +# env RUST_LOG=trace FIL_PROOFS_USE_MULTICORE_SDR=1 FIL_PROOFS_USE_GPU_COLUMN_BUILDER=1 FIL_PROOFS_USE_GPU_TREE_BUILDER=1 BELLMAN_CUSTOM_GPU="NVIDIA GeForce RTX 3090:10496" ./target/release/benchy window-post --size 512MiB --cache ./cache_benchy_run_512MiB 2>&1 | tee run_512m_0.log +# env RUST_LOG=trace FIL_PROOFS_USE_MULTICORE_SDR=1 FIL_PROOFS_USE_GPU_COLUMN_BUILDER=1 FIL_PROOFS_USE_GPU_TREE_BUILDER=1 BELLMAN_CUSTOM_GPU="NVIDIA GeForce RTX 3090:10496" ./target/release/benchy window-post --size 32GiB --cache ./cache_benchy_run_32GiB 2>&1 | tee run_32g_0.log + +cache_16KiB="../cache_benchy_run_16KiB" +cache_512MiB="../cache_benchy_run_512MiB" +cache_32GiB="../cache_benchy_run_32GiB" + +# This cache is random data, not cc +cache_512MiB_rand="../cache_benchy_run_512MiB_rand" + +test_dir="/var/tmp/supra_seal/test_tmp" + +rm -fr $test_dir/* + +./build.sh -r + +echo "************************************************************" +echo "* 16KiB pc2" +echo "************************************************************" + +mkdir -p $test_dir/pc2_test_16KiB +./bin/pc2 -i $cache_16KiB -o $test_dir/pc2_test_16KiB -b 16KiB +cmp $cache_16KiB/p_aux $test_dir/pc2_test_16KiB/p_aux +cmp $cache_16KiB/sc-02-data-tree-c-0.dat $test_dir/pc2_test_16KiB/sc-02-data-tree-c-0.dat +cmp $cache_16KiB/sc-02-data-tree-c-7.dat $test_dir/pc2_test_16KiB/sc-02-data-tree-c-7.dat +cmp $cache_16KiB/sc-02-data-tree-r-last-0.dat $test_dir/pc2_test_16KiB/sc-02-data-tree-r-last-0.dat +cmp $cache_16KiB/sc-02-data-tree-r-last-1.dat $test_dir/pc2_test_16KiB/sc-02-data-tree-r-last-1.dat + +echo "************************************************************" +echo "* 512MiB pc2" +echo "************************************************************" + +mkdir -p $test_dir/pc2_test_512MiB +./bin/pc2 -i $cache_512MiB -o $test_dir/pc2_test_512MiB -b 512MiB +cmp $cache_512MiB/p_aux 
$test_dir/pc2_test_512MiB/p_aux
+cmp $cache_512MiB/sc-02-data-tree-c.dat $test_dir/pc2_test_512MiB/sc-02-data-tree-c.dat
+cmp $cache_512MiB/sc-02-data-tree-r-last.dat $test_dir/pc2_test_512MiB/sc-02-data-tree-r-last.dat
+cmp $cache_512MiB/sealed-file $test_dir/pc2_test_512MiB/sealed-file
+
+echo "************************************************************"
+echo "* 512MiB tree-r random"
+echo "************************************************************"
+
+mkdir -p $test_dir/tree-r_test_512MiB_rand
+./bin/tree_r -l $cache_512MiB_rand/sc-02-data-layer-2.dat -d $cache_512MiB_rand/staged-file -o $test_dir/tree-r_test_512MiB_rand -b 512MiB
+# Only the root of r is written to p_aux
+cmp -i 32 $cache_512MiB_rand/p_aux $test_dir/tree-r_test_512MiB_rand/p_aux
+cmp $cache_512MiB_rand/sc-02-data-tree-r-last.dat $test_dir/tree-r_test_512MiB_rand/sc-02-data-tree-r-last.dat
+cmp $cache_512MiB_rand/sealed-file $test_dir/tree-r_test_512MiB_rand/sealed-file
+
+echo "************************************************************"
+echo "* 512MiB tree-r-cpu random"
+echo "************************************************************"
+
+mkdir -p $test_dir/tree-r-cpu_test_512MiB_rand
+./bin/tree_r_cpu -l $cache_512MiB_rand/sc-02-data-layer-2.dat -d $cache_512MiB_rand/staged-file -o $test_dir/tree-r-cpu_test_512MiB_rand -b 512MiB
+cmp $cache_512MiB_rand/sc-02-data-tree-r-last.dat $test_dir/tree-r-cpu_test_512MiB_rand/sc-02-data-tree-r-last.dat
+cmp $cache_512MiB_rand/sealed-file $test_dir/tree-r-cpu_test_512MiB_rand/sealed-file
+
+echo "************************************************************"
+echo "* 32GiB pc2"
+echo "************************************************************"
+
+mkdir -p $test_dir/pc2_test_32GiB
+./bin/pc2 -i $cache_32GiB -o $test_dir/pc2_test_32GiB -b 32GiB
+cmp $cache_32GiB/p_aux $test_dir/pc2_test_32GiB/p_aux
+cmp $cache_32GiB/sc-02-data-tree-c-0.dat $test_dir/pc2_test_32GiB/sc-02-data-tree-c-0.dat
+cmp $cache_32GiB/sc-02-data-tree-c-7.dat $test_dir/pc2_test_32GiB/sc-02-data-tree-c-7.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-0.dat $test_dir/pc2_test_32GiB/sc-02-data-tree-r-last-0.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-1.dat $test_dir/pc2_test_32GiB/sc-02-data-tree-r-last-1.dat
+cmp $cache_32GiB/sealed-file $test_dir/pc2_test_32GiB/sealed-file
+
+echo "************************************************************"
+echo "* 32GiB tree-r CC"
+echo "************************************************************"
+
+mkdir -p $test_dir/tree-r_test_32GiB
+./bin/tree_r -l $cache_32GiB/sc-02-data-layer-11.dat -d $cache_32GiB/staged-file -o $test_dir/tree-r_test_32GiB -b 32GiB
+# Only the root of r is written to p_aux
+cmp -i 32 $cache_32GiB/p_aux $test_dir/tree-r_test_32GiB/p_aux
+cmp $cache_32GiB/sc-02-data-tree-r-last-0.dat $test_dir/tree-r_test_32GiB/sc-02-data-tree-r-last-0.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-1.dat $test_dir/tree-r_test_32GiB/sc-02-data-tree-r-last-1.dat
+
+echo "************************************************************"
+echo "* 32GiB tree-r-cpu"
+echo "************************************************************"
+
+mkdir -p $test_dir/tree-r-cpu_test_32GiB
+./bin/tree_r_cpu -l $cache_32GiB/sc-02-data-layer-11.dat -d $cache_32GiB/staged-file -o $test_dir/tree-r-cpu_test_32GiB -b 32GiB
+cmp $cache_32GiB/sc-02-data-tree-r-last-0.dat $test_dir/tree-r-cpu_test_32GiB/sc-02-data-tree-r-last-0.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-1.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-1.dat $test_dir/tree-r-cpu_test_32GiB/sc-02-data-tree-r-last-1.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-2.dat $test_dir/tree-r-cpu_test_32GiB/sc-02-data-tree-r-last-2.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-3.dat $test_dir/tree-r-cpu_test_32GiB/sc-02-data-tree-r-last-3.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-4.dat $test_dir/tree-r-cpu_test_32GiB/sc-02-data-tree-r-last-4.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-5.dat $test_dir/tree-r-cpu_test_32GiB/sc-02-data-tree-r-last-5.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-6.dat $test_dir/tree-r-cpu_test_32GiB/sc-02-data-tree-r-last-6.dat
+cmp $cache_32GiB/sc-02-data-tree-r-last-7.dat $test_dir/tree-r-cpu_test_32GiB/sc-02-data-tree-r-last-7.dat
+
+echo "************************************************************"
+echo "* 32GiB c2"
+echo "************************************************************"
+
+cd demos/c2-test
+cargo test --release --test c2 -- --nocapture
+
+# echo "************************************************************"
+# echo "* 512MiB sealing pipeline"
+# echo "************************************************************"
+
+# ./exec.sh 512MiB
diff --git a/extern/supraseal/tools/pc2.cu b/extern/supraseal/tools/pc2.cu
new file mode 100644
index 000000000..8490197a6
--- /dev/null
+++ b/extern/supraseal/tools/pc2.cu
@@ -0,0 +1,175 @@
+// g++ -g -Wall -Wextra -Werror -Wno-subobject-linkage -march=native -O3 src/tools/tree_r.cpp -Isrc/poseidon -Ideps/sppark -Ideps/blst/src -L deps/blst -lblst
+
+// Only supports constant arity 8 throughout the tree (2KB, 32G, etc);
+//
+// arguments
+//   last_layer_filename
+// optional arguments
+//   data_filename - This indicates whether or not we have a CC sector
+
+#include <stdint.h>   // uint*
+#include <sys/mman.h> // mapping
+#include <sys/stat.h> // file stats
+#include <cassert>    // assertions
+#include <cmath>      // log2
+#include <fcntl.h>    // file open
+#include <unistd.h>   // file close
+#include <iostream>   // printing
+#include <iomanip>    // printing
+#include <chrono>     // time
+
+#include "../pc2/cuda/pc2.cu"
+
+#ifndef __CUDA_ARCH__
+#include "../pc1/tree_r.hpp"
+#include "../pc1/tree_c.hpp"
+#include "../util/debug_helpers.cpp"
+#include "../sealing/sector_parameters.hpp"
+#include "../util/sector_util.cpp"
+
+void usage(char* argv[]) {
+  std::cout << "If no staged data file, CC is assumed" << std::endl;
+  std::cout << "Usage: " << argv[0] << " [OPTIONS]" << std::endl;
+  std::cout << "-h  Print help message" << std::endl;
+  std::cout << "-d  Staged data file" << std::endl;
+  std::cout << "-i  Input cache directory" << std::endl;
+  std::cout << "-o  Output directory" << std::endl;
+  std::cout << "-c  Topology config file" << std::endl;
+  std::cout << "-b  Sector size, e.g. 32GiB" << std::endl;
+  exit(0);
+}
+
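+// GPU path: streams the layer data through the column (tree-c) and tree-r
+// hashers (pc2_hash) across all detected GPUs, scaling the stream count and
+// batch size down for small sectors.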
+template<class P>
+void gpu_single_pc2(std::string config_filename,
+                    std::string cache_dir,
+                    std::string data_filename,
+                    std::string output_dir) {
+  topology_t topology(config_filename.c_str());
+  set_core_affinity(topology.pc2_hasher);
+
+  size_t sector_size = P::GetSectorSize();
+
+  // Construct the layer filenames
+  std::vector<std::string> layer_filenames;
+  const size_t MAX = 256;
+  char fname[MAX];
+  const char* layer_filename_template = "%s/sc-02-data-layer-%d.dat";
+  for (size_t i = 0; i < P::GetNumLayers(); i++) {
+    snprintf(fname, MAX, layer_filename_template, cache_dir.c_str(), i + 1);
+    layer_filenames.push_back(fname);
+  }
+
+  // Total number of streams across all GPUs
+  // Use fewer streams if sector size is <= 16MiB
+  size_t stream_count = P::GetSectorSizeLg() <= 24 ? 8 : 64;
+
+  // Batch size in nodes. Each node includes all parallel sectors
+  // Reduce batch size if sector size is <= 16MiB
+  size_t batch_size = P::GetSectorSizeLg() <= 24 ? 64 * 8 : 64 * 64 * 8;
+
+  // Nodes to read per partition
+  size_t nodes_to_read = P::GetNumNodes() / P::GetNumTreeRCFiles();
+
+  streaming_node_reader_t<sealing_config_t<1, P>> node_reader(P::GetSectorSize(), layer_filenames);
+
+  // Allocate storage for 2x the streams to support tree-c and tree-r
+  node_reader.alloc_slots(stream_count * 2, P::GetNumLayers() * batch_size, true);
+
+  bool tree_r_only = false;
+  const char* data_filenames[1];
+  if (!data_filename.empty()) {
+    data_filenames[0] = data_filename.c_str();
+  } else {
+    data_filenames[0] = nullptr;
+  }
+  pc2_hash<sealing_config_t<1, P>>(
+    topology, tree_r_only, node_reader, nodes_to_read, batch_size,
+    stream_count, data_filenames, output_dir.c_str());
+}
+
+template<class P>
+void cpu_single_pc2(std::string config_filename,
+                    std::string cache_dir,
+                    std::string data_filename,
+                    std::string output_dir,
+                    std::string last_layer_filename) {
+
+  mmap_t<node_t> p_aux_file;
+  p_aux_file.mmap_write(output_dir + "/p_aux", 2 * sizeof(node_t), true);
+  TreeC<P> tree_c;
+  p_aux_file[0] = tree_c.BuildTreeC(cache_dir, output_dir);
+  TreeR<P> tree_r;
+  p_aux_file[1] = tree_r.BuildTreeR(last_layer_filename, data_filename,
+                                    output_dir);
+}
+
+int main(int argc, char* argv[]) {
+  int opt = 0;
+  std::string data_filename = "";
+  std::string cache_dir = "";
+  std::string out_dir = ".";
+  std::string sector_size_string = "";
+  std::string config_filename = "demos/rust/supra_seal.cfg";
+
+  while ((opt = getopt(argc, argv, "c:i:d:o:b:h")) != -1) {
+    switch(opt) {
+    case 'c':
+      std::cout << "config file " << optarg << std::endl;
+      config_filename = optarg;
+      break;
+    case 'i':
+      std::cout << "input cache_dir " << optarg << std::endl;
+      cache_dir = optarg;
+      break;
+    case 'd':
+      std::cout << "data_filename input " << optarg << std::endl;
+      data_filename = optarg;
+      break;
+    case 'o':
+      std::cout << "out_dir " << optarg << std::endl;
+      out_dir = optarg;
+      break;
+    case 'b':
+      std::cout << "sector_size " << optarg << std::endl;
+      sector_size_string = optarg;
+      break;
+    case 'h':
+    case ':':
+    case '?':
+      usage(argv);
+      break;
+    }
+  }
+
+  if (sector_size_string == "") {
+    printf("Please specify a sector size\n");
+    exit(1);
+  }
+
+  if (cache_dir.empty()) {
+    printf("-i must be specified\n");
+    usage(argv);
+  }
+
+  size_t sector_size = get_sector_size_from_string(sector_size_string);
+
+#ifdef __NVCC__
+  // Do PC2 on the GPU if sector size is > 32KiB
+  SECTOR_PARAMS_TABLE( \
+    if (ngpus() && params.GetSectorSizeLg() > 15) { \
+      gpu_single_pc2<decltype(params)>(config_filename, cache_dir, \
+                                       data_filename, out_dir); \
+      \
+      return 0; \
+    } \
+  );
+#endif
+  std::string last_layer_filename = cache_dir + std::string("/sc-02-data-layer-2.dat");
+  SECTOR_PARAMS_TABLE( \
+    cpu_single_pc2<decltype(params)>(config_filename, cache_dir, data_filename, \
+                                     out_dir, last_layer_filename); \
+  );
+
+  return 0;
+}
+#endif
diff --git a/extern/supraseal/tools/tree_d.cpp b/extern/supraseal/tools/tree_d.cpp
new file mode 100644
index 000000000..bb2da1f0c
--- /dev/null
+++ b/extern/supraseal/tools/tree_d.cpp
@@ -0,0 +1,88 @@
+// Copyright Supranational LLC
+
+#ifndef __TREE_D_HPP__
+#define __TREE_D_HPP__
+
+#include <chrono>   // time
+#include <iostream> // printing
+#include <unistd.h> // getopt
+
+#include "tree_d.hpp"
+#include "../sealing/constants.hpp"
+#include "../sealing/sector_parameters.hpp"
+#include "../util/sector_util.cpp"
+
+// g++ -g -Wall -Wextra -Werror -DRUNTIME_SECTOR_SIZE -march=native -O3 -I../pc1 tree_d.cpp -L../deps/blst -lblst
+
+int main(int argc, char* argv[]) {
+  int opt = 0;
+  bool copy = true;
+  std::string sector_size_string = "";
+
+  std::string tree_d_filename = "./sc-02-data-tree-d.dat";
+  std::string data_filename = "";
+
+  while ((opt = getopt(argc, argv, "t:d:s:ph")) != -1) {
+    switch(opt) {
+    case 't':
+      std::cout << "tree_d_filename input " << optarg << std::endl;
+      tree_d_filename = optarg;
+      break;
+    case 'd':
+      std::cout << "data_filename input " << optarg << std::endl;
+      data_filename = optarg;
+      break;
+    case 'p':
+      std::cout << "Copy flag is set" << std::endl;
+      copy = false;
+      break;
+    case 's':
+      std::cout << "sector_size input " << optarg << std::endl;
+      sector_size_string = optarg;
+      break;
+    case 'h':
+    case ':':
+    case '?':
+      std::cout << "Sealing Client" << std::endl;
+      std::cout << "If no staged data file, CC is assumed" << std::endl;
+      std::cout << "Usage: " << argv[0] << " [OPTIONS]" << std::endl;
+      std::cout << "-h  Print help message" << std::endl;
+      std::cout << "-t  Tree D output file" << std::endl;
+      std::cout << "-d  Staged data file" << std::endl;
+      std::cout << "-s  Sector Size (2KiB, 32GiB, etc)" << std::endl;
+      std::cout << "-p  Don't copy data into tree leaves" << std::endl;
+      break;
+    }
+  }
+
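+  // No -d data file means a CC sector: the tree is built over zero data
+  // (BuildCCTree below); otherwise BuildTree hashes the staged data, copying
+  // it into the tree leaves unless -p was given.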
+  if (sector_size_string.empty()) {
+    std::cout << "Please specify a sector size" << std::endl;
+    exit(1);
+  }
+
+  size_t sector_size = get_sector_size_from_string(sector_size_string);
+
+  node_t comm_d;
+
+  auto start = std::chrono::high_resolution_clock::now();
+
+  SECTOR_PARAMS_TABLE( \
+    TreeD tree_d(params, copy); \
+    \
+    if (!data_filename.empty()) { \
+      tree_d.BuildTree(&comm_d, tree_d_filename, data_filename); \
+    } else { \
+      tree_d.BuildCCTree(&comm_d, tree_d_filename); \
+    } \
+    \
+    std::cout << std::endl << "comm_d "; \
+    tree_d.print_digest_hex(&comm_d); \
+  );
+
+  auto cur = std::chrono::high_resolution_clock::now();
+  auto duration =
+    std::chrono::duration_cast<std::chrono::milliseconds>(cur - start).count();
+  start = cur;
+  std::cout << "Tree D generation took " << duration << "ms" << std::endl;
+
+  return 0;
+}
+#endif
diff --git a/extern/supraseal/tools/tree_r.cpp b/extern/supraseal/tools/tree_r.cpp
new file mode 100644
index 000000000..219cb17b6
--- /dev/null
+++ b/extern/supraseal/tools/tree_r.cpp
@@ -0,0 +1,150 @@
+// g++ -g -Wall -Wextra -Werror -Wno-subobject-linkage -march=native -O3 src/tools/tree_r.cpp -Isrc/poseidon -Ideps/sppark -Ideps/blst/src -L deps/blst -lblst
+
+// Only supports constant arity 8 throughout the tree (2KB, 32G, etc);
+//
+// arguments
+//   last_layer_filename
+// optional arguments
+//   data_filename - This indicates whether or not we have a CC sector
+
+#include <stdint.h>   // uint*
+#include <sys/mman.h> // mapping
+#include <sys/stat.h> // file stats
+#include <cassert>    // assertions
+#include <cmath>      // log2
+#include <fcntl.h>    // file open
+#include <unistd.h>   // file close
+#include <iostream>   // printing
+#include <iomanip>    // printing
+#include <chrono>     // time
+
+#ifdef __NVCC__
+// Enable GPU tree-r building
+#include "../pc2/cuda/pc2.cu"
+#else
+// CPU only
+#include <string>
+#endif
+#ifndef __CUDA_ARCH__
+#include "../pc1/tree_r.hpp"
+#include "../util/debug_helpers.cpp"
+#include "../sealing/sector_parameters.hpp"
+#include "../util/sector_util.cpp"
+
+void usage(char* argv[]) {
+  std::cout << "If no staged data file, CC is assumed" << std::endl;
+  std::cout << "Usage: " << argv[0] << " [OPTIONS]" << std::endl;
+  std::cout << "-h  Print help message" << std::endl;
+  std::cout << "-c  Parallel number of cores" << std::endl;
+  std::cout << "-l  Last layer file" << std::endl;
+  std::cout << "-d  Staged data file" << std::endl;
+  std::cout << "-o  Output directory" << std::endl;
+  std::cout << "-b  Sector size, e.g. 32GiB" << std::endl;
+  exit(0);
+}
+
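+// GPU path: reuses the pc2 hashing pipeline with tree_r_only set, reading a
+// single "layer" (the last SDR layer) instead of all layers.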
+#ifdef __NVCC__
+template<class P>
+void gpu_tree_r(std::string config_filename,
+                std::string last_layer_filename,
+                std::string data_filename,
+                std::string output_dir) {
+  topology_t topology(config_filename.c_str());
+  set_core_affinity(topology.pc2_hasher);
+
+  // Total number of streams across all GPUs
+  // Use fewer streams if sector size is <= 16MiB
+  size_t stream_count = P::GetSectorSizeLg() <= 24 ? 8 : 64;
+
+  // Batch size in nodes. Each node includes all parallel sectors
+  // Reduce batch size if sector size is <= 16MiB
+  size_t batch_size = P::GetSectorSizeLg() <= 24 ? 64 * 8 : 64 * 64;
+
+  // Nodes to read per partition
+  size_t nodes_to_read = P::GetNumNodes() / P::GetNumTreeRCFiles();
+
+  std::vector<std::string> layer_filenames;
+  layer_filenames.push_back(last_layer_filename);
+  streaming_node_reader_t<sealing_config_t<1, P>> node_reader(P::GetSectorSize(), layer_filenames);
+
+  // Allocate storage for 2x the streams to support tree-c and tree-r
+  node_reader.alloc_slots(stream_count * 2, P::GetNumLayers() * batch_size, true);
+
+  bool tree_r_only = true;
+  const char* data_filenames[1];
+  if (!data_filename.empty()) {
+    data_filenames[0] = data_filename.c_str();
+  } else {
+    data_filenames[0] = nullptr;
+  }
+  pc2_hash<sealing_config_t<1, P>>(topology, tree_r_only, node_reader,
+                                   nodes_to_read, batch_size, stream_count,
+                                   data_filenames, output_dir.c_str());
+}
+#endif
+
+int main(int argc, char* argv[]) {
+  int opt = 0;
+  std::string last_layer_filename = "";
+  std::string data_filename = "";
+  std::string out_dir = "";
+  int cores = 0;
+  std::string sector_size_string = "";
+  std::string config_filename = "demos/rust/supra_seal.cfg";
+
+  while ((opt = getopt(argc, argv, "l:d:o:c:b:h")) != -1) {
+    switch(opt) {
+    case 'c':
+      std::cout << "number of cores input " << optarg << std::endl;
+      cores = atoi(optarg);
+      break;
+    case 'l':
+      std::cout << "last_layer_filename input " << optarg << std::endl;
+      last_layer_filename = optarg;
+      break;
+    case 'd':
+      std::cout << "data_filename input " << optarg << std::endl;
+      data_filename = optarg;
+      break;
+    case 'o':
+      std::cout << "out_dir " << optarg << std::endl;
+      out_dir = optarg;
+      break;
+    case 'b':
+      std::cout << "sector_size " << optarg << std::endl;
+      sector_size_string = optarg;
+      break;
+    case 'h':
+    case ':':
+    case '?':
+      usage(argv);
+      break;
+    }
+  }
+
+  size_t sector_size = get_sector_size_from_string(sector_size_string);
+
+  if (last_layer_filename.empty()) {
+    printf("-l must be specified\n");
+    usage(argv);
+  }
+
+#ifdef __NVCC__
+  // Do tree-r on the GPU if sector size is > 32KiB
+  SECTOR_PARAMS_TABLE( \
+    if (ngpus() && params.GetSectorSizeLg() > 15) { \
+      gpu_tree_r<decltype(params)>(config_filename, last_layer_filename, \
+                                   data_filename, out_dir); \
+      \
+      return 0; \
+    } \
+  );
+#endif
+  SECTOR_PARAMS_TABLE( \
+    TreeR<decltype(params)> tree_r; \
+    tree_r.BuildTreeR(last_layer_filename, data_filename, out_dir, cores); \
+  );
+
+  return 0;
+}
+#endif
diff --git a/extern/supraseal/util/debug_helpers.cpp b/extern/supraseal/util/debug_helpers.cpp
new file mode 100644
index 000000000..4977ff600
--- /dev/null
+++ b/extern/supraseal/util/debug_helpers.cpp
@@ -0,0 +1,163 @@
+// Copyright Supranational LLC
+
+#include <stdint.h>     // uint*
+#include <iostream>     // printing
+#include <iomanip>      // printing
+#include <immintrin.h>  // x86 intrinsics
+#include <algorithm>    // std::min
+#include <arpa/inet.h>  // htons
+#include "debug_helpers.hpp" // header
+
+template<class C>
+void print_parameters() {
+  std::cout << "Sealing Parameters" << std::endl;
+  std::cout << "SECTOR_SIZE         "<< C::GetSectorSize() << std::endl;
+  std::cout << "SECTOR_SIZE_LG      "<< C::GetSectorSizeLg() << std::endl;
+  std::cout << "NODE_SIZE           "<< NODE_SIZE << std::endl;
+  std::cout << "NODE_WORDS          "<< NODE_WORDS << std::endl;
+  std::cout << "NODE_COUNT          "<< C::GetNumNodes() << std::endl;
+  std::cout << "PARENT_COUNT_BASE   "<< PARENT_COUNT_BASE << std::endl;
+  std::cout << "PARENT_COUNT_EXP    "<< PARENT_COUNT_EXP << std::endl;
+  std::cout << "PARENT_COUNT        "<< PARENT_COUNT << std::endl;
+  std::cout << "PARENT_SIZE         "<< PARENT_SIZE << std::endl;
+  std::cout << "LAYER_COUNT         "<< C::GetNumLayers() << std::endl;
+  std::cout << "NODES_PER_HASHER    "<< NODES_PER_HASHER << std::endl;
+  std::cout << "PARENT_BUFFER_NODES "<< PARENT_BUFFER_NODES << std::endl;
+  std::cout << "NODE_BUFFER_NODES   "<< NODE_BUFFER_NODES << std::endl;
+  std::cout << std::endl;
+}
+
+void print_digest(uint32_t* digest) {
+  for (int i = 0; i < 8; ++i) {
+    std::cout << std::hex << std::setfill('0') << std::setw(8)
+              << digest[i] << " ";
+  }
+  std::cout << std::endl;
+}
+
+void print_buffer(uint8_t* buf, size_t bytes) {
+  for (size_t i = 0; i < bytes; ++i) {
+    std::cout << std::hex << std::setfill('0') << std::setw(2)
+              << (uint32_t) buf[i] << " ";
+  }
+  std::cout << std::endl;
+}
+
+void print_buffer_dec(uint8_t* buf, size_t bytes) {
+  for (size_t i = 0; i < bytes; ++i) {
+    std::cout << std::dec << std::setw(3)
+              << (uint32_t) buf[i] << " ";
+  }
+  std::cout << std::endl;
+}
+
+template<class T> void Log256(const __m256i & value) {
+  const size_t n = sizeof(__m256i) / sizeof(T);
+  T buffer[n];
+  _mm256_storeu_si256((__m256i*)buffer, value);
+  for (size_t i = 0; i < n; i++)
+    if (sizeof(T) == 1) {
+      std::cout << std::setw(sizeof(T)*2) << std::setfill('0')
+                << std::hex << (uint32_t) buffer[i] << " ";
+    } else {
+      std::cout << std::setw(sizeof(T)*2) << std::setfill('0')
+                << std::hex << buffer[i] << " ";
+    }
+  std::cout << std::endl;
+}
+
+template<class T> void Log128(const __m128i & value) {
+  const size_t n = sizeof(__m128i) / sizeof(T);
+  T buffer[n];
+  _mm_storeu_si128((__m128i*)buffer, value);
+  for (size_t i = 0; i < n; i++)
+    std::cout << std::setw(8) << std::setfill('0')
+              << std::hex << buffer[i] << " ";
+  //std::cout << std::endl;
+}
+
+void print_digest_reorder(uint32_t* digest) {
+  static const uint32_t BYTE_SHUFFLE_MASK[8] = {
+    0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f,
+    0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f
+  };
+
+  __m128i mask = _mm_loadu_si128((__m128i*)&(BYTE_SHUFFLE_MASK[0]));
+  __m128i d0 = _mm_loadu_si128((__m128i*)&(digest[0])); // ABEF
+  __m128i d1 = _mm_loadu_si128((__m128i*)&(digest[4])); // CDGH
+
+  d0 = _mm_shuffle_epi8(d0, mask); // Change endianness
+  d1 = _mm_shuffle_epi8(d1, mask); // Change endianness
+
+  Log128<uint32_t>(d0);
+  Log128<uint32_t>(d1);
+  std::cout << std::endl;
+}
+
+void print_single_node(uint64_t* n, const char *prefix, bool reverse) {
+  if (reverse) {
+    uint16_t *n16 = (uint16_t*)n;
+    printf("%s", prefix == NULL ? "" : prefix);
+    for (size_t i = 0; i < 16; i++) {
+      printf("%04x ", htons(n16[i ^ 1]));
+    }
+  } else {
+    printf("%s%016lx %016lx %016lx %016lx",
+           prefix == NULL ? "" : prefix,
+           n[0], n[1], n[2], n[3]);
+  }
+}
+
+void _print_buf(uint8_t *buf, size_t lines, const char *prefix,
+                bool reverse, size_t words_per_page) {
+  if (lines == 0) {
+    for (unsigned node = 0; node < words_per_page; node++) {
+      uint64_t *p = (uint64_t *)(buf + node * NODE_SIZE);
+      print_single_node(p, prefix, reverse);
+      printf("\n");
+    }
+  } else {
+    // First 'lines' nodes...
+    for (size_t node = 0; node < std::min(words_per_page, lines); node++) {
+      uint64_t *p = (uint64_t *)(buf + node * NODE_SIZE);
+      print_single_node(p, prefix, reverse);
+      if (node == std::min(words_per_page, lines) - 1) {
+        printf(" ... %p\n", buf);
+      } else {
+        printf("\n");
+      }
+    }
+    // last 'lines' nodes...
+    unsigned start;
+    if (lines > words_per_page) {
+      start = 0;
+    } else {
+      start = words_per_page - lines;
+    }
+    for (size_t node = start; node < words_per_page; node++) {
+      uint64_t *p = (uint64_t *)(buf + node * NODE_SIZE);
+      print_single_node(p, prefix, reverse);
+      printf("\n");
+    }
+  }
+}
+
+template<class C>
+void print_node(parallel_node_t<C> *buf, size_t lines, const char *prefix, bool reverse) {
+  _print_buf((uint8_t*)buf, lines, prefix, reverse, C::PARALLEL_SECTORS);
+}
+
+void print_buf(uint8_t *buf, size_t lines, const char *prefix) {
+  _print_buf(buf, lines, prefix, false, PAGE_SIZE / NODE_SIZE);
+}
+
+void print_parents_graph(uint32_t* parents) {
+  const size_t count = 128;
+  for (size_t i = 0; i < count; i++) {
+    printf("Node %2ld: ", i);
+    for (size_t j = 0; j < PARENT_COUNT; j++) {
+      printf("%08x ", parents[i * PARENT_COUNT + j]);
+    }
+    printf("\n");
+  }
+}
diff --git a/extern/supraseal/util/debug_helpers.hpp b/extern/supraseal/util/debug_helpers.hpp
new file mode 100644
index 000000000..8f145df50
--- /dev/null
+++ b/extern/supraseal/util/debug_helpers.hpp
@@ -0,0 +1,30 @@
+// Copyright Supranational LLC
+
+#ifndef __DEBUG_HELPERS_HPP__
+#define __DEBUG_HELPERS_HPP__
+
+#include <stdint.h>    // uint*
+#include <iostream>    // printing
+#include <iomanip>     // printing
+#include <immintrin.h> // x86 intrinsics
+#include "../sealing/data_structures.hpp" // global parameters
+
+template<class C>
+void print_parameters();
+
+void print_digest(uint32_t* digest);
+void print_buffer(uint8_t* buf, size_t bytes);
+void print_buffer_dec(uint8_t* buf, size_t bytes);
+template<class T> void Log256(const __m256i & value);
+template<class T> void Log128(const __m128i & value);
+void print_digest_reorder(uint32_t* digest);
+
+// TODO: print plain node
+template<class C>
+void print_node(parallel_node_t<C> *buf, size_t lines = 0,
+                const char *prefix = nullptr, bool reverse = false);
+void print_buf(uint8_t *buf, size_t lines = 0,
+               const char *prefix = nullptr);
+void print_parents_graph(uint32_t* parents);
+
+#endif // __DEBUG_HELPERS_HPP__
diff --git a/extern/supraseal/util/file_t.hpp b/extern/supraseal/util/file_t.hpp
new file mode 100644
index 000000000..d7136008c
--- /dev/null
+++ b/extern/supraseal/util/file_t.hpp
@@ -0,0 +1,121 @@
+// Copyright Supranational LLC
+
+#ifndef __FILE_T_HPP__
+#define __FILE_T_HPP__
+
+#include <string>     // std::string
+#include <mutex>      // std::mutex
+#include <cassert>    // assertions
+#include <cstring>    // strerror
+#include <cerrno>     // errno
+#include <cstdio>     // printf
+#include <cstdlib>    // exit
+#include <fcntl.h>    // open, posix_fallocate
+#include <unistd.h>   // lseek, read, write, close
+#include <sys/stat.h> // fstat
+
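+// Typed file wrapper with the same interface as mmap_t, but backed by
+// seek+read/write syscalls; a mutex serializes accesses since lseek moves
+// the shared file offset.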
+template<class T>
+class file_t {
+private:
+  std::string fname;
+  size_t size;
+  int fd;
+  bool is_write;
+  std::mutex mtx;
+
+public:
+  file_t(std::string _fname, size_t _size,
+         bool _is_write = false, bool remove_first = false)
+    : fd(-1)
+  {
+    open_file(_fname, _size, _is_write, remove_first);
+  }
+  file_t()
+    : size(0), fd(-1)
+  {}
+
+  bool is_open() {
+    return fd != -1;
+  }
+
+  size_t get_size() {
+    return size;
+  }
+
+  int file_read(std::string _fname) {
+    return file_read(_fname, (size_t)-1);
+  }
+
+  // Verify that the file size matches _size
+  int file_read(std::string _fname, size_t _size) {
+    return open_file(_fname, _size, false);
+  }
+
+  int file_write(std::string _fname, size_t _size, bool remove_first = false) {
+    return open_file(_fname, _size, true, remove_first);
+  }
+
+  int open_file(std::string _fname, size_t _size,
+                bool _is_write = false, bool remove_first = false) {
+    fname = _fname;
+    size = _size;
+    is_write = _is_write;
+
+    if (is_write && remove_first) {
+      remove(fname.c_str());
+    }
+
+    if (is_write) {
+      fd = open(fname.c_str(), O_RDWR | O_CREAT, (mode_t)0664);
+    } else {
+      fd = open(fname.c_str(), O_RDONLY);
+    }
+    if (fd == -1) {
+      printf("ERROR: Could not open file %s for %s: %s\n",
+             fname.c_str(), is_write ? "writing" : "reading",
+             strerror(errno));
+      return 1;
+    }
+
+    if (is_write) {
+      // lseek(fd, size - 1, SEEK_SET);
+      // assert (write(fd, "", 1) != -1);
+      posix_fallocate(fd, 0, size);
+    } else {
+      struct stat statbuf;
+      fstat(fd, &statbuf);
+      if (size == (size_t)-1) {
+        size = (size_t)statbuf.st_size;
+      } else if ((size_t)statbuf.st_size != size) {
+        printf("ERROR: file %s is size %ld, expected %ld\n",
+               fname.c_str(), (size_t)statbuf.st_size, size);
+        return 1;
+      }
+    }
+    return 0;
+  }
+  ~file_t() {
+    if (fd != -1) {
+      close(fd);
+    }
+  }
+  void advise_random() {}
+
+  void write_data(size_t offset, T* buf, size_t wr_size) {
+    std::unique_lock<std::mutex> lock(mtx);
+    assert (is_open());
+    assert (lseek(fd, offset * sizeof(T), SEEK_SET) == offset * sizeof(T));
+    if (write(fd, buf, wr_size * sizeof(T)) == -1) {
+      printf("pc2 write failed errno %d: %s\n", errno, strerror(errno));
+      exit(1);
+    }
+  }
+
+  void read_data(size_t offset, T* buf, size_t rd_size) {
+    std::unique_lock<std::mutex> lock(mtx);
+    assert (is_open());
+    assert (lseek(fd, offset * sizeof(T), SEEK_SET) == offset * sizeof(T));
+    if (read(fd, buf, rd_size * sizeof(T)) == -1) {
+      printf("file read failed errno %d: %s\n", errno, strerror(errno));
+      exit(1);
+    }
+  }
+};
+
+#endif
diff --git a/extern/supraseal/util/mmap_t.hpp b/extern/supraseal/util/mmap_t.hpp
new file mode 100644
index 000000000..22eecd116
--- /dev/null
+++ b/extern/supraseal/util/mmap_t.hpp
@@ -0,0 +1,129 @@
+// Copyright Supranational LLC
+
+#ifndef __MMAP_T_HPP__
+#define __MMAP_T_HPP__
+
+#include <string>     // std::string
+#include <cassert>    // assertions
+#include <cstring>    // memcpy
+#include <cstdio>     // printf
+#include <fcntl.h>    // open, posix_fallocate
+#include <unistd.h>   // close
+#include <sys/mman.h> // mmap, munmap, madvise
+#include <sys/stat.h> // fstat
+
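+// Typed memory-mapped file wrapper: opens (optionally creating and
+// preallocating) a file, maps it, and exposes the contents as a T array
+// via operator[].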
"writing" : "reading"); + return 1; + } + + if (is_write) { + // lseek(fd, size - 1, SEEK_SET); + // assert (write(fd, "", 1) != -1); + posix_fallocate(fd, 0, size); + } else { + struct stat statbuf; + fstat(fd, &statbuf); + if (size == (size_t)-1) { + size = (size_t)statbuf.st_size; + } else if ((size_t)statbuf.st_size != size) { + printf("ERROR: file %s is size %ld, expected %ld\n", + fname.c_str(), (size_t)statbuf.st_size, size); + return 1; + } + } + if (is_write) { + data = (T*)mmap(NULL, size, PROT_WRITE, MAP_SHARED, fd, 0); + } else { + data = (T*)mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0); + } + if (data == MAP_FAILED) { + printf("mmap failed for file %s", fname.c_str()); + return 1; + } + return 0; + } + ~mmap_t() { + if (data != nullptr && data != MAP_FAILED) { + munmap(data, size); + } + if (fd != -1) { + close(fd); + } + } + void advise_random() { + assert(madvise(data, size, MADV_RANDOM) == 0); + } + inline operator const T*() const { return data; } + inline operator T*() const { return data; } + inline operator void*() const { return (void*)data; } + inline const T& operator[](size_t i) const { return data[i]; } + inline T& operator[](size_t i) { return data[i]; } + + void write_data(size_t offset, T* buf, size_t size) { + assert (is_open()); + memcpy(&data[offset], buf, size * sizeof(T)); + } + + void read_data(size_t offset, T* buf, size_t size) { + assert (is_open()); + memcpy(buf, &data[offset], size * sizeof(T)); + } +}; + +#endif diff --git a/extern/supraseal/util/sector_util.cpp b/extern/supraseal/util/sector_util.cpp new file mode 100644 index 000000000..51014be4e --- /dev/null +++ b/extern/supraseal/util/sector_util.cpp @@ -0,0 +1,25 @@ +// Copyright Supranational LLC + +#include +#include "sector_util.hpp" + +size_t get_sector_size_from_string(std::string& sector_size_string) { + if (sector_size_string == "32GiB") return 1UL << Sector32GB; + else if (sector_size_string == "512MiB") return 1UL << Sector512MB; +#ifdef RUNTIME_SECTOR_SIZE + else if (sector_size_string == "2KiB") return 1UL << Sector2KB; + else if (sector_size_string == "4KiB") return 1UL << Sector4KB; + else if (sector_size_string == "16KiB") return 1UL << Sector16KB; + else if (sector_size_string == "32KiB") return 1UL << Sector32KB; + + else if (sector_size_string == "8MiB") return 1UL << Sector8MB; + else if (sector_size_string == "16MiB") return 1UL << Sector16MB; + + else if (sector_size_string == "1GiB") return 1UL << Sector1GB; + else if (sector_size_string == "64GiB") return 1UL << Sector64GB; +#endif + else { + std::cout << "Invalid sector size" << std::endl; + exit(1); + } +} diff --git a/extern/supraseal/util/sector_util.hpp b/extern/supraseal/util/sector_util.hpp new file mode 100644 index 000000000..1814e9e02 --- /dev/null +++ b/extern/supraseal/util/sector_util.hpp @@ -0,0 +1,92 @@ +// Copyright Supranational LLC + +#ifndef __SECTOR_UTIL_HPP__ +#define __SECTOR_UTIL_HPP__ + +#include + +#ifdef RUNTIME_SECTOR_SIZE +#define SECTOR_PARAMS_TABLE(FUNC) \ + switch (sector_size) { \ + case 1UL << Sector64GB: { \ + sector_parameters64GB params; \ + FUNC; \ + break; \ + } \ + case 1UL << Sector32GB: { \ + sector_parameters32GB params; \ + FUNC; \ + break; \ + } \ + case 1UL << Sector1GB: { \ + sector_parameters1GB params; \ + FUNC; \ + break; \ + } \ + case 1UL << Sector512MB: { \ + sector_parameters512MB params; \ + FUNC; \ + break; \ + } \ + case 1UL << Sector16MB: { \ + sector_parameters16MB params; \ + FUNC; \ + break; \ + } \ + case 1UL << Sector8MB: { \ + sector_parameters8MB params; \ + 
+#ifdef RUNTIME_SECTOR_SIZE
+#define SECTOR_PARAMS_TABLE(FUNC)     \
+  switch (sector_size) {              \
+  case 1UL << Sector64GB: {           \
+    sector_parameters64GB params;     \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector32GB: {           \
+    sector_parameters32GB params;     \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector1GB: {            \
+    sector_parameters1GB params;      \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector512MB: {          \
+    sector_parameters512MB params;    \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector16MB: {           \
+    sector_parameters16MB params;     \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector8MB: {            \
+    sector_parameters8MB params;      \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector32KB: {           \
+    sector_parameters32KB params;     \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector16KB: {           \
+    sector_parameters16KB params;     \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector4KB: {            \
+    sector_parameters4KB params;      \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector2KB: {            \
+    sector_parameters2KB params;      \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  default: {                          \
+    std::cout << "Invalid sector size" << std::endl; \
+    exit(1);                          \
+  }                                   \
+  }
+#else
+#define SECTOR_PARAMS_TABLE(FUNC)     \
+  switch (sector_size) {              \
+  case 1UL << Sector32GB: {           \
+    sector_parameters32GB params;     \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  case 1UL << Sector512MB: {          \
+    sector_parameters512MB params;    \
+    FUNC;                             \
+    break;                            \
+  }                                   \
+  default: {                          \
+    std::cout << "Invalid sector size" << std::endl; \
+    exit(1);                          \
+  }                                   \
+  }
+#endif
+
+size_t get_sector_size_from_string(std::string& sector_size_string);
+
+#endif
diff --git a/extern/supraseal/util/stats.hpp b/extern/supraseal/util/stats.hpp
new file mode 100644
index 000000000..17d2c4550
--- /dev/null
+++ b/extern/supraseal/util/stats.hpp
@@ -0,0 +1,94 @@
+// Copyright Supranational LLC
+
+#ifndef __STATS_HPP__
+#define __STATS_HPP__
+
+#include <stdio.h>  // printf
+#include <stddef.h> // size_t
+
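+// Lightweight queue-depth and counter statistics. record() compiles to a
+// no-op unless STATS is defined, so the instrumentation costs nothing in
+// production builds.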
+struct queue_stat_t {
+  const char* name;
+  size_t capacity;
+
+  struct stats_t {
+    size_t last_size;
+    size_t total;
+    size_t samples;
+    stats_t() {
+      clear();
+    }
+    void clear() {
+      last_size = 0;
+      total = 0;
+      samples = 0;
+    }
+  };
+  stats_t cur;
+  stats_t snap;
+
+  queue_stat_t(const char* _name, size_t _capacity) :
+    name(_name), capacity(_capacity)
+  {}
+  queue_stat_t() {
+    name = "null";
+    capacity = 0;
+  }
+  void init(const char* _name, size_t _capacity) {
+    name = _name;
+    capacity = _capacity;
+  }
+  void clear() {
+    cur.clear();
+  }
+  void record(size_t cur_size) {
+#ifdef STATS
+    cur.last_size = cur_size;
+    cur.samples++;
+    cur.total += cur_size;
+#endif
+  }
+  void snapshot() {
+    snap = cur;
+  }
+  void print() {
+    size_t avg_size = snap.samples == 0 ? 0 : snap.total / snap.samples;
+    printf("%30s: capacity %10ld, cur %10ld, avg_size %10ld\n",
+           name, capacity,
+           snap.last_size, avg_size);
+  }
+};
+
+struct counter_stat_t {
+  const char* name;
+
+  struct stats_t {
+    size_t count;
+  };
+  stats_t cur;
+  stats_t snap;
+
+  counter_stat_t(const char *_name) {
+    name = _name;
+    cur.count = 0;
+  }
+  counter_stat_t() {
+    name = "null";
+    cur.count = 0;
+  }
+  void init(const char* _name) {
+    name = _name;
+  }
+  void clear() {
+    cur.count = 0;
+  }
+  void record() {
+#ifdef STATS
+    cur.count++;
+#endif
+  }
+  void snapshot() {
+    snap = cur;
+  }
+  void print() {
+    printf("%30s: count %ld\n",
+           name, snap.count);
+  }
+};
+
+#endif
diff --git a/extern/supraseal/util/util.hpp b/extern/supraseal/util/util.hpp
new file mode 100644
index 000000000..36f82afa3
--- /dev/null
+++ b/extern/supraseal/util/util.hpp
@@ -0,0 +1,33 @@
+// Copyright Supranational LLC
+
+#ifndef __UTIL_HPP__
+#define __UTIL_HPP__
+
+#include <stdint.h>  // uint64_t
+#include <stddef.h>  // size_t
+#include <pthread.h> // pthread_setaffinity_np
+
+inline uint64_t get_tsc() {
+  uint64_t count;
+
+  // Read Time-Stamp Counter, Opcode - 0x0F 0x31, EDX:EAX <- TSC
+  __asm__ volatile("lfence; \
+                    .byte 15; .byte 49; \
+                    shlq $32, %%rdx; \
+                    orq %%rdx, %%rax; \
+                    lfence;"
+                   : "=a" (count)
+                   :
+                   : "%rdx"
+                   );
+  return count;
+}
+
+inline void set_core_affinity(size_t core_num) {
+  pthread_t pid = pthread_self();
+  cpu_set_t cpuset;
+  CPU_ZERO(&cpuset);
+  CPU_SET(core_num, &cpuset);
+  pthread_setaffinity_np(pid, sizeof(cpu_set_t), &cpuset);
+}
+
+#endif
diff --git a/lib/ffi/cunative/decode_sdr.go b/lib/ffi/cunative/decode_sdr.go
index d4185d621..60b820e91 100644
--- a/lib/ffi/cunative/decode_sdr.go
+++ b/lib/ffi/cunative/decode_sdr.go
@@ -3,8 +3,8 @@ package cunative
 
 /*
-#cgo CFLAGS: -I${SRCDIR}/../../../extern/supra_seal/deps/blst/bindings
-#cgo LDFLAGS: -L${SRCDIR}/../../../extern/supra_seal/deps/blst -lblst
+#cgo CFLAGS: -I${SRCDIR}/../../../extern/supraseal/deps/blst/bindings
+#cgo LDFLAGS: -L${SRCDIR}/../../../extern/supraseal/deps/blst -lblst
 #include <stdint.h>
 #include <string.h>
 #include "blst.h"
diff --git a/lib/ffi/cunative/decode_snap.go b/lib/ffi/cunative/decode_snap.go
index c2bd7a98b..74fc2417e 100644
--- a/lib/ffi/cunative/decode_snap.go
+++ b/lib/ffi/cunative/decode_snap.go
@@ -3,8 +3,8 @@ package cunative
 
 /*
-#cgo CFLAGS: -I${SRCDIR}/../../../extern/supra_seal/deps/blst/bindings
-#cgo LDFLAGS: -L${SRCDIR}/../../../extern/supra_seal/deps/blst -lblst
+#cgo CFLAGS: -I${SRCDIR}/../../../extern/supraseal/deps/blst/bindings
+#cgo LDFLAGS: -L${SRCDIR}/../../../extern/supraseal/deps/blst -lblst
 #include <stdint.h>
 #include <string.h>
 #include "blst.h"
diff --git a/lib/proof/porep_vproof_bin_test.go b/lib/proof/porep_vproof_bin_test.go
index d5662d2aa..b08ecd437 100644
--- a/lib/proof/porep_vproof_bin_test.go
+++ b/lib/proof/porep_vproof_bin_test.go
@@ -16,7 +16,7 @@ func TestDecode(t *testing.T) {
 		t.Skip()
 	}
 
-	//binFile := "../../extern/supra_seal/demos/c2-test/resources/test/commit-phase1-output"
+	//binFile := "../../extern/supraseal/demos/c2-test/resources/test/commit-phase1-output"
 	binFile := "../../commit-phase1-output.gz"
 
 	gzData, err := os.ReadFile(binFile)
@@ -122,7 +122,7 @@ fn main() -> Result<()> {
 }
 
 func TestDecodeSNRustDec(t *testing.T) {
-	//binFile := "../../extern/supra_seal/demos/c2-test/resources/test/commit-phase1-output"
+	//binFile := "../../extern/supraseal/demos/c2-test/resources/test/commit-phase1-output"
 	binFile := "../../commit-phase1-output.json"
 
 	rawData, err := os.ReadFile(binFile)
diff --git a/lib/supraffi/seal.go b/lib/supraffi/seal.go
index 81abf5cce..d1a3663f3 100644
--- a/lib/supraffi/seal.go
+++ b/lib/supraffi/seal.go
@@ -3,8 +3,8 @@ package supraffi
 
 /*
- #cgo CFLAGS: -I${SRCDIR}/../../extern/supra_seal/sealing
- #cgo LDFLAGS: -fno-omit-frame-pointer -Wl,-z,noexecstack -Wl,-z,relro,-z,now -fuse-ld=bfd -L${SRCDIR}/../../extern/supra_seal/obj -L${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/build/lib -L${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/isa-l/.libs -lsupraseal -Wl,--whole-archive -Wl,--no-as-needed -lspdk_bdev_malloc -lspdk_bdev_null -lspdk_bdev_nvme -lspdk_bdev_passthru -lspdk_bdev_lvol -lspdk_bdev_raid -lspdk_bdev_error -lspdk_bdev_gpt -lspdk_bdev_split -lspdk_bdev_delay -lspdk_bdev_zone_block -lspdk_blobfs_bdev -lspdk_blobfs -lspdk_blob_bdev -lspdk_lvol -lspdk_blob -lspdk_nvme -lspdk_bdev_ftl -lspdk_ftl -lspdk_bdev_aio -lspdk_bdev_virtio -lspdk_virtio -lspdk_vfio_user -lspdk_accel_ioat -lspdk_ioat -lspdk_scheduler_dynamic -lspdk_env_dpdk -lspdk_scheduler_dpdk_governor -lspdk_scheduler_gscheduler -lspdk_sock_posix -lspdk_event -lspdk_event_bdev -lspdk_bdev -lspdk_notify -lspdk_dma -lspdk_event_accel -lspdk_accel -lspdk_event_vmd -lspdk_vmd -lspdk_event_sock -lspdk_init -lspdk_thread -lspdk_trace -lspdk_sock -lspdk_rpc -lspdk_jsonrpc -lspdk_json -lspdk_util -lspdk_log -Wl,--no-whole-archive ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/build/lib/libspdk_env_dpdk.a -Wl,--whole-archive ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_bus_pci.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_cryptodev.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_dmadev.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_eal.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_ethdev.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_hash.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_kvargs.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_mbuf.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_mempool.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_mempool_ring.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_net.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_pci.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_power.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_rcu.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_ring.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_telemetry.a ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_vhost.a -Wl,--no-whole-archive -lnuma -lisal -pthread -ldl -lrt -luuid -lssl -lcrypto -lm -laio -lcudart_static -L${SRCDIR}/../../extern/supra_seal/deps/blst -lblst -lconfig++ -lgmp -lstdc++
+ #cgo CFLAGS: -I${SRCDIR}/../../extern/supraseal/sealing
+ #cgo LDFLAGS: -fno-omit-frame-pointer -Wl,-z,noexecstack -Wl,-z,relro,-z,now -fuse-ld=bfd -L${SRCDIR}/../../extern/supraseal/obj -L${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/build/lib -L${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/isa-l/.libs -lsupraseal -Wl,--whole-archive -Wl,--no-as-needed -lspdk_bdev_malloc -lspdk_bdev_null -lspdk_bdev_nvme -lspdk_bdev_passthru -lspdk_bdev_lvol -lspdk_bdev_raid -lspdk_bdev_error -lspdk_bdev_gpt -lspdk_bdev_split -lspdk_bdev_delay -lspdk_bdev_zone_block -lspdk_blobfs_bdev -lspdk_blobfs -lspdk_blob_bdev -lspdk_lvol -lspdk_blob -lspdk_nvme -lspdk_bdev_ftl -lspdk_ftl -lspdk_bdev_aio -lspdk_bdev_virtio -lspdk_virtio -lspdk_vfio_user -lspdk_accel_ioat -lspdk_ioat -lspdk_scheduler_dynamic -lspdk_env_dpdk -lspdk_scheduler_dpdk_governor -lspdk_scheduler_gscheduler -lspdk_sock_posix -lspdk_event -lspdk_event_bdev -lspdk_bdev -lspdk_notify -lspdk_dma -lspdk_event_accel -lspdk_accel -lspdk_event_vmd -lspdk_vmd -lspdk_event_sock -lspdk_init -lspdk_thread -lspdk_trace -lspdk_sock -lspdk_rpc -lspdk_jsonrpc -lspdk_json -lspdk_util -lspdk_log -Wl,--no-whole-archive ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/build/lib/libspdk_env_dpdk.a -Wl,--whole-archive ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_bus_pci.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_cryptodev.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_dmadev.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_eal.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_ethdev.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_hash.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_kvargs.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_mbuf.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_mempool.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_mempool_ring.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_net.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_pci.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_power.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_rcu.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_ring.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_telemetry.a ${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_vhost.a -Wl,--no-whole-archive -lnuma -lisal -pthread -ldl -lrt -luuid -lssl -lcrypto -lm -laio -lcudart_static -L${SRCDIR}/../../extern/supraseal/deps/blst -lblst -lconfig++ -lgmp -lstdc++
 #include <stdint.h>
 #include <stdbool.h>
 #include "supra_seal.h"
@@ -44,7 +44,7 @@ import (
 )
 
 /*
-root = {SRCDIR}/../../extern/supra_seal/
+root = {SRCDIR}/../../extern/supraseal/
 
 c++ -Ideps/spdk-v22.09/include -Ideps/spdk-v22.09/isa-l/.. -Ideps/spdk-v22.09/dpdk/build/include
 -g -O2 -march=native -fPIC -fno-omit-frame-pointer -fno-strict-aliasing -fstack-protector -fno-common
@@ -52,7 +52,7 @@ root = {SRCDIR}/../../extern/supra_seal/
 -DSPDK_GIT_COMMIT=4be6d3043 -pthread -Wall -Wextra -Wno-unused-variable -Wno-unused-parameter
 -Wno-missing-field-initializers -Wformat -Wformat-security
 -Ideps/spdk-v22.09/include -Ideps/spdk-v22.09/isa-l/.. -Ideps/spdk-v22.09/dpdk/build/include
--Iposeidon -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -c sealing/supra_seal.cpp -o obj/supra_seal.o -Wno-subobject-linkage
+-Iposeidon -Ideps/sppark -Ideps/sppark/util -Ideps/blst/src -c sealing/supraseal.cpp -o obj/supraseal.o -Wno-subobject-linkage
 
 ---
 
 NOTE: The below lines match the top of the file, just in a moderately more readable form.
@@ -62,9 +62,9 @@ NOTE: The below lines match the top of the file, just in a moderately more reada
 -Wl,-z,relro,-z,now
 -Wl,-z,noexecstack
 -fuse-ld=bfd
--L${SRCDIR}/../../extern/supra_seal/obj
--L${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/build/lib
--L${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/isa-l/.libs
+-L${SRCDIR}/../../extern/supraseal/obj
+-L${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/build/lib
+-L${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/isa-l/.libs
 -lsupraseal
 -Wl,--whole-archive
 -Wl,--no-as-needed
@@ -118,25 +118,25 @@ NOTE: The below lines match the top of the file, just in a moderately more reada
 -lspdk_util
 -lspdk_log
 -Wl,--no-whole-archive
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/build/lib/libspdk_env_dpdk.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/build/lib/libspdk_env_dpdk.a
 -Wl,--whole-archive
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_bus_pci.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_cryptodev.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_dmadev.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_eal.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_ethdev.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_hash.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_kvargs.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_mbuf.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_mempool.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_mempool_ring.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_net.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_pci.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_power.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_rcu.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_ring.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_telemetry.a
-${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_vhost.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_bus_pci.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_cryptodev.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_dmadev.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_eal.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_ethdev.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_hash.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_kvargs.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_mbuf.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_mempool.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_mempool_ring.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_net.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_pci.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_power.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_rcu.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_ring.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_telemetry.a
+${SRCDIR}/../../extern/supraseal/deps/spdk-v22.09/dpdk/build/lib/librte_vhost.a
 -Wl,--no-whole-archive
 -lnuma
 -lisal
@@ -149,7 +149,7 @@ ${SRCDIR}/../../extern/supra_seal/deps/spdk-v22.09/dpdk/build/lib/librte_vhost.a
 -lm
 -laio
 -lcudart_static
--L${SRCDIR}/../../extern/supra_seal/deps/blst -lblst
+-L${SRCDIR}/../../extern/supraseal/deps/blst -lblst
 -lconfig++
 -lgmp
 -lstdc++
diff --git a/scripts/build-blst.sh b/scripts/build-blst.sh
index d887ff902..b7f811525 100644
--- a/scripts/build-blst.sh
+++ b/scripts/build-blst.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 
-if [ ! -d "extern/supra_seal/deps/blst" ]; then
-	git clone https://github.com/supranational/blst.git extern/supra_seal/deps/blst
-	(cd extern/supra_seal/deps/blst
+if [ ! -d "extern/supraseal/deps/blst" ]; then
+	git clone https://github.com/supranational/blst.git extern/supraseal/deps/blst
+	(cd extern/supraseal/deps/blst
 	./build.sh -march=native)
 fi