diff --git a/README.md b/README.md index ea36379..a32d4ae 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,170 @@ -# bragg_hls -Low-latency Bragg peak detection through high-level synthesis +# BraggHLS -# Requirements +- [BraggHLS](#bragghls) +- [Current status](#current-status) +- [Building](#building) + * [Requirements](#requirements) + * [Build steps](#build-steps) +- [Running](#running) -`sudo apt-get install libgmp3-dev` -`sudo apt-get install libmpfr-dev libmpfi-dev` \ No newline at end of file +This is a framework for lowering PyTorch models to RTL using high-level synthesis (HLS) techniques. +Crucially, we do **not** use any existing HLS tools (such as Xilinx's Vitis). +The particular, driving, use case is low-latency [Bragg peak detection](https://arxiv.org/abs/2008.08198) for high-energy diffraction microscopy (HEDM). + +The "flow" is PyTorch -> MLIR -> python -> MLIR -> RTL. + +This project has a lot of moving pieces; the directory structure tells the tale: + +- [bragghls/](bragghls) - the core python library + - [compiler/compile.py](bragghls/compiler/compile.py) - python script that drives the entire flow + - [flopoco/](bragghls/flopoco) - functionality related to converting to and from [FloPoCo's](http://flopoco.org/) nonstandard floating point representation (for purposes of RTL generation *and* simulation) + - [ir/](bragghls/ir) - functionality related to parsing, transforming, and interpreting MLIR representations of PyTorch models. + - [rtl/](bragghls/rtl) - functionality related to emitting RTL (SystemVerilog) + - [testbench/](bragghls/testbench) - testbench runners via [cocotb](https://www.cocotb.org/) and [iverilog](http://iverilog.icarus.com/) +- [bragghls_translate/](bragghls_translate) - MLIR translation library for translating MLIR to python +- [examples/](examples) - obviously... 
+- [ip_cores/](ip_cores) - FloPoCo cores for 4,4 and 5,5 floating point addition and multiplication along with testbench generation script + - [flopoco_convert_ext/](ip_cores/flopoco_convert_ext) - pybind-ed extension for converting between IEEE754 and FloPoCo's floating point representation +- [scripts/](scripts) - helper scripts for things like generating new FloPoCo IPs and building the entire project +- [tests/](tests) - obviously... + +# Current status + +[linear](examples/linear.py) and [cnn](examples/cnn.py) examples work (including tiling) but [braggnn](examples/braggnn.py) still needs adjustment (compiles but doesn't pass tests). + +# Building + +The build steps are many and tortuous. + +## Requirements + +1. A compiler (GCC or Clang) +2. Python (>= 3.10) (recommended to use conda) +3. [GNU MP Bignum Library](https://gmplib.org/) (`sudo apt-get install libgmp3-dev`) +4. [GNU Multiple Precision Floating-Point Reliable Library](https://www.mpfr.org/) (`sudo apt-get install libmpfr-dev libmpfi-dev`) +5. [Multiple Precision Floating-point Interval library](http://perso.ens-lyon.fr/nathalie.revol/software.html) (`sudo apt-get install libmpfi-dev`) +6. [Icarus Verilog](http://iverilog.icarus.com/) (`sudo apt-get install iverilog`) +7. Patience + +Everything else should be taken care of by the build script (if I didn't miss anything...). + +## Build steps + +1. First make sure you have all the submodules checked out + ```shell + git submodule sync --recursive + git submodule update --init --recursive --jobs 0 + ``` + This will take a while due to our dependency on LLVM. +2. `pip install -r requirements.txt` to get `cmake` and `pybind11` and `ninja` and necessary python packages +3. Run the build script [scripts/build.sh](scripts/build.sh) which will: + 1. Build all of LLVM + 2. Build Torch-MLIR against LLVM + 3. Build CIRCT against LLVM + 4. Build `bragghls_translate` and `flopoco_converter` + 5. 
Download GHDL and unpack it (this step is optional if you don't want to generate new IP) + +You will need all the relevant executables (`circt-opt`, `torch-mlir-opt`, etc.) in your path **and in an env variable BRAGGHLS_PATH**. See [.envrc](.envrc) for a way to add all of them (or just use [direnv](https://direnv.net/)). +You will also need the following environment variables exported: + +```shell +export ADD_PIPELINE_DEPTH=2 +export MUL_PIPELINE_DEPTH=1 +export WE=4 +export WF=4 +export PYTHONPATH= +``` + +The above are the correct numbers for the 4,4 FloPoCo IP cores. + +# Running + +Assuming everything built successfully and you have all of the correct paths and environment variables, run any of the scripts in [examples](examples) to generate MLIR IR. +Then the main [compiler driver](bragghls/compiler/compile.py) can be run with the following arguments + +```shell +usage: BraggHLS compiler driver [-h] [-t] [-r] [-s] [-v] [-b] [--wE WE] [--wF WF] fp + +positional arguments: + fp Filepath of top-level MLIR file + +options: + -h, --help show this help message and exit + -t, --translate Translate MLIR to python + -r, --rewrite Transform/rewrite python + -s, --schedule Schedule the model using CIRCT + -v, --verilog Emit verilog + -b, --testbench Run autogenerated testbench + --wE WE Bit width of exponent + --wF WF Bit width of fraction +``` + +For example, running [examples/linear.py](examples/linear.py) produces an artifacts folder at [examples/linear_bragghls_artifacts](examples/linear_bragghls_artifacts) which will contain a `linear.mlir` file that looks like + +```mlir +module attributes {torch.debug_module_name = "Linear"} { + memref.global "private" constant @__constant_8x8xf32 : memref<8x8xf32> = dense<[...]> + memref.global "private" constant @__constant_8xf32 : memref<8xf32> = dense<[...]> + func.func @forward(%arg0: memref<1x8xf32>) -> memref { + + ... 
+ + scf.for %arg1 = %c0 to %c1 step %c1 { + scf.for %arg2 = %c0 to %c8 step %c1 { + %7 = memref.load %4[%arg1, %arg2] : memref<1x8xf32> + %8 = memref.load %6[] : memref + %9 = arith.addf %7, %8 : f32 + memref.store %9, %6[] : memref + } + } + return %6 : memref + } +} +``` + +Then running (from top-level in the source directory) +```shell +python bragghls/compiler/compile.py examples/linear_bragghls_artifacts/linear.mlir -t -r -s -v -b --wE 4 --wF 4 +``` +will generate `linear.sv` and run the automatically generated (no artifact) testbench, and produce the following output: + +``` +INFO: Running command: iverilog "-o "examples/linear_bragghls_artifacts/sim.vvp "-D "COCOTB_SIM=1 "-g2012 "examples/linear_bragghls_artifacts/linear.sv "ip_cores/flopoco_fmul_4_4.sv "ip_cores/flopoco_fadd_4_4.sv "ip_cores/flopoco_relu.sv "ip_cores/flopoco_neg.sv" in directory:"examples/linear_bragghls_artifacts" + 0.00ns INFO Running on Icarus Verilog version 11.0 (stable) + 0.00ns INFO Running tests with cocotb v1.6.2 from /Users/mlevental/miniforge3/envs/bragghls/lib/python3.10/site-packages/cocotb + 0.00ns INFO Seeding Python random module with 1659448436 + 0.00ns WARNING Pytest not found, assertion rewriting will not occur + 0.00ns INFO Found test tb_runner.test_tb + 0.00ns INFO running test_tb (1/1) + +outputs {'_6': [ ]} +passed 43 +outputs {'_6': [ ]} +passed 87 +outputs {'_6': [ ]} +passed 131 +outputs {'_6': [ ]} +passed 175 + +... 
+ +passed 703 +outputs {'_6': [ ]} +passed 747 +outputs {'_6': [ ]} +passed 791 +outputs {'_6': [ ]} +passed 835 +outputs {'_6': [ ]} +passed 879 + + + 1761.00ns INFO test_tb passed + 1761.00ns INFO ************************************************************************************** + ** TEST STATUS SIM TIME (ns) REAL TIME (s) RATIO (ns/s) ** + ************************************************************************************** + ** tb_runner.test_tb PASS 1761.00 1.08 1636.30 ** + ************************************************************************************** + ** TESTS=1 PASS=1 FAIL=0 SKIP=0 1761.00 1.12 1571.26 ** + ************************************************************************************** +``` \ No newline at end of file diff --git a/bragghls/compiler/compile.py b/bragghls/compiler/compile.py index 014f033..afbec5b 100644 --- a/bragghls/compiler/compile.py +++ b/bragghls/compiler/compile.py @@ -129,17 +129,17 @@ def main(args): with open(f"{artifacts_dir}/{name}.rewritten.sched.mlir", "r") as f: sched_and_rewritten_mlir = f.read() - if args.verilog: - ( - op_id_data, - func_args, - returns, - return_time, - vals, - csts, - pe_idxs, - ) = parse_mlir_module(sched_and_rewritten_mlir) + ( + op_id_data, + func_args, + returns, + return_time, + vals, + csts, + pe_idxs, + ) = parse_mlir_module(sched_and_rewritten_mlir) + if args.verilog: verilog_file, input_wires, output_wires, max_fsm_stage = emit_verilog( name, args.wE, @@ -158,6 +158,8 @@ def main(args): f.write(verilog_file) print(f"{max_fsm_stage=}") + else: + max_fsm_stage = return_time + 1 if args.testbench: testbench_runner( @@ -175,16 +177,16 @@ def main(args): if __name__ == "__main__": - DEBUG = bool(int(os.getenv("DEBUG", "0"))) - parser = argparse.ArgumentParser() - parser.add_argument("fp") - parser.add_argument("-t", "--translate", default=False, action="store_true") - parser.add_argument("-r", "--rewrite", default=False, action="store_true") - parser.add_argument("-s", 
"--schedule", default=False, action="store_true") - parser.add_argument("-v", "--verilog", default=False, action="store_true") - parser.add_argument("-b", "--testbench", default=False, action="store_true") - parser.add_argument("--wE", default=4) - parser.add_argument("--wF", default=4) + DEBUG = bool(int(os.getenv("DEBUG", "1"))) + parser = argparse.ArgumentParser("BraggHLS compiler driver") + parser.add_argument("fp", help="Filepath of top-level MLIR file") + parser.add_argument("-t", "--translate", default=False, action="store_true", help="Translate MLIR to python") + parser.add_argument("-r", "--rewrite", default=False, action="store_true", help="Transform/rewrite python") + parser.add_argument("-s", "--schedule", default=False, action="store_true", help="Schedule the model using CIRCT") + parser.add_argument("-v", "--verilog", default=False, action="store_true", help="Emit verilog") + parser.add_argument("-b", "--testbench", default=False, action="store_true", help="Run autogenerated testbench") + parser.add_argument("--wE", default=4, help="Bit width of exponent") + parser.add_argument("--wF", default=4, help="Bit width of fraction") args = parser.parse_args() args.wE = int(args.wE) args.wF = int(args.wF) diff --git a/bragghls/flopoco/ops.py b/bragghls/flopoco/ops.py index de47e9f..92e1b4e 100644 --- a/bragghls/flopoco/ops.py +++ b/bragghls/flopoco/ops.py @@ -33,6 +33,13 @@ def ReduceAdd(vals): return pairs[0][0] + pairs[0][1] +def check_make_val(v, wE, wF): + if not isinstance(v, Val): + assert isinstance(v, (float, int)), v + v = Val(v, wE, wF) + return v + + @dataclass(frozen=True) class Val: ieee: float @@ -48,21 +55,27 @@ def __post_init__(self): ) object.__setattr__(self, "name", str(self)) - def __mul__(self, other: "Val"): + def __mul__(self, other): + other = check_make_val(other, self.wE, self.wF) v = mul(self, other) return v def __eq__(self, other): return self.fp == other.fp - def __add__(self, other: "Val"): + def __add__(self, other): + other = 
check_make_val(other, self.wE, self.wF) v = add(self, other) return v - def __sub__(self, other: "Val"): + def __sub__(self, other): + other = check_make_val(other, self.wE, self.wF) v = sub(self, other) return v + def __neg__(self): + return Val(-self.ieee, self.wE, self.wF) + def copy(self): return self diff --git a/bragghls/ir/parse.py b/bragghls/ir/parse.py index d4e099f..7591c3b 100644 --- a/bragghls/ir/parse.py +++ b/bragghls/ir/parse.py @@ -140,11 +140,9 @@ def parse_mlir_module(module_str): start_time = int(start_time[0]) else: start_time = None - returns, return_time = idents[0][0], start_time - if not isinstance(returns, list): - returns = [returns] - for r in returns: - vals.add(r) + returns, return_time = [idn[0] for idn in idents], start_time + for r in returns: + vals.add(r) else: continue assert func_args and returns diff --git a/bragghls/rtl/emit_verilog.py b/bragghls/rtl/emit_verilog.py index 58f2739..9c6b121 100644 --- a/bragghls/rtl/emit_verilog.py +++ b/bragghls/rtl/emit_verilog.py @@ -31,7 +31,8 @@ def build_ip_res_val_map(pe, op_datas: list[Op], vals): else: warnings.warn(f"not mapping {res_val} to {op} in ip_res_val_map") elif op.type in {OpType.NEG, OpType.RELU}: - ip_res_val_map[res_val] = pe.frelu.res + ip = getattr(pe, op.type.value, None) + ip_res_val_map[res_val] = ip.res elif op.type in {OpType.COPY}: pass elif op.type == OpType.FMAC: @@ -178,6 +179,7 @@ def emit(*args): frelu = ReLU(pe_idx, signal_width) emit(frelu.instantiate()) fneg = Neg(pe_idx, signal_width) + emit(fneg.instantiate()) pes[pe_idx] = PE(fadd, fmul, frelu, fneg, pe_idx) pe_to_ops = cluster_pes(pes, op_id_data) diff --git a/examples/braggnn.py b/examples/braggnn.py index 16ad51f..222c0fc 100644 --- a/examples/braggnn.py +++ b/examples/braggnn.py @@ -1,4 +1,5 @@ import argparse +import os from pathlib import Path import torch @@ -306,9 +307,14 @@ def make_braggn(scale, img_size=11, simplify_weights=True): if __name__ == "__main__": parser = 
argparse.ArgumentParser(description="make stuff") - parser.add_argument("--out_dir", type=Path, default=Path(".")) - parser.add_argument("--scale", type=int, default=4) + parser.add_argument( + "--out_dir", + type=Path, + default=Path(__file__).parent / "braggnn_bragghls_artifacts", + ) + parser.add_argument("--scale", type=int, default=1) args = parser.parse_args() args.out_dir = args.out_dir.resolve() dot_str = make_braggn(args.scale) - open(f"{args.out_dir}/braggnn_{args.scale}.mlir", "w").write(dot_str) + os.makedirs(f"{args.out_dir}", exist_ok=True) + open(f"{args.out_dir}/braggnn.mlir", "w").write(dot_str) diff --git a/examples/dot_product/dot_product.py b/examples/dot_product/dot_product.py deleted file mode 100644 index 82bdc51..0000000 --- a/examples/dot_product/dot_product.py +++ /dev/null @@ -1,45 +0,0 @@ -import argparse -import os -from pathlib import Path - -import torch -from torch import nn - -from bragghls.ir.nn import compile_nn_module_to_mlir - - -class Dot(nn.Module): - def __init__(self): - super().__init__() - - def forward(self, x, y): - return (x * y).sum() - - -def make_dot(size=11): - with torch.no_grad(): - mod = Dot() - mod.eval() - mlir_module = compile_nn_module_to_mlir( - mod, - [ - ([size], torch.float32), - ([size], torch.float32), - ], - ) - return str(mlir_module) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser(description="make stuff") - parser.add_argument( - "--out_dir", - type=Path, - default=Path(__file__).parent / "dot_bragghls_artifacts", - ) - parser.add_argument("--size", type=int, default=8) - args = parser.parse_args() - args.out_dir = args.out_dir.resolve() - dot_str = make_dot(args.size) - os.makedirs(f"{args.out_dir}", exist_ok=True) - open(f"{args.out_dir}/dot.mlir", "w").write(dot_str) diff --git a/requirements.txt b/requirements.txt index 9372ff6..e34e6a7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,6 +15,6 @@ pybind11 cmake ninja --f https://github.com/llvm/torch-mlir/releases 
---pre -torch-mlir +# -f https://github.com/llvm/torch-mlir/releases +# --pre +# torch-mlir diff --git a/tests/run_tests.sh b/tests/run_tests.sh old mode 100644 new mode 100755