From 74603c3adfd6667675c05f5648d5deb94e041fa0 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Sat, 1 Jun 2024 20:08:48 -0400 Subject: [PATCH 1/2] feat: add cfg printer using mermaid.js syntax --- hir/src/function.rs | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/hir/src/function.rs b/hir/src/function.rs index d12f3bc31..78dd91c9a 100644 --- a/hir/src/function.rs +++ b/hir/src/function.rs @@ -479,6 +479,10 @@ impl Function { pub fn builder(&mut self) -> FunctionBuilder { FunctionBuilder::new(self) } + + pub fn cfg_printer(&self) -> impl fmt::Display + '_ { + CfgPrinter { function: self } + } } impl fmt::Debug for Function { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { @@ -617,3 +621,53 @@ impl PartialEq for Function { self.dfg.imports == other.dfg.imports } } + +struct CfgPrinter<'a> { + function: &'a Function, +} +impl<'a> fmt::Display for CfgPrinter<'a> { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use std::collections::{BTreeSet, VecDeque}; + + f.write_str("flowchart TB\n")?; + + let mut block_q = VecDeque::from([self.function.dfg.entry_block()]); + let mut visited = BTreeSet::::default(); + while let Some(block_id) = block_q.pop_front() { + if !visited.insert(block_id) { + continue; + } + if let Some(last_inst) = self.function.dfg.last_inst(block_id) { + match self.function.dfg.analyze_branch(last_inst) { + BranchInfo::NotABranch => { + // Must be a return or unreachable, print opcode for readability + let opcode = self.function.dfg.inst(last_inst).opcode(); + writeln!(f, " {block_id} --> {opcode}")?; + } + BranchInfo::SingleDest(succ, _) => { + assert!( + self.function.dfg.is_block_linked(succ), + "reference to detached block in attached block {}", + succ + ); + writeln!(f, " {block_id} --> {succ}")?; + block_q.push_back(succ); + } + BranchInfo::MultiDest(ref jts) => { + for jt in jts { + assert!( + self.function.dfg.is_block_linked(jt.destination), + "reference to detached block in attached block {}", + jt.destination + ); + writeln!(f, " {block_id} --> {}", jt.destination)?; + block_q.push_back(jt.destination); + } + } + } + } + } + + Ok(()) + } +} From 4e49eb9c4f05784f8438a76f04281a439dada892 Mon Sep 17 00:00:00 2001 From: Paul Schoenfelder Date: Sat, 1 Jun 2024 20:09:49 -0400 Subject: [PATCH 2/2] fix(pass): refactor treeify pass This commit fixes bugs with the treeify pass that rendered it useless on non-trivial control flow graphs. In particular, it failed to properly handle loops, aside from some trivial cases. An especially egregious issue, was failing to account for how to treeify the body of a loop, while preserving control flow along loopback edges, or edges which escape loops at a join point (i.e. all control flow exiting a loop leaves along the same edge). Furthermore, it was improperly copying subgraphs during the actual treeification process, and detaching blocks from the function when they were still reachable along other paths. These issues were all caused by using outdated analyses to answer questions about blocks, while also using the real-time state of the function to answer others, resulting in a mix of old and new information that produced incorrect control flow graphs when nested loops were involved. The pass has been rewritten in such a way to ensure that all of the these issues are resolved. We now primarily rely on the real-time state of the function for most things, but also track enough state about changes made to the CFG, to be able to rely on the analyses computed on the original CFG to answer questions about relationships between blocks (since the new blocks introduced to the function by treeification, must be at least the same as the original CFG, or otherwise must adhere to the rules governing the output of treeification). Additionally, we now rely on more abstract properties of the control flow graph to determine how to apply treeification, so as to ensure that not only loops, but other forms of interesting control flow are handled properly. Specifically, we now only treeify blocks which are known to have multiple predecessors that all dominate the candidate block (or at a minimum, control flow must always flow through the predecessor first on its way to the candidate block). This ensures that we do not attempt to treeify a loop from within the loop, only the parts of a loop body which remain within the loop prior to continuation/escape. --- hir-transform/src/treeify.rs | 435 +++++++++++++++++++++-------------- 1 file changed, 268 insertions(+), 167 deletions(-) diff --git a/hir-transform/src/treeify.rs b/hir-transform/src/treeify.rs index 75c4076b8..7368b903f 100644 --- a/hir-transform/src/treeify.rs +++ b/hir-transform/src/treeify.rs @@ -1,33 +1,62 @@ -use std::{collections::VecDeque, rc::Rc}; +use std::{ + collections::{BTreeMap, VecDeque}, + rc::Rc, +}; use midenc_hir::{ self as hir, pass::{AnalysisManager, RewritePass, RewriteResult}, Block as BlockId, Value as ValueId, *, }; -use midenc_hir_analysis::{BlockPredecessor, ControlFlowGraph, DominatorTree, LoopAnalysis}; +use midenc_hir_analysis::{BlockPredecessor, ControlFlowGraph, DominatorTree, Loop, LoopAnalysis}; use midenc_session::Session; -use rustc_hash::FxHashSet; +use smallvec::{smallvec, SmallVec}; use crate::adt::ScopedMap; /// This pass rewrites the CFG of a function so that it forms a tree. /// -/// While we technically call this treeification, loop headers are preserved, so -/// there are still nodes in the CFG with multiple predecessors, but _only_ those -/// blocks which are loop headers are permitted to be unaltered. -/// -/// This transformation splits vertices with multiple predecessors, by duplicating the -/// subtree of the program rooted at those vertices. As mentioned above, we do not split -/// vertices representing loop headers, in order to preserve loops in the CFG of the resulting -/// IR. However, we can consider each loop within the overall CFG of a function to be a single -/// vertex after this transformation, and with this perspective the CFG forms a tree. Loop -/// nodes are then handled specially during codegen. -/// -/// The transformation is performed bottom-up, in CFG postorder. -/// -/// This pass also computes the set of blocks in each loop which must be terminated with `push.0` -/// to exit the containing loop. +/// While we technically call this treeification, the CFG cannot be fully converted into a +/// tree in general, as loops must be preserved (they can be copied along multiple control +/// flow paths, but we want to preserve the loop structure in the CFG). +/// +/// The treeify transformation concerns itself with any block B which has multiple predecessors +/// in the control flow graph, where for at least two of those predecessors, the predecessor is +/// always visited before B, if control flows through both. This is a slightly less restrictive +/// conditon than the dominance property, but is very much related - the primary difference being +/// that unlike dominance, what we are capturing is that the predecessor block is not along a +/// loopback edge. It is quite common for a predecessor block to always be visited first in the +/// CFG, while not dominating its successor: consider an if/else expression, where control splits +/// at the `if/else`, and rejoins afterwards, the code in the final block where control is joined +/// can only be reached after either the `if` or `else` block has executed, but neither the `if` +/// nor the `else` blocks can be considered to "dominate" the final block in the graph theoretical +/// sense. +/// +/// The actual treeification process works like so: +/// +/// 1. For each block B, in the postorder sort of the CFG, determine if B has more than one +/// predecessor P, where P appears before B in the reverse postorder sort of the CFG. a. If +/// found, treeify the block as described in subsequent steps b. Otherwise, ignore this block and +/// proceed +/// 2. For each P, clone B to a new block B', and rewrite P such that it branches to B' rather than +/// B. +/// 3. For each successor S of B: a. If S is a loop header, and S appears before B in the reverse +/// postorder sort of the CFG, then it is a loopback edge, so the corresponding edge from B' to S +/// is left intact. b. If S is a loop header, but S appears after B in the reverse postorder sort +/// of the CFG, then it is treated like other blocks (see c.) c. Otherwise, clone S to S', and +/// rewrite B' to branch to S' instead of S. +/// 4. Repeat step 2 for the successors of S, recursively, until the subgraph reachable from B +/// +/// Since we are treeifying blocks from the leaves of the CFG to the root, and because we do not +/// follow loopback edges which escape/continue an outer loop - whenever we clone a subgraph of +/// the CFG, we know that it has already been treeified, as we only start to treeify a block once +/// all of the blocks reachable via that block have been treeified. +/// +/// In short, we're trying to split blocks with multiple predecessors such that all blocks have +/// either zero or one predecessors, i.e. the CFG forms a tree. As mentioned previously, we must +/// make an exception for loop headers, which by definition must have at least one predecessor +/// which is a loopback edge, but this suits us just fine, as Miden Assembly provides control flow +/// instructions compatible with lowering from such a CFG. /// /// # Examples /// @@ -273,63 +302,108 @@ impl RewritePass for Treeify { let domtree = analyses.get_or_compute::(function, session)?; let loops = analyses.get_or_compute::(function, session)?; + // Obtain the set of all blocks we need to check for treeification in a new vector. + // + // We must do this because as we treeify the CFG, we will be updating it, as well + // as all of the analyses, such as the dominator tree, so we can't iterate it at + // the same time as we do treeification. + // + // Additionally, this set never changes - we are visiting the function bottom-up, + // so we only start to treeify a block once all of the blocks reachable via that + // block have been treeified. As a result, the tree reachable from B is already + // treeified. + let to_visit = domtree.cfg_postorder().to_vec(); + + // This outer loop visits all of the original blocks of the CFG postorder (bottom-up), + // and is simply searching for blocks of the function which meet the criteria for + // treeification. + // + // The inner loop is responsible for actually treeifying those blocks. This + // necessarily has the effect of mutating the function, and therefore requires + // us to recompute some of the analyses so that we can properly determine how + // to handle certain blocks in the portion of the CFG being treeified, namely + // loops (via loop headers). + // + // Loops require special handling, as during treeification we typically will be + // cloning blocks that belong to the portion of the CFG rooted at the block being + // treeified. However, if we are treeifying a block that belongs to a loop, we do + // not want to clone along control flow edges representing continuation or breaking + // out of an outer loop. On the other hand, if we reach a loop that is only reachable + // via the block being treeified, we do want to copy those, as each branch of the tree + // will need its own copy of that loop. + // + // To handle this, we require the ability to: + // + // * Identify loop headers (which requires the loops analysis) + // * Identify the reverse postorder index of a block (which requires the dominator tree) + // + // The dominator tree requires the control flow graph analysis, and the loop analysis + // requires the dominator tree - as a result, each time we modify the CFG, we must also + // ensure that all three analyses reflect any effects of such modifications. + // + // However, this would be very expensive to compute as frequently as would be required + // by this transformation. Instead, since the transformation is essentially just cloning + // multiple copies of various subgraphs of the original CFG, we can use the analyses of + // the original CFG as well, by mapping each copied block back to the block in the CFG + // from which it is derived. By doing so, we can determine if that block is a loop header, + // or how two blocks are sorted relative to each other in the reverse postorder, without + // having to ever recompute the three analyses mentioned above. + let mut block_infos = BlockInfos::new(cfg, domtree, loops); + + // For each block B, treeify B IFF it has multiple predecessors, where for each + // predecessor P, P appears before B in the reverse postorder sort of the CFG. + // Treeifying B involves creating a copy of B and the subgraph of the CFG rooted at B, + // for each P. + // + // The blocks are selected this way, since by splitting these nodes in the CFG, such + // that each predecessor gets its own copy of the subgraph reached via B, the CFG is + // made more tree-like. Once all nodes are split, then the CFG is either a tree, or + // a DAG that is almost a tree, with the only remaining DAG edges being loopback edges + // for loops that appear in the CFG. let mut block_q = VecDeque::::default(); let mut changed = false; - - for b in domtree.cfg_postorder().iter().copied() { - if loops.is_loop_header(b).is_some() { - // Ignore loop headers + for b in to_visit { + // Check if this block meets the conditions for treeification + let predecessors = block_infos + .cfg + .pred_iter(b) + .filter(|bp| block_infos.rpo_cmp(bp.block, b).is_lt()) + .collect::>(); + + if predecessors.len() < 2 { continue; } - // Blocks with multiple predecessors cause the CFG to form a DAG, - // we need to duplicate the CFG rooted at this block for all predecessors. - // - // While we could technically preserve one of the predecessors, we perform - // some transformations during the copy that would result in copied vs original - // trees to differ slightly, which would inhibit subsequent optimizations. - // The original subtree blocks are detached from the function. - if cfg.num_predecessors(b) > 1 { - for p in cfg.pred_iter(b) { - assert!(block_q.is_empty()); - block_q.push_back(CopyBlock::new(b, p)); - while let Some(CopyBlock { - b, - ref p, - value_map, - block_map, - }) = block_q.pop_front() + // For each predecessor, create a clone of B and all of its successors, with + // the exception of successors which are loop headers where the loop header + // appears before B in the reverse postorder sort of the CFG. Such edges are + // loopback edges to an outer loop, which must be preserved, even when cloning + // the subgraph rooted at B. + for p in predecessors { + assert!(block_q.is_empty()); + block_q.push_back(CopyBlock::new(b, p)); + let root = b; + + while let Some(CopyBlock { + b, + ref p, + value_map, + block_map, + }) = block_q.pop_front() + { + // If we enqueue a successor block to be copied, and that block is a loop header + // which appears before the root block in the CFG, then it is a loopback edge + // that escapes the portion of the CFG being treeified, and we do not want not + // actually copy it. + if block_infos.is_loop_header(b).is_some() + && block_infos.rpo_cmp(b, root).is_lt() { - // Copy this block and its children - if loops.is_loop_header(b).is_some() { - treeify_loop( - b, - p, - function, - &cfg, - &loops, - &mut block_q, - value_map, - block_map, - )?; - } else { - treeify( - b, - p, - function, - &cfg, - &loops, - &mut block_q, - value_map, - block_map, - )?; - } + continue; } - } - // After treeification, the original subtree blocks cannot possibly be - // referenced by other blocks in the function, so remove all of them - detach_tree(b, function, &cfg); + // Copy this block and its successors + treeify(b, p, function, &mut block_infos, &mut block_q, value_map, block_map)?; + } // Mark the control flow graph as modified changed = true; @@ -339,6 +413,21 @@ impl RewritePass for Treeify { // If we made any changes, we need to recompute all analyses if !changed { analyses.mark_all_preserved::(&function.id); + } else { + // Recompute the CFG and dominator tree and remove all unreachable blocks + let cfg = ControlFlowGraph::with_function(function); + let domtree = DominatorTree::with_function(function, &cfg); + let mut to_remove = vec![]; + for (b, _) in function.dfg.blocks() { + if domtree.is_reachable(b) { + continue; + } + to_remove.push(b); + } + // Remove all blocks from the function that were unreachable + for b in to_remove { + function.dfg.detach_block(b); + } } Ok(()) @@ -350,110 +439,74 @@ fn treeify( b: BlockId, p: &BlockPredecessor, function: &mut hir::Function, - cfg: &ControlFlowGraph, - loops: &LoopAnalysis, + block_infos: &mut BlockInfos, block_q: &mut VecDeque, mut value_map: ScopedMap, mut block_map: ScopedMap, ) -> anyhow::Result<()> { - // 1. Create a new block `b'`, without block arguments, + // Check if we're dealing with a loop header + let is_loop = block_infos.is_loop_header(b).is_some(); + + // 1. Create a new block `b'`, without block arguments, unless it is a loop header, + // in which case we want to preserve the block arguments, just with new value ids let b_prime = function.dfg.create_block_after(p.block); block_map.insert(b, b_prime); - // 2. Initialize a lookup table of old value defs to new value defs, seed it by mapping the - // block arguments of `b` to the values passed from the predecessor - match function.dfg.analyze_branch(p.inst) { - BranchInfo::SingleDest(_, args) => { - value_map.extend(function.dfg.block_args(b).iter().copied().zip(args.iter().copied())); + block_infos.insert_copy(b_prime, b); + + // 2. Remap values in the cloned block: + // + // * If this is a loop header, we need to replace references to the old block arguments with the + // new block arguments. + // * If this is not a loop header, then we need to replace references to the block arguments + // with the values which were passed as arguments in the predecessor block + if is_loop { + function.dfg.clone_block_params(b, b_prime); + for (src, dest) in function + .dfg + .block_params(b) + .iter() + .copied() + .zip(function.dfg.block_params(b_prime).iter().copied()) + { + value_map.insert(src, dest); } - BranchInfo::MultiDest(ref jts) => { - for jt in jts.iter() { - if jt.destination == b { - value_map.extend( - function.dfg.block_args(b).iter().copied().zip(jt.args.iter().copied()), - ); - break; - } + } else { + match function.dfg.analyze_branch(p.inst) { + BranchInfo::SingleDest(_, args) => { + value_map + .extend(function.dfg.block_args(b).iter().copied().zip(args.iter().copied())); } + BranchInfo::MultiDest(ref jts) => { + let jt = jts.iter().find(|jt| jt.destination == b).unwrap(); + value_map.extend( + function.dfg.block_args(b).iter().copied().zip(jt.args.iter().copied()), + ); + } + BranchInfo::NotABranch => unreachable!(), } - BranchInfo::NotABranch => unreachable!(), } - // 3. Update the predecessor instruction to reference the new block, remove block arguments. + + // 3. Update the predecessor instruction to reference the new block, remove block arguments if + // this is not a loop header. + let mut seen = false; // Only update the first occurrance of this predecessor update_predecessor(function, p, |dest, dest_args, pool| { - if *dest == b { + if *dest == b && !seen { + seen = true; *dest = b_prime; - dest_args.clear(pool); + if !is_loop { + dest_args.clear(pool); + } } }); - // 4. Copy contents of `b` to `b'`, inserting defs in the lookup table, and mapping operands to - // their new "corrected" values - copy_instructions(b, b_prime, function, &mut value_map, &block_map); - // 5. Recursively copy all children of `b` to `b_prime` - copy_children(b, b_prime, function, cfg, loops, block_q, value_map, block_map) -} + assert!(seen); -#[allow(clippy::too_many_arguments)] -fn treeify_loop( - b: BlockId, - p: &BlockPredecessor, - function: &mut hir::Function, - cfg: &ControlFlowGraph, - loops: &LoopAnalysis, - block_q: &mut VecDeque, - mut value_map: ScopedMap, - mut block_map: ScopedMap, -) -> anyhow::Result<()> { - // 1. Create new block, b', with a new set of block arguments matching the original, - // populate the value map with rewrites for the original block argument values - let b_prime = function.dfg.create_block_after(p.block); - block_map.insert(b, b_prime); - function.dfg.clone_block_params(b, b_prime); - for (src, dest) in function - .dfg - .block_params(b) - .iter() - .copied() - .zip(function.dfg.block_params(b_prime).iter().copied()) - { - value_map.insert(src, dest); - } - // 2. Update the predecessor instruction to reference the new block, leave block arguments - // unchanged - update_predecessor(function, p, |dest, _, _| { - if *dest == b { - *dest = b_prime; - } - }); - // 3. Copy contents of `b` to `b'`, inserting defs in the lookup table, and mapping operands to + // 4. Copy contents of `b` to `b'`, inserting defs in the lookup table, and mapping operands to // their new "corrected" values copy_instructions(b, b_prime, function, &mut value_map, &block_map); - // 4. Recursively copy all children of `b` to `b_prime` - copy_children(b, b_prime, function, cfg, loops, block_q, value_map, block_map) -} -/// Detach `root`, and all of it's reachable children, from the layout of `function` -/// -/// When called, it is assumed that `root` has been cloned to a new block, -/// along with all of it's reachable children, and its predecessor rewritten -/// to refer to the new block instead. As a result, `root` should no longer be -/// reachable in the CFG, along with its children, as they would have been cloned -/// as well. -/// -/// NOTE: This does not delete the block data attached to the function, only the -/// presence of the block in the layout of the function. -fn detach_tree(root: BlockId, function: &mut hir::Function, cfg: &ControlFlowGraph) { - let mut delete_q = VecDeque::::default(); - let mut visited = FxHashSet::::default(); - delete_q.push_back(root); - visited.insert(root); - while let Some(block) = delete_q.pop_front() { - function.dfg.detach_block(block); - for b in cfg.succ_iter(block) { - // Skip blocks we've already seen - if visited.insert(b) { - delete_q.push_back(b); - } - } - } + // 5. Clone the children of `b` and append to `b_prime`, but do not clone children of `b` that + // are loop headers, only clone the edge. + copy_children(b, b_prime, function, block_q, value_map, block_map) } #[allow(clippy::too_many_arguments)] @@ -461,8 +514,6 @@ fn copy_children( b: BlockId, b_prime: BlockId, function: &mut hir::Function, - cfg: &ControlFlowGraph, - loops: &LoopAnalysis, block_q: &mut VecDeque, value_map: ScopedMap, block_map: ScopedMap, @@ -471,21 +522,23 @@ fn copy_children( inst: function.dfg.last_inst(b_prime).expect("expected non-empty block"), block: b_prime, }; + let successors = match function.dfg.analyze_branch(function.dfg.last_inst(b).unwrap()) { + BranchInfo::NotABranch => return Ok(()), + BranchInfo::SingleDest(dest, _) => smallvec![dest], + BranchInfo::MultiDest(ref jts) => { + SmallVec::<[_; 2]>::from_iter(jts.iter().map(|jt| jt.destination)) + } + }; let value_map = Rc::new(value_map); let block_map = Rc::new(block_map); - for succ in cfg.succ_iter(b) { - // If we've already seen this successor, and it is a loop header, then - // we don't want to copy it, but we do want to replace the reference to - // this block with its copy + + for succ in successors { if let Some(succ_prime) = block_map.get(&succ) { - if loops.is_loop_header(succ).is_some() { - update_predecessor(function, &pred, |dest, _, _| { - if dest == &succ { - *dest = *succ_prime; - } - }); - continue; - } + update_predecessor(function, &pred, |dest, _, _| { + if dest == &succ { + *dest = *succ_prime; + } + }); } block_q.push_back(CopyBlock { @@ -643,6 +696,54 @@ where } } +struct BlockInfos { + blocks: BTreeMap, + cfg: Rc, + domtree: Rc, + loops: Rc, +} +impl BlockInfos { + pub fn new( + cfg: Rc, + domtree: Rc, + loops: Rc, + ) -> Self { + Self { + blocks: Default::default(), + cfg, + domtree, + loops, + } + } + + pub fn insert_copy(&mut self, copied: BlockId, original: BlockId) { + let resolved = self.to_original_block(original); + self.blocks.insert(copied, resolved); + } + + pub fn is_loop_header(&self, block_id: BlockId) -> Option { + let resolved = self.to_original_block(block_id); + self.loops.is_loop_header(resolved) + } + + pub fn rpo_cmp(&self, a: BlockId, b: BlockId) -> core::cmp::Ordering { + let a_orig = self.to_original_block(a); + let b_orig = self.to_original_block(b); + self.domtree.rpo_cmp_block(a_orig, b_orig) + } + + fn to_original_block(&self, mut block_id: BlockId) -> BlockId { + loop { + if let Some(copied_from) = self.blocks.get(&block_id).copied() { + block_id = copied_from; + continue; + } + + break block_id; + } + } +} + #[cfg(test)] mod tests { use midenc_hir::{