diff --git a/Cargo.toml b/Cargo.toml index cf02c73c..99947b3a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,5 +7,6 @@ members = [ "crates/parser", "crates/filecheck", "crates/triple", + "crates/macros", "crates/interpreter", ] diff --git a/crates/ir/Cargo.toml b/crates/ir/Cargo.toml index 0d6789d5..9888a450 100644 --- a/crates/ir/Cargo.toml +++ b/crates/ir/Cargo.toml @@ -20,5 +20,6 @@ smallvec = "1.7.0" rustc-hash = "2.0.0" dyn-clone = "1.0.4" sonatina-triple = { path = "../triple", version = "0.0.3-alpha" } +macros = { package = "sonatina-macros", path = "../macros", version = "0.0.3-alpha" } indexmap = "2.0.0" dot2 = { git = "https://github.com/sanpii/dot2.rs.git" } diff --git a/crates/ir/src/inst/arith.rs b/crates/ir/src/inst/arith.rs new file mode 100644 index 00000000..a198d90b --- /dev/null +++ b/crates/ir/src/inst/arith.rs @@ -0,0 +1,93 @@ +use macros::Inst; + +use crate::Value; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Neg { + #[inst(value)] + arg: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Add { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Mul { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Sub { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct Sdiv { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct Udiv { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct Umod { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, 
Hash, Inst)] +#[inst(has_side_effect)] +pub struct Smod { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Shl { + #[inst(value)] + bits: Value, + #[inst(value)] + value: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Shr { + #[inst(value)] + bits: Value, + #[inst(value)] + value: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Sar { + #[inst(value)] + bits: Value, + #[inst(value)] + value: Value, +} diff --git a/crates/ir/src/inst/cast.rs b/crates/ir/src/inst/cast.rs new file mode 100644 index 00000000..b184b1a0 --- /dev/null +++ b/crates/ir/src/inst/cast.rs @@ -0,0 +1,31 @@ +use macros::Inst; + +use crate::{Type, Value}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Sext { + #[inst(value)] + from: Value, + ty: Type, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Zext { + #[inst(value)] + from: Value, + ty: Type, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Trunc { + #[inst(value)] + from: Value, + ty: Type, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Bitcast { + #[inst(value)] + from: Value, + ty: Type, +} diff --git a/crates/ir/src/inst/cmp.rs b/crates/ir/src/inst/cmp.rs new file mode 100644 index 00000000..4e106ae6 --- /dev/null +++ b/crates/ir/src/inst/cmp.rs @@ -0,0 +1,89 @@ +use macros::Inst; + +use crate::Value; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Lt { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Gt { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Slt { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] 
+pub struct Sgt { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Le { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Ge { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Sle { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Sge { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Eq { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Ne { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct IsZero { + #[inst(value)] + lhs: Value, +} diff --git a/crates/ir/src/inst/control_flow.rs b/crates/ir/src/inst/control_flow.rs new file mode 100644 index 00000000..7e7fef65 --- /dev/null +++ b/crates/ir/src/inst/control_flow.rs @@ -0,0 +1,51 @@ +use macros::Inst; +use smallvec::SmallVec; + +use crate::{module::FuncRef, Block, Type, Value}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Inst)] +pub struct Jump { + dest: Block, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Inst)] +pub struct Br { + #[inst(value)] + cond: Value, + + z_dest: Block, + nz_dest: Block, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Inst)] +pub struct BrTable { + #[inst(value)] + scrutinee: Value, + #[inst(value)] + table: Vec<(Value, Block)>, + + default: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Inst)] +pub struct Phi { + #[inst(value)] + values: Vec<(Value, Block)>, + ty: Type, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct 
Call {
+    #[inst(value)]
+    args: SmallVec<[Value; 8]>,
+    callee: FuncRef,
+    ret_ty: Type,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Inst)]
+#[inst(has_side_effect)]
+pub struct Return {
+    #[inst(value)]
+    arg: Option<Value>,
+}
diff --git a/crates/ir/src/inst/data.rs b/crates/ir/src/inst/data.rs
new file mode 100644
index 00000000..c74a2ec1
--- /dev/null
+++ b/crates/ir/src/inst/data.rs
@@ -0,0 +1,25 @@
+use macros::Inst;
+use smallvec::SmallVec;
+
+use crate::Value;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct Mload {
+    #[inst(value)]
+    addr: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+#[inst(has_side_effect)]
+pub struct Mstore {
+    #[inst(value)]
+    value: Value,
+    #[inst(value)]
+    addr: Value,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Hash, Inst)]
+pub struct Gep {
+    #[inst(value)]
+    values: SmallVec<[Value; 8]>,
+}
diff --git a/crates/ir/src/inst/evm/inst_set.rs b/crates/ir/src/inst/evm/inst_set.rs
new file mode 100644
index 00000000..8fa789b3
--- /dev/null
+++ b/crates/ir/src/inst/evm/inst_set.rs
@@ -0,0 +1,100 @@
+use crate::inst::*;
+use macros::inst_set;
+
+// Instruction set for the EVM target. It lists every instruction registered in
+// `InstSetBase`; an instruction type left out of this tuple is not part of the
+// set (its `has_*` query returns `None`, cf. the `TestInstSet` unit test).
+#[inst_set(InstKind = "EvmInstKind")]
+pub struct EvmInstSet(
+    arith::Neg,
+    arith::Add,
+    arith::Mul,
+    arith::Sub,
+    arith::Sdiv,
+    arith::Udiv,
+    arith::Umod,
+    arith::Smod,
+    arith::Shl,
+    arith::Shr,
+    arith::Sar,
+    cast::Sext,
+    cast::Zext,
+    cast::Trunc,
+    cast::Bitcast,
+    cmp::Lt,
+    cmp::Gt,
+    cmp::Slt,
+    cmp::Sgt,
+    cmp::Le,
+    cmp::Ge,
+    cmp::Sle,
+    cmp::Sge,
+    cmp::Eq,
+    cmp::Ne,
+    cmp::IsZero,
+    control_flow::Jump,
+    control_flow::Br,
+    control_flow::Phi,
+    control_flow::BrTable,
+    control_flow::Call,
+    control_flow::Return,
+    data::Mload,
+    data::Mstore,
+    data::Gep,
+    logic::Not,
+    logic::And,
+    logic::Or,
+    logic::Xor,
+    evm::EvmStop,
+    evm::EvmAddMod,
+    evm::EvmMulMod,
+    evm::EvmExp,
+    evm::EvmByte,
+    evm::EvmKeccak256,
+    evm::EvmAddress,
+    evm::EvmBalance,
+    evm::EvmOrigin,
+    evm::EvmCaller,
+    evm::EvmCallValue,
+    evm::EvmCallDataLoad,
+    evm::EvmCallDataCopy,
+    evm::EvmCodeSize,
+    evm::EvmCodeCopy,
+    evm::EvmGasPrice,
+    evm::EvmExtCodeSize,
+    evm::EvmExtCodeCopy,
+    evm::EvmReturnDataSize,
+    evm::EvmReturnDataCopy,
+    evm::EvmExtCodeHash,
+    evm::EvmBlockHash,
+    evm::EvmCoinBase,
+    evm::EvmTimestamp,
+    evm::EvmNumber,
+    evm::EvmPrevRandao,
+    evm::EvmGasLimit,
+    evm::EvmChainId,
+    evm::EvmSelfBalance,
+    evm::EvmBaseFee,
+    evm::EvmBlobHash,
+    evm::EvmBlobBaseFee,
+    evm::EvmMstore8,
+    evm::EvmSload,
+    evm::EvmSstore,
+    evm::EvmMsize,
+    evm::EvmGas,
+    evm::EvmTload,
+    evm::EvmTstore,
+    evm::EvmLog0,
+    evm::EvmLog1,
+    evm::EvmLog2,
+    evm::EvmLog3,
+    evm::EvmLog4,
+    evm::EvmCreate,
+    evm::EvmCall,
+    evm::EvmReturn,
+    evm::EvmDelegateCall,
+    evm::EvmCreate2,
+    evm::EvmStaticCall,
+    evm::EvmRevert,
+    evm::EvmSelfDestruct,
+);
diff --git a/crates/ir/src/inst/evm/mod.rs b/crates/ir/src/inst/evm/mod.rs
new file mode 100644
index 00000000..6b2b0af6
--- /dev/null
+++ b/crates/ir/src/inst/evm/mod.rs
@@ -0,0 +1,398 @@
+use macros::Inst;
+pub mod inst_set;
+
+use crate::Value;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+#[inst(has_side_effect)]
+pub struct EvmStop {}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmAddMod {
+    #[inst(value)]
+    lhs: Value,
+    #[inst(value)]
+    rhs: Value,
+    #[inst(value)]
+    modulus: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmMulMod {
+    #[inst(value)]
+    lhs: Value,
+    #[inst(value)]
+    rhs: Value,
+    #[inst(value)]
+    modulus: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmExp {
+    #[inst(value)]
+    base: Value,
+    #[inst(value)]
+    exponent: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmByte {
+    #[inst(value)]
+    pos: Value,
+    #[inst(value)]
+    value: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmKeccak256 {
+    #[inst(value)]
+    addr: Value,
+    #[inst(value)]
+    len: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmAddress {}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmBalance {
+    #[inst(value)]
+    contract_addr: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmOrigin {}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmCaller {}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmCallValue {}
+
+/// Instruction with a single operand, `data_offset`.
+///
+/// The operand carries `#[inst(value)]` so that the derived
+/// `Inst::visit_values` / `Inst::visit_values_mut` report it; the derive only
+/// visits fields that have this attribute, so without it the operand would be
+/// invisible to every pass that walks instruction values.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmCallDataLoad {
+    #[inst(value)]
+    data_offset: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+#[inst(has_side_effect)]
+pub struct EvmCallDataCopy {
+    #[inst(value)]
+    dst_addr: Value,
+    #[inst(value)]
+    data_offset: Value,
+    #[inst(value)]
+    len: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmCodeSize {}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+#[inst(has_side_effect)]
+pub struct EvmCodeCopy {
+    #[inst(value)]
+    dst_addr: Value,
+    #[inst(value)]
+    code_offset: Value,
+    #[inst(value)]
+    len: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmGasPrice {}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmExtCodeSize {
+    #[inst(value)]
+    ext_addr: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+#[inst(has_side_effect)]
+pub struct EvmExtCodeCopy {
+    #[inst(value)]
+    ext_addr: Value,
+    #[inst(value)]
+    dst_addr: Value,
+    #[inst(value)]
+    code_offset: Value,
+    #[inst(value)]
+    len: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmReturnDataSize {}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+#[inst(has_side_effect)]
+pub struct EvmReturnDataCopy {
+    #[inst(value)]
+    dst_addr: Value,
+    #[inst(value)]
+    data_offset: Value,
+    #[inst(value)]
+    len: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)]
+pub struct EvmExtCodeHash {
+    #[inst(value)]
+    ext_addr: Value,
+}
+
+#[derive(Debug, Clone, Copy, PartialEq,
Eq, Hash, Inst)] +pub struct EvmBlockHash { + #[inst(value)] + block_num: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmCoinBase { /* NOTE(review): this operand list is identical to `EvmBlockHash`'s; presumably the coinbase query takes no operand (like the neighbouring `EvmTimestamp` / `EvmNumber`) -- confirm against the EVM opcode table and drop `block_num` if so. */ + #[inst(value)] + block_num: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmTimestamp {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmNumber {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmPrevRandao {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmGasLimit {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmChainId {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmSelfBalance {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmBaseFee {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmBlobHash { + #[inst(value)] + idx: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmBlobBaseFee {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmMstore8 { + #[inst(value)] + addr: Value, + #[inst(value)] + val: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmSload { + #[inst(value)] + key: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmSstore { + #[inst(value)] + key: Value, + #[inst(value)] + val: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmMsize {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct EvmGas {} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmTload { + #[inst(value)] + key: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmTstore { + #[inst(value)] + key: Value, + #[inst(value)] + val: 
Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmLog0 { + #[inst(value)] + addr: Value, + #[inst(value)] + len: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmLog1 { + #[inst(value)] + addr: Value, + #[inst(value)] + len: Value, + #[inst(value)] + topic0: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmLog2 { + #[inst(value)] + addr: Value, + #[inst(value)] + len: Value, + #[inst(value)] + topic0: Value, + #[inst(value)] + topic1: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmLog3 { + #[inst(value)] + addr: Value, + #[inst(value)] + len: Value, + #[inst(value)] + topic0: Value, + #[inst(value)] + topic1: Value, + #[inst(value)] + topic2: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmLog4 { + #[inst(value)] + addr: Value, + #[inst(value)] + len: Value, + #[inst(value)] + topic0: Value, + #[inst(value)] + topic1: Value, + #[inst(value)] + topic2: Value, + #[inst(value)] + topic3: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmCreate { + #[inst(value)] + val: Value, + #[inst(value)] + addr: Value, + #[inst(value)] + len: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmCall { + #[inst(value)] + gas: Value, + #[inst(value)] + addr: Value, + #[inst(value)] + val: Value, + #[inst(value)] + arg_addr: Value, + #[inst(value)] + arg_len: Value, + #[inst(value)] + ret_addr: Value, + #[inst(value)] + ret_offset: Value, /* NOTE(review): `EvmDelegateCall` and `EvmStaticCall` pair `ret_addr` with `ret_len`; presumably this operand is the return-buffer length as well and should be named `ret_len` -- confirm before renaming, since the derive generates accessors from the field name. */ +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmReturn { + #[inst(value)] + addr: Value, + #[inst(value)] + len: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, 
Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmDelegateCall { + #[inst(value)] + gas: Value, + #[inst(value)] + ext_addr: Value, + #[inst(value)] + arg_addr: Value, + #[inst(value)] + arg_len: Value, + #[inst(value)] + ret_addr: Value, + #[inst(value)] + ret_len: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmCreate2 { + #[inst(value)] + val: Value, + #[inst(value)] + addr: Value, + #[inst(value)] + len: Value, + #[inst(value)] + salt: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmStaticCall { + #[inst(value)] + gas: Value, + #[inst(value)] + ext_addr: Value, + #[inst(value)] + arg_addr: Value, + #[inst(value)] + arg_len: Value, + #[inst(value)] + ret_addr: Value, + #[inst(value)] + ret_len: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmRevert { + #[inst(value)] + addr: Value, + #[inst(value)] + len: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +#[inst(has_side_effect)] +pub struct EvmSelfDestruct { + #[inst(value)] + addr: Value, +} diff --git a/crates/ir/src/inst/inst_set.rs b/crates/ir/src/inst/inst_set.rs new file mode 100644 index 00000000..2a5d14df --- /dev/null +++ b/crates/ir/src/inst/inst_set.rs @@ -0,0 +1,219 @@ +use super::{arith, cast, cmp, control_flow, data, evm, logic, Inst}; + +use macros::define_inst_set_base; + +define_inst_set_base! { + /// This trait is used to determine at runtime whether a certain instruction set includes a specific inst. + /// If a certain instruction set `IS` implements `HasInst<I>`, + /// the corresponding `has_i(&self) -> Option<&dyn HasInst<I>>` method always returns `Some`. + /// + /// Since every instruction set implements `HasInst<Inst>` if it contains `Inst`, + /// this trait is naturally intended to be used as a trait object. 
+ /// + /// NOTE: Do NOT implement this trait manually, use `sonatina_macros::inst_set` instead. + trait InstSetBase { + arith::Neg, + arith::Add, + arith::Mul, + arith::Sub, + arith::Sdiv, + arith::Udiv, + arith::Umod, + arith::Smod, + arith::Shl, + arith::Shr, + arith::Sar, + cmp::Lt, + cmp::Gt, + cmp::Slt, + cmp::Sgt, + cmp::Le, + cmp::Ge, + cmp::Sle, + cmp::Sge, + cmp::Eq, + cmp::Ne, + cmp::IsZero, + logic::Not, + logic::And, + logic::Or, + logic::Xor, + cast::Sext, + cast::Zext, + cast::Trunc, + cast::Bitcast, + data::Mload, + data::Mstore, + data::Gep, + control_flow::Call, + control_flow::Jump, + control_flow::Br, + control_flow::BrTable, + control_flow::Return, + control_flow::Phi, + // Evm specific + evm::EvmStop, + evm::EvmAddMod, + evm::EvmMulMod, + evm::EvmExp, + evm::EvmByte, + evm::EvmKeccak256, + evm::EvmAddress, + evm::EvmBalance, + evm::EvmOrigin, + evm::EvmCaller, + evm::EvmCallValue, + evm::EvmCallDataLoad, + evm::EvmCallDataCopy, + evm::EvmCodeSize, + evm::EvmCodeCopy, + evm::EvmGasPrice, + evm::EvmExtCodeSize, + evm::EvmExtCodeCopy, + evm::EvmReturnDataSize, + evm::EvmReturnDataCopy, + evm::EvmExtCodeHash, + evm::EvmBlockHash, + evm::EvmCoinBase, + evm::EvmTimestamp, + evm::EvmNumber, + evm::EvmPrevRandao, + evm::EvmGasLimit, + evm::EvmChainId, + evm::EvmSelfBalance, + evm::EvmBaseFee, + evm::EvmBlobHash, + evm::EvmBlobBaseFee, + evm::EvmMstore8, + evm::EvmSload, + evm::EvmSstore, + evm::EvmMsize, + evm::EvmGas, + evm::EvmTload, + evm::EvmTstore, + evm::EvmLog0, + evm::EvmLog1, + evm::EvmLog2, + evm::EvmLog3, + evm::EvmLog4, + evm::EvmCreate, + evm::EvmCall, + evm::EvmReturn, + evm::EvmDelegateCall, + evm::EvmCreate2, + evm::EvmStaticCall, + evm::EvmRevert, + evm::EvmSelfDestruct, + } +} + +/// This trait provides the concrete mapping from an `Inst` to the corresponding enum variant. +/// Every instruction set defined by `sonatina_macros::inst_set` automatically defines enums which represent all instructions in the set. +/// e.g. 
+/// +/// ```rust,ignore +/// use sonatina_ir::inst::basic::*; +/// #[inst_set(InstKind = "InstKind")] +/// struct InstSet(Add, Sub); +/// ``` +/// defines +/// +/// ```rust +/// use sonatina_ir::inst::arith::*; +/// enum InstKind<'i> { +/// Add(&'i Add), +/// Sub(&'i Sub), +/// } +/// enum InstKindMut<'i> { +/// Add(&'i mut Add), +/// Sub(&'i mut Sub), +/// } +/// ``` +/// +/// Assuming that the all instructions are created with this instruction set, +/// the cast(resolution) from dynamic inst object to this enum always succeed. +/// +/// This macro provides the way to these safe downcast, and allow us to focus on the +/// restricted concrete instruction set, instead of "all possible" instructions. +/// +pub trait InstSetExt: InstSetBase { + type InstKind<'i>; + type InstKindMut<'i>; + + fn resolve_inst<'i>(&self, inst: &'i dyn Inst) -> Self::InstKind<'i>; + fn resolve_inst_mut<'i>(&self, inst: &'i mut dyn Inst) -> Self::InstKindMut<'i>; +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::Value; + use arith::*; + use control_flow::*; + use logic::*; + use macros::inst_set; + + #[inst_set(InstKind = "TestInstKind")] + struct TestInstSet(Add, Sub, Not, Phi, Jump); + + #[test] + fn ctor() { + let _ = TestInstSet::new(); + } + + #[test] + fn test_cast_isa() { + let inst_set = TestInstSet::new(); + assert!(inst_set.has_add().is_some()); + assert!(inst_set.has_sub().is_some()); + assert!(inst_set.has_not().is_some()); + assert!(inst_set.has_phi().is_some()); + assert!(inst_set.has_jump().is_some()); + + assert!(inst_set.has_lt().is_none()); + assert!(inst_set.has_br().is_none()); + } + + #[test] + fn inst_creation() { + let inst_set = TestInstSet::new(); + let v = Value::from_u32(1); + let _add = Add::new(&inst_set, v, v); + let _sub = Sub::new(&inst_set, v, v); + } + + #[test] + fn inst_resolution() { + let inst_set = TestInstSet::new(); + let mut insts: Vec> = Vec::new(); + + let value = Value::from_u32(1); + let add = Add::new(&inst_set, value, value); + 
insts.push(Box::new(add)); + let sub = Sub::new(&inst_set, value, value); + insts.push(Box::new(sub)); + let not = Not::new(&inst_set, value); + insts.push(Box::new(not)); + + let resolved = inst_set.resolve_inst(insts[0].as_ref()); + assert!(matches!(resolved, TestInstKind::Add(_))); + let resolved = inst_set.resolve_inst(insts[1].as_ref()); + assert!(matches!(resolved, TestInstKind::Sub(_))); + let resolved = inst_set.resolve_inst(insts[2].as_ref()); + assert!(matches!(resolved, TestInstKind::Not(_))); + + let resolved = inst_set.resolve_inst_mut(insts[0].as_mut()); + assert!(matches!(resolved, TestInstKindMut::Add(_))); + let resolved = inst_set.resolve_inst_mut(insts[1].as_mut()); + assert!(matches!(resolved, TestInstKindMut::Sub(_))); + let resolved = inst_set.resolve_inst_mut(insts[2].as_mut()); + assert!(matches!(resolved, TestInstKindMut::Not(_))); + } +} + +pub(super) mod sealed { + /// This trait has two roles, + /// 1. works as a sealed trait. + /// 2. ensure that an `Inst` is definitely registered to the `InstGroup`. 
+ pub trait Registered {} +} diff --git a/crates/ir/src/inst/logic.rs b/crates/ir/src/inst/logic.rs new file mode 100644 index 00000000..e617bdff --- /dev/null +++ b/crates/ir/src/inst/logic.rs @@ -0,0 +1,33 @@ +use macros::Inst; + +use crate::Value; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Not { + #[inst(value)] + arg: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct And { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Or { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Inst)] +pub struct Xor { + #[inst(value)] + lhs: Value, + #[inst(value)] + rhs: Value, +} diff --git a/crates/ir/src/inst/mod.rs b/crates/ir/src/inst/mod.rs new file mode 100644 index 00000000..4843418e --- /dev/null +++ b/crates/ir/src/inst/mod.rs @@ -0,0 +1,114 @@ +pub mod arith; +pub mod cast; +pub mod cmp; +pub mod control_flow; +pub mod data; +pub mod evm; +pub mod inst_set; +pub mod logic; + +use std::any::{Any, TypeId}; + +use smallvec::SmallVec; + +use crate::Value; + +pub trait Inst: inst_set::sealed::Registered + Any { + fn visit_values(&self, f: &mut dyn FnMut(Value)); + fn visit_values_mut(&mut self, f: &mut dyn FnMut(&mut Value)); + fn has_side_effect(&self) -> bool; + fn as_text(&self) -> &'static str; +} + +/// This trait works as a "proof" that a specific ISA contains `I`, +/// and then allows a construction and reflection of type `I` in that specific ISA context. 
+pub trait HasInst<I: Inst> {
+    /// Returns `true` iff the dynamic type of `inst` is exactly `I`.
+    fn is(&self, inst: &dyn Inst) -> bool {
+        inst.type_id() == TypeId::of::<I>()
+    }
+}
+
+/// Operand-field helper used by the `Inst` derive: feeds every `Value`
+/// contained in `self` to the callback.
+pub(crate) trait ValueVisitable {
+    fn visit_with(&self, f: &mut dyn FnMut(Value));
+    fn visit_mut_with(&mut self, f: &mut dyn FnMut(&mut Value));
+}
+
+impl ValueVisitable for Value {
+    fn visit_with(&self, f: &mut dyn FnMut(Value)) {
+        f(*self)
+    }
+
+    fn visit_mut_with(&mut self, f: &mut dyn FnMut(&mut Value)) {
+        f(self)
+    }
+}
+
+/// `None` contributes no values.
+impl<V> ValueVisitable for Option<V>
+where
+    V: ValueVisitable,
+{
+    fn visit_with(&self, f: &mut dyn FnMut(Value)) {
+        if let Some(value) = self {
+            value.visit_with(f)
+        }
+    }
+
+    fn visit_mut_with(&mut self, f: &mut dyn FnMut(&mut Value)) {
+        if let Some(value) = self.as_mut() {
+            value.visit_mut_with(f)
+        }
+    }
+}
+
+/// Only the first tuple element is visited; the second (e.g. the `Block` in
+/// `(Value, Block)`) is ignored.
+impl<V, T> ValueVisitable for (V, T)
+where
+    V: ValueVisitable,
+{
+    fn visit_with(&self, f: &mut dyn FnMut(Value)) {
+        self.0.visit_with(f)
+    }
+
+    fn visit_mut_with(&mut self, f: &mut dyn FnMut(&mut Value)) {
+        self.0.visit_mut_with(f)
+    }
+}
+
+impl<V> ValueVisitable for Vec<V>
+where
+    V: ValueVisitable,
+{
+    fn visit_with(&self, f: &mut dyn FnMut(Value)) {
+        self.iter().for_each(|v| v.visit_with(f))
+    }
+
+    fn visit_mut_with(&mut self, f: &mut dyn FnMut(&mut Value)) {
+        self.iter_mut().for_each(|v| v.visit_mut_with(f))
+    }
+}
+
+impl<V> ValueVisitable for [V]
+where
+    V: ValueVisitable,
+{
+    fn visit_with(&self, f: &mut dyn FnMut(Value)) {
+        self.iter().for_each(|v| v.visit_with(f))
+    }
+
+    fn visit_mut_with(&mut self, f: &mut dyn FnMut(&mut Value)) {
+        self.iter_mut().for_each(|v| v.visit_mut_with(f))
+    }
+}
+
+impl<V, const N: usize> ValueVisitable for SmallVec<[V; N]>
+where
+    V: ValueVisitable,
+    [V; N]: smallvec::Array<Item = V>,
+{
+    fn visit_with(&self, f: &mut dyn FnMut(Value)) {
+        self.iter().for_each(|v| v.visit_with(f))
+    }
+
+    fn visit_mut_with(&mut self, f: &mut dyn FnMut(&mut Value)) {
+        self.iter_mut().for_each(|v| v.visit_mut_with(f))
+    }
+}
diff --git a/crates/ir/src/lib.rs b/crates/ir/src/lib.rs
index d36813f2..0635e983 100644
--- 
a/crates/ir/src/lib.rs +++ b/crates/ir/src/lib.rs @@ -6,6 +6,7 @@ pub mod function; pub mod global_variable; pub mod graphviz; pub mod insn; +pub mod inst; pub mod ir_writer; pub mod isa; pub mod layout; @@ -24,8 +25,21 @@ pub use function::{Function, Signature}; pub use global_variable::{GlobalVariable, GlobalVariableData}; pub use graphviz::render_to; pub use insn::{BranchInfo, DataLocationKind, Insn, InsnData}; +pub use inst::{ + inst_set::{InstSetBase, InstSetExt}, + HasInst, Inst, +}; pub use layout::Layout; pub use linkage::Linkage; pub use module::Module; pub use types::Type; pub use value::{Immediate, Value, ValueData}; + +pub(crate) use inst::ValueVisitable; + +pub mod prelude { + pub use crate::inst::{ + inst_set::{InstSetBase, InstSetExt}, + HasInst, Inst, + }; +} diff --git a/crates/macros/Cargo.toml b/crates/macros/Cargo.toml new file mode 100644 index 00000000..0952fe7a --- /dev/null +++ b/crates/macros/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "sonatina-macros" +version = "0.0.3-alpha" +edition = "2021" +authors = ["Sonatina Developers"] +license = "Apache-2.0" +readme = "../../README.md" +homepage = "https://github.com/fe-lang/sonatina/tree/main/crates/ir" +repository = "https://github.com/fe-lang/sonatina" +categories = ["compilers", "wasm"] +keywords = ["compiler", "evm", "wasm", "smart-contract"] + +[lib] +proc-macro = true + +[dependencies] +syn = { version = "1.0", features = ["full"] } +proc-macro2 = "1.0" +quote = "1.0" diff --git a/crates/macros/src/inst.rs b/crates/macros/src/inst.rs new file mode 100644 index 00000000..8834ae4e --- /dev/null +++ b/crates/macros/src/inst.rs @@ -0,0 +1,226 @@ +use super::convert_to_snake; + +use quote::quote; + +pub fn derive_inst(item: proc_macro::TokenStream) -> proc_macro::TokenStream { + let item_struct = syn::parse_macro_input!(item as syn::ItemStruct); + + match InstStruct::new(item_struct).and_then(InstStruct::build) { + Ok(impls) => quote! 
{
+            #impls
+        }
+        .into(),
+
+        Err(e) => e.to_compile_error().into(),
+    }
+}
+
+/// Parsed form of a struct annotated with `#[derive(Inst)]`.
+struct InstStruct {
+    struct_name: syn::Ident,
+    /// Set by a struct-level `#[inst(has_side_effect)]`.
+    has_side_effect: bool,
+    fields: Vec<InstField>,
+}
+
+/// One named field of an inst struct.
+struct InstField {
+    ident: syn::Ident,
+    ty: syn::Type,
+    /// Set by a field-level `#[inst(value)]`; only such fields are visited by
+    /// the generated `visit_values` / `visit_values_mut`.
+    value: bool,
+}
+
+impl InstStruct {
+    /// Validates `item_struct` and extracts everything the code generator needs.
+    ///
+    /// Fails on generic structs, on anything but named fields, on non-private
+    /// fields and on unsupported `#[inst(..)]` arguments.
+    fn new(item_struct: syn::ItemStruct) -> syn::Result<Self> {
+        let has_side_effect = Self::check_side_effect_attr(&item_struct)?;
+
+        let struct_ident = item_struct.ident;
+
+        let fields = Self::parse_fields(&item_struct.fields)?;
+
+        if item_struct.generics.lt_token.is_some() {
+            return Err(syn::Error::new_spanned(
+                item_struct.generics,
+                "generics is not allowed for inst types",
+            ));
+        }
+
+        Ok(Self {
+            struct_name: struct_ident,
+            has_side_effect,
+            fields,
+        })
+    }
+
+    /// Emits the inherent impl (constructor, accessors, casts) followed by the
+    /// `Inst` trait impl.
+    fn build(self) -> syn::Result<proc_macro2::TokenStream> {
+        let ctor = self.make_ctor();
+        let accessors = self.make_accessors();
+        let cast_fn = self.make_cast_fn();
+
+        let struct_name = &self.struct_name;
+        let impl_inst = self.impl_inst();
+        Ok(quote! {
+            impl #struct_name {
+                #ctor
+
+                #accessors
+
+                #cast_fn
+            }
+
+            #impl_inst
+        })
+    }
+
+    /// Returns whether the struct carries `#[inst(has_side_effect)]`.
+    ///
+    /// Any other argument to a struct-level `#[inst(..)]` is rejected, the same
+    /// way field-level attributes are: silently ignoring a misspelled flag
+    /// would make a side-effecting instruction look pure.
+    fn check_side_effect_attr(item_struct: &syn::ItemStruct) -> syn::Result<bool> {
+        let mut has_side_effect = false;
+
+        for attr in &item_struct.attrs {
+            if !attr.path.is_ident("inst") {
+                continue;
+            }
+
+            match attr.parse_args::<syn::Meta>()? {
+                syn::Meta::Path(path) if path.is_ident("has_side_effect") => {
+                    has_side_effect = true;
+                }
+                _ => {
+                    return Err(syn::Error::new_spanned(
+                        attr,
+                        "only `has_side_effect` is allowed",
+                    ));
+                }
+            }
+        }
+
+        Ok(has_side_effect)
+    }
+
+    /// Collects the named fields of the struct together with their
+    /// `#[inst(value)]` marker.
+    fn parse_fields(fields: &syn::Fields) -> syn::Result<Vec<InstField>> {
+        let syn::Fields::Named(fields) = fields else {
+            return Err(syn::Error::new_spanned(
+                fields,
+                "tuple struct is not allowed for inst types",
+            ));
+        };
+
+        let mut inst_fields = Vec::new();
+
+        for field in &fields.named {
+            let mut value = false;
+
+            if !matches!(field.vis, syn::Visibility::Inherited) {
+                return Err(syn::Error::new_spanned(
+                    field,
+                    "public visibility is not allowed",
+                ));
+            }
+
+            for attr in &field.attrs {
+                if attr.path.is_ident("inst") {
+                    let meta = attr.parse_args::<syn::Meta>()?;
+                    
match meta {
+                        syn::Meta::Path(path) if path.is_ident("value") => value = true,
+                        // Reject everything else, including list / name-value forms
+                        // such as `#[inst(value = ..)]`: ignoring them would silently
+                        // drop the operand from the generated visitors.
+                        _ => {
+                            return Err(syn::Error::new_spanned(attr, "only `value` is allowed"));
+                        }
+                    }
+                }
+            }
+
+            inst_fields.push(InstField {
+                ident: field
+                    .ident
+                    .clone()
+                    .expect("named fields always have an identifier"),
+                ty: field.ty.clone(),
+                value,
+            });
+        }
+
+        Ok(inst_fields)
+    }
+
+    /// Generates `new`, which takes a `HasInst<Self>` proof followed by one
+    /// argument per field, in declaration order.
+    fn make_ctor(&self) -> proc_macro2::TokenStream {
+        let ctor_args = self.fields.iter().map(|f| {
+            let ident = &f.ident;
+            let ty = &f.ty;
+            quote! {#ident: #ty}
+        });
+
+        let field_names = self.fields.iter().map(|f| &f.ident);
+        quote! {
+            #[allow(clippy::too_many_arguments)]
+            pub fn new(hi: &dyn crate::HasInst<Self>, #(#ctor_args),*) -> Self {
+                Self {
+                    #(#field_names: #field_names),*
+                }
+            }
+        }
+    }
+
+    /// Generates a `field()` / `field_mut()` accessor pair for every field.
+    fn make_accessors(&self) -> proc_macro2::TokenStream {
+        let accessors = self.fields.iter().map(|f| {
+            let ident = &f.ident;
+            let ty = &f.ty;
+            let getter = quote::format_ident!("{ident}");
+            let get_mut = quote::format_ident!("{ident}_mut");
+            quote! {
+                pub fn #getter(&self) -> &#ty {
+                    &self.#ident
+                }
+
+                pub fn #get_mut(&mut self) -> &mut #ty{
+                    &mut self.#ident
+                }
+            }
+        });
+
+        quote! {
+            #(#accessors)*
+        }
+    }
+
+    /// Generates `cast` / `cast_mut`, the checked downcasts from `dyn Inst`.
+    fn make_cast_fn(&self) -> proc_macro2::TokenStream {
+        quote! {
+            pub fn cast<'i>(hi: &dyn crate::HasInst<Self>, inst: &'i dyn crate::Inst) -> Option<&'i Self> {
+                if hi.is(inst) {
+                    // SAFETY: the provided `HasInst::<Self>::is` compares the dynamic
+                    // `TypeId` of `inst` with `TypeId::of::<Self>()`, so the data pointer
+                    // of the trait object refers to a `Self` borrowed for `'i`.
+                    // NOTE(review): this relies on implementors not overriding `is`.
+                    unsafe { Some(&*(inst as *const dyn crate::Inst as *const Self)) }
+                } else {
+                    None
+                }
+            }
+
+            pub fn cast_mut<'i>(
+                hi: &dyn crate::HasInst<Self>,
+                inst: &'i mut dyn crate::Inst,
+            ) -> Option<&'i mut Self> {
+                if hi.is(inst) {
+                    // SAFETY: same type check as in `cast`; `inst` is a unique borrow,
+                    // so handing out `&'i mut Self` creates no aliasing reference.
+                    unsafe { Some(&mut *(inst as *mut dyn crate::Inst as *mut Self)) }
+                } else {
+                    None
+                }
+            }
+        }
+    }
+
+    /// Generates the `Inst` impl: value visitors over the `#[inst(value)]`
+    /// fields, the side-effect flag and the snake-case text form of the name.
+    fn impl_inst(&self) -> proc_macro2::TokenStream {
+        let struct_name = &self.struct_name;
+        let has_side_effect = self.has_side_effect;
+        let visit_fields: Vec<_> = self
+            .fields
+            .iter()
+            .filter(|f| f.value)
+            .map(|f| &f.ident)
+            .collect();
+        let text_form = convert_to_snake(&self.struct_name.to_string());
+
+        quote!
{ + impl crate::Inst for #struct_name { + fn visit_values(&self, f: &mut dyn FnMut(crate::Value)) { + #(crate::ValueVisitable::visit_with(&self.#visit_fields, (f));)* + } + + fn visit_values_mut(&mut self, f: &mut dyn FnMut(&mut crate::Value)) { + #(crate::ValueVisitable::visit_mut_with(&mut self.#visit_fields, (f));)* + } + + fn has_side_effect(&self) -> bool { + #has_side_effect + } + + fn as_text(&self) -> &'static str { + #text_form + } + } + } + } +} diff --git a/crates/macros/src/inst_set.rs b/crates/macros/src/inst_set.rs new file mode 100644 index 00000000..49a78cdf --- /dev/null +++ b/crates/macros/src/inst_set.rs @@ -0,0 +1,273 @@ +use quote::quote; + +use crate::{convert_to_snake, inst_set_base}; + +pub fn define_inst_set( + attr: proc_macro::TokenStream, + item: proc_macro::TokenStream, +) -> proc_macro::TokenStream { + let attr_args = syn::parse_macro_input!(attr as syn::AttributeArgs); + let item_struct = syn::parse_macro_input!(item as syn::ItemStruct); + + match InstSet::new(attr_args, item_struct).and_then(InstSet::build) { + Ok(impls) => quote! 
{ + #impls + } + .into(), + + Err(e) => e.to_compile_error().into(), + } +} + +struct InstSet { + vis: syn::Visibility, + ident: syn::Ident, + insts: Vec, + inst_kind_name: syn::Ident, + inst_kind_mut_name: syn::Ident, +} + +impl InstSet { + fn new(args: Vec, s: syn::ItemStruct) -> syn::Result { + let ident = s.ident; + let vis = s.vis; + let insts = Self::parse_insts(&s.fields)?; + let inst_kind_ident = Self::parse_inst_kind_name(&args)?; + let inst_kind_mut_ident = quote::format_ident!("{inst_kind_ident}Mut"); + + Ok(Self { + vis, + ident, + insts, + inst_kind_name: inst_kind_ident, + inst_kind_mut_name: inst_kind_mut_ident, + }) + } + + fn build(self) -> syn::Result { + let inst_set = self.define_inst_set(); + let inherent_methods = self.impl_inherent_methods(); + + let has_inst_impls = self.impl_has_inst(); + let inst_set_base_impl = self.impl_inst_set_base(); + let inst_set_ext_impl = self.impl_inst_set_ext(); + + let inst_kind = self.define_inst_kind(); + + Ok(quote! { + #inst_set + #inherent_methods + + #has_inst_impls + #inst_set_base_impl + #inst_set_ext_impl + + #inst_kind + }) + } + + fn parse_inst_kind_name(args: &[syn::NestedMeta]) -> syn::Result { + let make_err = || { + Err(syn::Error::new( + proc_macro2::Span::call_site(), + "`#[inst_set(InstKind = \"{InstKindName}\")]` is required", + )) + }; + + if args.len() != 1 { + return make_err(); + } + + let syn::NestedMeta::Meta(syn::Meta::NameValue(name_value)) = &args[0] else { + return make_err(); + }; + + let inst_kind_name = match (name_value.path.get_ident(), &name_value.lit) { + (Some(ident), syn::Lit::Str(s)) if ident == "InstKind" => s.value(), + _ => return make_err(), + }; + + Ok(syn::Ident::new( + &inst_kind_name, + proc_macro2::Span::call_site(), + )) + } + + fn parse_insts(fields: &syn::Fields) -> syn::Result> { + let syn::Fields::Unnamed(fields) = fields else { + return Err(syn::Error::new_spanned( + fields, + "only tuple struct is allowed", + )); + }; + + let mut insts = 
Vec::with_capacity(fields.unnamed.len()); + for f in fields.unnamed.iter() { + let syn::Type::Path(p) = &f.ty else { + return Err(syn::Error::new_spanned( + f, + "expected path to inst type here", + )); + }; + insts.push(p.path.clone()); + } + + Ok(insts) + } + + fn define_inst_set(&self) -> proc_macro2::TokenStream { + let ident = &self.ident; + let vis = &self.vis; + quote! { + #vis struct #ident { + #[allow(clippy::type_complexity)] + table: ::rustc_hash::FxHashMap< + std::any::TypeId, + ( + &'static for<'i> fn(&Self, &'i dyn crate::Inst) -> ::InstKind<'i>, + &'static for<'i> fn( + &Self, + &'i mut dyn crate::Inst, + ) -> ::InstKindMut<'i>, + ), + >, + + } + } + } + + fn define_inst_kind(&self) -> proc_macro2::TokenStream { + let lt = syn::Lifetime::new("'i", proc_macro2::Span::call_site()); + + let variants = self.insts.iter().map(|p| { + let variant_name = self.variant_name_from_inst_path(p); + quote! { #variant_name(&#lt #p) } + }); + let variants_mut = self.insts.iter().map(|p| { + let variant_name = self.variant_name_from_inst_path(p); + quote! { #variant_name(&#lt mut #p) } + }); + + let inst_kind_name = &self.inst_kind_name; + let inst_kind_mut_name = quote::format_ident!("{inst_kind_name}Mut"); + + let vis = &self.vis; + + quote! { + #vis enum #inst_kind_name<#lt> { + #(#variants),* + } + + #vis enum #inst_kind_mut_name<#lt> { + #(#variants_mut),* + } + } + } + + fn impl_inherent_methods(&self) -> proc_macro2::TokenStream { + let insert_table_ent = |p: &syn::Path| { + let inst_name_snake = convert_to_snake(&p.segments.last().unwrap().ident.to_string()); + let cast_fn_name = quote::format_ident!("cast_{inst_name_snake}"); + let cast_mut_fn_name = quote::format_ident!("{cast_fn_name}_mut"); + let ident = &self.ident; + let inst_kind_name = &self.inst_kind_name; + let inst_kind_mut_name = &self.inst_kind_mut_name; + let variant_name = self.variant_name_from_inst_path(p); + + quote! 
{ + let tid = std::any::TypeId::of::<#p>(); + fn #cast_fn_name<'i>(self_: &#ident, inst: &'i dyn crate::Inst) -> #inst_kind_name<'i> { + let inst = #p::cast(self_, inst).unwrap(); + #inst_kind_name::#variant_name(inst) + } + fn #cast_mut_fn_name<'i>(self_: &#ident, inst: &'i mut dyn crate::Inst) -> #inst_kind_mut_name<'i> { + let inst = #p::cast_mut(self_, inst).unwrap(); + #inst_kind_mut_name::#variant_name(inst) + } + + let f: &'static for<'a, 'i> fn(&'a #ident, &'i dyn crate::Inst) -> #inst_kind_name<'i> = + &(#cast_fn_name as for<'a, 'i> fn(&'a #ident, &'i dyn crate::Inst) -> #inst_kind_name<'i>); + let f_mut: &'static for<'a, 'i> fn(&'a #ident, &'i mut dyn crate::Inst) -> #inst_kind_mut_name<'i> = + &(#cast_mut_fn_name as for<'a, 'i> fn(&'a #ident, &'i mut dyn crate::Inst) -> #inst_kind_mut_name<'i>); + table.insert(tid, (f, f_mut)); + + } + }; + + let insert_ents = self.insts.iter().map(insert_table_ent); + let ctor = quote! { + pub(crate) fn new() -> Self { + let mut table = ::rustc_hash::FxHashMap::default(); + #(#insert_ents)* + Self { table } + } + }; + + let ident = &self.ident; + quote! { + impl #ident { + #ctor + } + } + } + + fn impl_has_inst(&self) -> proc_macro2::TokenStream { + let ident = &self.ident; + let impls = self.insts.iter().map(|p| { + quote! { + impl crate::HasInst<#p> for #ident {} + } + }); + + quote! { + #(#impls)* + } + } + + fn impl_inst_set_base(&self) -> proc_macro2::TokenStream { + let methods = self.insts.iter().map(|p| { + let method_name = inst_set_base::path_to_method_name(p); + quote! { + fn #method_name(&self) -> Option<&dyn crate::HasInst<#p>> { + Some(self) + } + } + }); + + let ident = &self.ident; + quote! { + impl crate::InstSetBase for #ident { + #(#methods)* + } + } + } + + fn impl_inst_set_ext(&self) -> proc_macro2::TokenStream { + let ident = &self.ident; + let inst_kind_name = &self.inst_kind_name; + let inst_kind_mut_name = &self.inst_kind_mut_name; + + quote! 
{ + impl crate::prelude::InstSetExt for #ident { + type InstKind<'i> = #inst_kind_name<'i>; + type InstKindMut<'i> = #inst_kind_mut_name<'i>; + + fn resolve_inst<'i>(&self, inst: &'i dyn crate::Inst) -> Self::InstKind<'i> { + let tid = inst.type_id(); + debug_assert!(self.table.contains_key(&tid)); + self.table[&tid].0(self, inst) + } + + fn resolve_inst_mut<'i>(&self, inst: &'i mut dyn crate::Inst) -> Self::InstKindMut<'i> { + let tid = (*inst).type_id(); + debug_assert!(self.table.contains_key(&tid)); + self.table[&tid].1(self, inst) + } + } + } + } + + fn variant_name_from_inst_path<'a>(&self, p: &'a syn::Path) -> &'a syn::Ident { + &p.segments.last().unwrap().ident + } +} diff --git a/crates/macros/src/inst_set_base.rs b/crates/macros/src/inst_set_base.rs new file mode 100644 index 00000000..6801c54f --- /dev/null +++ b/crates/macros/src/inst_set_base.rs @@ -0,0 +1,84 @@ +use quote::quote; + +use crate::convert_to_snake; + +pub fn define_inst_set_base(input: proc_macro::TokenStream) -> proc_macro::TokenStream { + let def = syn::parse_macro_input! {input as TraitDefinition}; + match def.build() { + Ok(ts) => quote! {#ts}.into(), + Err(e) => e.to_compile_error().into(), + } +} + +struct TraitDefinition { + attrs: Vec, + insts: syn::punctuated::Punctuated, +} + +impl TraitDefinition { + fn build(self) -> syn::Result { + let trait_def = self.define_trait(); + let impls = self.impl_registered(); + + Ok(quote! { + #trait_def + #impls + }) + } + + fn define_trait(&self) -> proc_macro2::TokenStream { + let methods = self.insts.iter().map(|path| { + let method_name = path_to_method_name(path); + quote! { + fn #method_name(&self) -> Option<&dyn crate::HasInst<#path>> { None } + } + }); + let attrs = &self.attrs; + + quote! { + #(#attrs)* + pub trait InstSetBase { + #(#methods)* + } + } + } + + fn impl_registered(&self) -> proc_macro2::TokenStream { + let impls = self.insts.iter().map(|path| { + quote! 
{ + impl crate::inst::inst_set::sealed::Registered for #path {} + } + }); + + quote! { + #(#impls)* + } + } +} + +impl syn::parse::Parse for TraitDefinition { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let attrs = input.call(syn::Attribute::parse_outer)?; + input.parse::()?; + let ident = input.parse::()?; + if ident != "InstSetBase" { + return Err(syn::Error::new_spanned( + ident, + "the trait name must be `InstSetBase`", + )); + } + let content; + syn::braced!(content in input); + + let insts = + syn::punctuated::Punctuated::::parse_terminated(&content)?; + + Ok(Self { attrs, insts }) + } +} + +pub(super) fn path_to_method_name(p: &syn::Path) -> syn::Ident { + let ident = &p.segments.last().as_ref().unwrap().ident; + let s_ident = convert_to_snake(&ident.to_string()); + quote::format_ident!("has_{s_ident}") +} diff --git a/crates/macros/src/lib.rs b/crates/macros/src/lib.rs new file mode 100644 index 00000000..8b850045 --- /dev/null +++ b/crates/macros/src/lib.rs @@ -0,0 +1,112 @@ +mod inst; +mod inst_set; +mod inst_set_base; + +/// A derive macro to define each instruction type. +/// This macro dervies the `Isnt` trait for the macro, +/// and implements a consructor and acccessors for each fields. +/// +/// # Usage +/// ```rust, ignore +/// use sonatina_macros::Inst; +/// +/// #[derive(Inst)] +/// #[inst(has_side_effect)] +/// struct MStore { +/// #[inst(value)] +/// lhs: Value, +/// #[inst(value)] +/// rhs: Value, +/// } +/// ``` +/// +/// # Arguments +/// - `has_side_effect`: Marks the instruction as having a side effect. +/// - `value`: Marks the field that contains value, +/// the specified field must implements `sonatina-ir::inst::ValueVisitable` trait. 
#[proc_macro_derive(Inst, attributes(inst))]
pub fn derive_inst(item: proc_macro::TokenStream) -> proc_macro::TokenStream {
    inst::derive_inst(item)
}

/// A function-like macro that defines the `InstSetBase` trait.
///
/// The input must name the trait `InstSetBase` (any other name is reported as a compile error)
/// and list the instruction type paths inside braces. Outer attributes written before the
/// definition are forwarded onto the generated trait.
///
/// For every listed instruction type the macro generates:
/// - a `has_{snake_case_name}` method on `InstSetBase` that returns `Option<&dyn HasInst<{Inst}>>`
///   and defaults to `None`, and
/// - an implementation of the sealed `Registered` trait for that type.
#[proc_macro]
pub fn define_inst_set_base(input: proc_macro::TokenStream) -> proc_macro::TokenStream {
    inst_set_base::define_inst_set_base(input)
}

/// A macro to define an instruction set that is specific to a target architecture.
/// In sonatina, an instruction set is defined as a type that implements `HasInst<{Inst}>` for all `{Inst}` it contains,
/// and also implements `InstSetBase` and `InstSetExt`.
/// This macro automatically implements these traits and modifies the type definition to enable an effective cast of instructions.
///
/// # Usage
/// ```rust, ignore
/// #[inst_set(InstKind = "TestInstKind")]
/// struct TestInstSet(Add, Sub);
/// ```
///
/// # Arguments
/// ## `InstKind = "TestInstKind"`
/// This argument specifies the name of the `enum` used as `InstSetExt::InstKind`. This enum is also generated automatically.
/// In the above example, the enum below is generated, and can be obtained via the `InstSetExt::resolve_inst` method.
/// ```rust, ignore
/// enum TestInstKind<'i> {
///     Add(&'i Add),
///     Sub(&'i Sub),
/// }
/// ```
/// A second enum with the `Mut` suffix (`TestInstKindMut` in the example) holding `&'i mut` references
/// is generated as well; it is used as `InstSetExt::InstKindMut` and returned by `InstSetExt::resolve_inst_mut`.
#[proc_macro_attribute]
pub fn inst_set(
    attr: proc_macro::TokenStream,
    input: proc_macro::TokenStream,
) -> proc_macro::TokenStream {
    inst_set::define_inst_set(attr, input)
}

/// Converts a given string to snake case.
///
/// The function iterates through each character in the string. If the character is uppercase,
/// it checks if the previous character was also uppercase. If it wasn't, it adds an underscore before
/// the current character. It then converts the character to lowercase and adds it to the result string.
///
/// No underscore is inserted before the first character, and a run of consecutive uppercase
/// letters only gets a single underscore, placed before the first letter of the run.
/// Lowercasing is Unicode-aware, so a non-ASCII uppercase letter is lowercased as well.
///
/// e.g.,
/// * `FooBar -> foo_bar`
/// * `FooBAR -> foo_bar`
fn convert_to_snake(s: &str) -> String {
    // The input length is a cheap lower bound for the output; underscores may grow it slightly.
    let mut res = String::with_capacity(s.len());
    let mut prev_is_upper = false;

    for (i, c) in s.chars().enumerate() {
        let is_upper = c.is_uppercase();
        // A word boundary is a lowercase -> uppercase transition that is not at the very start.
        if is_upper && !prev_is_upper && i != 0 {
            res.push('_');
        }
        prev_is_upper = is_upper;

        // `is_uppercase` is Unicode-aware, so the lowercasing must be too; `to_ascii_lowercase`
        // would leave letters such as `É` untouched after an underscore was already emitted.
        res.extend(c.to_lowercase());
    }

    res
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_convert_to_snake() {
        let snake = "foo_bar_baz";
        assert_eq!(convert_to_snake("FooBarBaz"), snake);
        assert_eq!(convert_to_snake("FOoBarBaz"), snake);
        assert_eq!(convert_to_snake("FOoBArBAZ"), snake);
    }

    #[test]
    fn test_convert_to_snake2() {
        let snake = "foo";
        assert_eq!(convert_to_snake("Foo"), snake);
    }

    #[test]
    fn test_convert_to_snake_non_ascii() {
        assert_eq!(convert_to_snake("FooÉtat"), "foo_état");
    }
}