diff --git a/README.md b/README.md index 9faf61d9..b548d0c4 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,129 @@ -# regorus +# Regorus -THIS REPOSITORY IS IN ACTIVE DEVELOPMENT AND NOT INTENDED FOR PRODUCTION USE. +**Regorus** is + + - *Rego*-*Rus(t)* - A fast, light-weight [Rego](https://www.openpolicyagent.org/docs/latest/policy-language/) interpreter written in Rust. + - *Rigorous* - A rigorous enforcer of well-defined Rego semantics. + +Regorus is available as a library that can be easily integrated into your Rust projects. + + +> **Warning** +> While Regorus is highly performant and can interpret complex Rego policies, it does not yet pass the full [OPA test-suite](https://www.openpolicyagent.org/docs/latest/ir/#test-suite). +> We are actively working to achieve full OPA compliance. Meanwhile, Regorus should be considered +> **experimental and used with discretion**. + +## Getting Started + +[regorus](examples/regorus.rs) is an example program that shows how to integrate Regorus into your project and evaluate Rego policies. 
+ +To build it, do + + cargo build -r --example regorus + + +Check that the regorus example program is working + + $ target/release/examples/regorus + Usage: regorus + + Commands: + eval Evaluate a Rego Query + lex Tokenize a Rego policy + parse Parse a Rego policy + help Print this message or the help of the given subcommand(s) + + Options: + -h, --help Print help + -V, --version Print version + + +First, let's evaluate a simple Rego expression `1*2+3` + + target/release/examples/regorus eval "1*2+3" + +This produces the following output + + { + "result": [ + { + "expressions": [ + { + "value": 5, + "text": "1*2+3", + "location": { + "row": 1, + "col": 1 + } + } + ] + } + ] + } + +Next, evaluate a sample [policy](examples/example.rego) and [input](examples/input.json) (borrowed from [Rego tutorial](https://www.openpolicyagent.org/docs/latest/#2-try-opa-eval)): + + target/release/examples/regorus eval -d examples/example.rego -i examples/input.json data.example + +Finally, evaluate real-world [policies](tests/aci/) used in Azure Container Instances (ACI) + + target/release/examples/regorus eval -d tests/aci/framework.rego \ + -d tests/aci/policy.rego \ + -d tests/aci/api.rego \ + -d tests/aci/data.json \ + -i tests/aci/input.json \ + data.policy.mount_overlay=x + + +## ACI Policies + +Regorus successfully passes the ACI policy test-suite. It is fast and can run each of the tests in a few milliseconds. 
+ + $ cargo test -r --test aci + Finished release [optimized + debuginfo] target(s) in 0.05s + Running tests/aci/main.rs (target/release/deps/aci-2cd8d21a893a2450) + aci/mount_device passed 3.863292ms + aci/mount_overlay passed 3.6905ms + aci/scratch_mount passed 3.643041ms + aci/create_container passed 5.046333ms + aci/shutdown_container passed 3.632ms + aci/scratch_unmount passed 3.631333ms + aci/unmount_overlay passed 3.609916ms + aci/unmount_device passed 3.626875ms + aci/load_fragment passed 4.045167ms + +Run the ACI policies in the `tests/aci` directory, using data `tests/aci/data.json` and input `tests/aci/input.json`: + + target/release/examples/regorus eval \ + -b tests/aci \ + -d tests/aci/data.json \ + -i tests/aci/input.json \ + data.framework.mount_overlay=x + +Verify that [OPA](https://github.com/open-policy-agent/opa/releases) produces the same output + + diff <(target/release/examples/regorus eval -b tests/aci -d tests/aci/data.json -i tests/aci/input.json data.framework.mount_overlay=x) <(opa eval -b tests/aci -d tests/aci/data.json -i tests/aci/input.json data.framework.mount_overlay=x) + +## Performance + +To check how fast Regorus runs on your system, first install a tool like [hyperfine](https://github.com/sharkdp/hyperfine). 
+ + cargo install hyperfine + +Then benchmark evaluation of the ACI policies, + + $ hyperfine "target/release/examples/regorus eval -b tests/aci -d tests/aci/data.json -i tests/aci/input.json data.framework.mount_overlay=x" + Benchmark 1: target/release/examples/regorus eval -b tests/aci -d tests/aci/data.json -i tests/aci/input.json data.framework.mount_overlay=x + Time (mean ± σ): 4.6 ms ± 0.2 ms [User: 4.1 ms, System: 0.4 ms] + Range (min … max): 4.4 ms … 6.0 ms 422 runs + +Compare it with OPA + + $ hyperfine "opa eval -b tests/aci -d tests/aci/data.json -i tests/aci/input.json data.framework.mount_overlay=x" + Benchmark 1: opa eval -b tests/aci -d tests/aci/data.json -i tests/aci/input.json data.framework.mount_overlay=x + Time (mean ± σ): 45.2 ms ± 0.6 ms [User: 68.8 ms, System: 5.1 ms] + Range (min … max): 43.8 ms … 46.7 ms 62 runs -**Regorus** is a Rego interpreter, analyzer and checker written in Rust. -**Regorus** also aims to be a rigorous enforcer of formally defined Rego semantics. ## Contributing diff --git a/examples/example.rego b/examples/example.rego new file mode 100644 index 00000000..acd62dae --- /dev/null +++ b/examples/example.rego @@ -0,0 +1,26 @@ +package example + +default allow := false # unless otherwise defined, allow is false + +allow := true { # allow is true if... + count(violation) == 0 # there are zero violations. +} + +violation[server.id] { # a server is in the violation set if... + some server + public_server[server] # it exists in the 'public_server' set and... + server.protocols[_] == "http" # it contains the insecure "http" protocol. +} + +violation[server.id] { # a server is in the violation set if... + server := input.servers[_] # it exists in the input.servers collection and... + server.protocols[_] == "telnet" # it contains the "telnet" protocol. +} + +public_server[server] { # a server exists in the public_server set if... + some i, j + server := input.servers[_] # it exists in the input.servers collection and... 
+ server.ports[_] == input.ports[i].id # it references a port in the input.ports collection and... + input.ports[i].network == input.networks[j].id # the port references a network in the input.networks collection and... + input.networks[j].public # the network is public. +} \ No newline at end of file diff --git a/examples/input.json b/examples/input.json new file mode 100644 index 00000000..af31e00c --- /dev/null +++ b/examples/input.json @@ -0,0 +1,20 @@ +{ + "servers": [ + {"id": "app", "protocols": ["https", "ssh"], "ports": ["p1", "p2", "p3"]}, + {"id": "db", "protocols": ["mysql"], "ports": ["p3"]}, + {"id": "cache", "protocols": ["memcache"], "ports": ["p3"]}, + {"id": "ci", "protocols": ["http"], "ports": ["p1", "p2"]}, + {"id": "busybox", "protocols": ["telnet"], "ports": ["p1"]} + ], + "networks": [ + {"id": "net1", "public": false}, + {"id": "net2", "public": false}, + {"id": "net3", "public": true}, + {"id": "net4", "public": true} + ], + "ports": [ + {"id": "p1", "network": "net1"}, + {"id": "p2", "network": "net3"}, + {"id": "p3", "network": "net2"} + ] +} diff --git a/examples/regorus.rs b/examples/regorus.rs index 06d995aa..1bdb296b 100644 --- a/examples/regorus.rs +++ b/examples/regorus.rs @@ -5,6 +5,7 @@ use anyhow::{bail, Result}; use clap::{Parser, Subcommand}; fn rego_eval( + bundles: &[String], files: &[String], input: Option, query: String, @@ -13,11 +14,30 @@ fn rego_eval( // Create engine. let mut engine = regorus::Engine::new(); + // Load files from given bundles. + for dir in bundles.iter() { + let entries = + std::fs::read_dir(dir).or_else(|e| bail!("failed to read bundle {dir}.\n{e}"))?; + // Loop through each entry in the bundle folder. + for entry in entries { + let entry = entry.or_else(|e| bail!("failed to unwrap entry. {e}"))?; + let path = entry.path(); + + // Process only .rego files. 
+ match (path.is_file(), path.extension()) { + (true, Some(ext)) if ext == "rego" => {} + _ => continue, + } + + engine.add_policy_from_file(entry.path())?; + } + } + // Load given files. for file in files.iter() { if file.ends_with(".rego") { // Read policy file. - engine.add_policy_from_file(file.to_string())?; + engine.add_policy_from_file(file)?; } else { // Read data file. let data = if file.ends_with(".json") { @@ -92,6 +112,10 @@ fn rego_parse(file: String) -> Result<()> { enum RegorusCommand { /// Evaluate a Rego Query. Eval { + /// Directories containing Rego files. + #[arg(long, short, value_name = "bundle")] + bundles: Vec, + /// Policy or data files. Rego, json or yaml. #[arg(long, short, value_name = "policy.rego|data.json|data.yaml")] data: Vec, @@ -137,11 +161,12 @@ fn main() -> Result<()> { let cli = Cli::parse(); match cli.command { RegorusCommand::Eval { + bundles, data, input, query, trace, - } => rego_eval(&data, input, query, trace), + } => rego_eval(&bundles, &data, input, query, trace), RegorusCommand::Lex { file, verbose } => rego_lex(file, verbose), RegorusCommand::Parse { file } => rego_parse(file), } diff --git a/src/engine.rs b/src/engine.rs index ef8b9b8e..8cb61f2d 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -9,6 +9,9 @@ use crate::scheduler::*; use crate::utils::gather_functions; use crate::value::*; +use std::convert::AsRef; +use std::path::Path; + use anyhow::Result; #[derive(Clone)] @@ -42,7 +45,7 @@ impl Engine { Ok(()) } - pub fn add_policy_from_file(&mut self, path: String) -> Result<()> { + pub fn add_policy_from_file>(&mut self, path: P) -> Result<()> { let source = Source::from_file(path)?; let mut parser = Parser::new(&source)?; self.modules.push(Ref::new(parser.parse()?)); diff --git a/src/interpreter.rs b/src/interpreter.rs index a930d102..b83947cd 100644 --- a/src/interpreter.rs +++ b/src/interpreter.rs @@ -52,10 +52,23 @@ impl Default for Interpreter { } } +#[derive(Debug, Clone, Serialize)] +pub struct Location { + 
pub row: u16, + pub col: u16, +} + +#[derive(Debug, Clone, Serialize)] +pub struct Expression { + pub value: Value, + pub text: Rc, + pub location: Location, +} + #[derive(Debug, Clone, Serialize)] pub struct QueryResult { // Expressions is shown first to match OPA. - pub expressions: Vec, + pub expressions: Vec, #[serde(skip_serializing_if = "Value::is_empty_object")] pub bindings: Value, } @@ -911,19 +924,15 @@ impl Interpreter { Ok(count > 0) } - fn make_expression_result(span: &Span, v: &Value) -> Value { - let mut loc = BTreeMap::new(); - loc.insert(Value::String("row".into()), Value::from(span.line as i64)); - loc.insert(Value::String("col".into()), Value::from(span.col as i64)); - - let mut expr = BTreeMap::new(); - expr.insert(Value::String("value".into()), v.clone()); - expr.insert(Value::String("location".into()), Value::from_map(loc)); - expr.insert( - Value::String("text".into()), - Value::String(span.text().to_string().into()), - ); - Value::from_map(expr) + fn make_expression_result(span: &Span, v: &Value) -> Expression { + Expression { + value: v.clone(), + text: span.text().to_string().into(), + location: Location { + row: span.line, + col: span.col, + }, + } } fn eval_stmt_impl(&mut self, stmt: &LiteralStmt, stmts: &[&LiteralStmt]) -> Result { @@ -1300,7 +1309,10 @@ impl Interpreter { .insert(Value::String(name.to_string().into()), value.clone()); } } - if result.expressions.iter().all(|v| v != &Value::Undefined) + if result + .expressions + .iter() + .all(|v| v.value != Value::Undefined) && !result.expressions.is_empty() { ctx.results.result.push(result); @@ -1419,7 +1431,10 @@ impl Interpreter { .insert(Value::String(name.to_string().into()), value.clone()); } } - if result.expressions.iter().all(|v| v != &Value::Undefined) + if result + .expressions + .iter() + .all(|v| v.value != Value::Undefined) && !result.expressions.is_empty() { ctx.results.result.push(result); @@ -2524,13 +2539,22 @@ impl Interpreter { for (k, ord) in schedule.order.iter() 
{ if k == query { for idx in 0..results.result.len() { - let mut ordered_expressions = vec![Value::Undefined; ord.len()]; + let e = Expression { + value: Value::Undefined, + text: "".into(), + location: Location { row: 0, col: 0 }, + }; + let mut ordered_expressions = + vec![e; results.result[idx].expressions.len()]; for (expr_idx, value) in results.result[idx].expressions.iter().enumerate() { let orig_idx = ord[expr_idx] as usize; ordered_expressions[orig_idx] = value.clone(); } - if !ordered_expressions.iter().any(|v| v == &Value::Undefined) { + if !ordered_expressions + .iter() + .any(|v| v.value == Value::Undefined) + { results.result[idx].expressions = ordered_expressions; } } diff --git a/src/lexer.rs b/src/lexer.rs index 35be5241..a95f2710 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -5,6 +5,9 @@ use core::fmt::{Debug, Formatter}; use core::iter::Peekable; use core::str::CharIndices; +use std::convert::AsRef; +use std::path::Path; + use crate::value::Value; use anyhow::{anyhow, bail, Result}; @@ -113,12 +116,16 @@ impl Source { } } - pub fn from_file(path: String) -> Result { + pub fn from_file>(path: P) -> Result { let contents = match std::fs::read_to_string(&path) { Ok(c) => c, - Err(e) => bail!("Failed to read {path}. {e}"), + Err(e) => bail!("Failed to read {}. 
{e}", path.as_ref().display()), }; - Ok(Self::new(path, contents)) + // TODO: retain path instead of converting to string + Ok(Self::new( + path.as_ref().to_string_lossy().to_string(), + contents, + )) } pub fn file(&self) -> &String { diff --git a/tests/aci/data.json b/tests/aci/data.json new file mode 100644 index 00000000..2b4b9f4c --- /dev/null +++ b/tests/aci/data.json @@ -0,0 +1,12 @@ +{ + "metadata": { + "devices": { + "/run/layers/p0-layer0": "1b80f120dbd88e4355d6241b519c3e25290215c469516b49dece9cf07175a766", + "/run/layers/p0-layer1": "e769d7487cc314d3ee748a4440805317c19262c7acd2fdbdb0d47d2e4613a15c", + "/run/layers/p0-layer2": "eb36921e1f82af46dfe248ef8f1b3afb6a5230a64181d960d10237a08cd73c79", + "/run/layers/p0-layer3": "41d64cdeb347bf236b4c13b7403b633ff11f1cf94dbc7cf881a44d6da88c5156", + "/run/layers/p0-layer4": "4dedae42847c704da891a28c25d32201a1ae440bce2aecccfa8e6f03b97a6a6c", + "/run/layers/p0-layer5": "fe84c9d5bfddd07a2624d00333cf13c1a9c941f3a261f13ead44fc6a93bc0e7a" + } + } +} diff --git a/tests/aci/input.json b/tests/aci/input.json new file mode 100644 index 00000000..93c068b0 --- /dev/null +++ b/tests/aci/input.json @@ -0,0 +1,12 @@ +{ + "containerID": "container0", + "layerPaths": [ + "/run/layers/p0-layer0", + "/run/layers/p0-layer1", + "/run/layers/p0-layer2", + "/run/layers/p0-layer3", + "/run/layers/p0-layer4", + "/run/layers/p0-layer5" + ], + "target": "/run/gcs/c/container0/rootfs" +} diff --git a/tests/aci/main.rs b/tests/aci/main.rs index 4cfc6625..338f0c6c 100644 --- a/tests/aci/main.rs +++ b/tests/aci/main.rs @@ -48,7 +48,7 @@ fn eval_test_case(dir: &Path, case: &TestCase) -> Result { values.push(if !qr.bindings.is_empty_object() { qr.bindings.clone() } else if let Some(v) = qr.expressions.last() { - v["value"].clone() + v.value.clone() } else { Value::Undefined }); diff --git a/tests/interpreter/mod.rs b/tests/interpreter/mod.rs index 57b6a7f4..37cf5010 100644 --- a/tests/interpreter/mod.rs +++ b/tests/interpreter/mod.rs @@ -180,7 
+180,7 @@ fn push_query_results(query_results: QueryResults, results: &mut Vec) { if !query_result.bindings.is_empty_object() { results.push(query_result.bindings.clone()); } else if let Some(v) = query_result.expressions.last() { - results.push(v["value"].clone()); + results.push(v.value.clone()); } } } diff --git a/tests/opa.rs b/tests/opa.rs index 047ef475..fbe24662 100644 --- a/tests/opa.rs +++ b/tests/opa.rs @@ -57,7 +57,7 @@ fn eval_test_case(case: &TestCase) -> Result { values.push(if !qr.bindings.is_empty_object() { qr.bindings.clone() } else if let Some(v) = qr.expressions.last() { - v["value"].clone() + v.value.clone() } else { Value::Undefined });