diff --git a/cli/src/commands/scan.rs b/cli/src/commands/scan.rs index 38f43596..ad0ae3a3 100644 --- a/cli/src/commands/scan.rs +++ b/cli/src/commands/scan.rs @@ -634,7 +634,7 @@ mod output_handler { use std::collections::HashMap; #[derive(serde::Serialize)] - struct JsonPattern { + struct PatternJson { identifier: String, offset: usize, r#match: String, @@ -645,7 +645,7 @@ mod output_handler { } #[derive(serde::Serialize)] - struct JsonRule { + struct RuleJson { identifier: String, #[serde(skip_serializing_if = "Option::is_none")] namespace: Option, @@ -654,13 +654,13 @@ mod output_handler { #[serde(skip_serializing_if = "Option::is_none")] tags: Option>, #[serde(skip_serializing_if = "Option::is_none")] - strings: Option>, + strings: Option>, } #[derive(serde::Serialize)] struct JsonOutput<'a> { path: &'a str, - rules: &'a [JsonRule], + rules: &'a [RuleJson], } #[derive(serde::Serialize)] @@ -672,14 +672,14 @@ mod output_handler { fn rules_to_json( output_options: &OutputOptions, scan_results: &mut dyn ExactSizeIterator, - ) -> Vec { + ) -> Vec { scan_results .filter(move |rule| { output_options.only_tag.as_ref().map_or(true, |only_tag| { rule.tags().any(|tag| tag.identifier() == only_tag) }) }) - .map(move |rule| JsonRule { + .map(move |rule| RuleJson { identifier: rule.identifier().to_string(), namespace: output_options .include_namespace @@ -702,7 +702,7 @@ mod output_handler { fn patterns_to_json( patterns: Patterns<'_, '_>, string_limit: usize, - ) -> Vec { + ) -> Vec { patterns .flat_map(|pattern| { let identifier = pattern.identifier(); @@ -729,7 +729,7 @@ mod output_handler { ) .collect::(); - JsonPattern { + PatternJson { identifier: identifier.to_owned(), offset: match_range.start, r#match: string, @@ -760,7 +760,7 @@ mod output_handler { output: &Sender, ); /// Called when the last file has been scanned. - fn on_done(&self, _output: &Sender) {} + fn on_done(&self, _output: &Sender); } pub(super) struct TextOutputHandler { @@ -922,6 +922,10 @@ mod output_handler { } } } + + fn on_done(&self, _output: &Sender) { + // Nothing to do here. + } } pub(super) struct NdJsonOutputHandler { @@ -963,20 +967,103 @@ mod output_handler { output.send(Message::Info(line)).unwrap(); } + + fn on_done(&self, _output: &Sender) { + // Nothing to do here. + } + } + + #[derive(serde::Serialize, Clone)] + struct StringJson { + identifier: String, + offset: usize, + r#match: String, + #[serde(skip_serializing_if = "Option::is_none")] + xor_key: Option, + #[serde(skip_serializing_if = "Option::is_none")] + plaintext: Option, + } + + #[derive(serde::Serialize, Clone)] + struct HitJson { + rule: String, + file: String, + #[serde(skip_serializing_if = "Option::is_none")] + meta: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + tags: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + strings: Option>, + } + + #[derive(serde::Serialize)] + struct OutputJson { + version: String, + hits: Vec, } pub(super) struct JsonOutputHandler { output_options: OutputOptions, - matches: std::sync::Arc>>>, + output_buffer: std::sync::Arc>>, } impl JsonOutputHandler { pub(super) fn new(output_options: OutputOptions) -> Self { - let matches = std::sync::Arc::new(Mutex::new(HashMap::new())); - Self { output_options, matches } + let output_buffer = Default::default(); + Self { output_options, output_buffer } } } + fn patterns_to_string_jsons( + patterns: Patterns<'_, '_>, + string_limit: usize, + ) -> Vec { + patterns + .flat_map(|pattern| { + let identifier = pattern.identifier(); + + pattern.matches().map(|pattern_match| { + let match_range = pattern_match.range(); + let match_data = pattern_match.data(); + + let more_bytes_message = + match match_data.len().saturating_sub(string_limit) { + 0 => None, + n => Some(format!(" ... {} more bytes", n)), + }; + + let string = match_data + .iter() + .take(string_limit) + .flat_map(|char| char.escape_ascii()) + .map(|c| c as char) + .chain( + more_bytes_message + .iter() + .flat_map(|msg| msg.chars()), + ) + .collect::(); + + StringJson { + identifier: identifier.to_owned(), + offset: match_range.start, + r#match: string.clone(), + xor_key: pattern_match.xor_key(), + plaintext: pattern_match.xor_key().map(|xor_key| { + match_data + .iter() + .take(string_limit) + .map(|char| char ^ xor_key) + .flat_map(|char| char.escape_ascii()) + .map(|char| char as char) + .collect() + }), + } + }) + }) + .collect() + } + impl OutputHandler for JsonOutputHandler { fn on_file_scanned( &self, @@ -992,37 +1079,93 @@ mod output_handler { .map(|s| s.to_string()) .unwrap_or_default(); - let mut matches = self.matches.lock().unwrap(); + // prepare the increment *outside* the critical section + let hits = scan_results + .filter(|rule| { + self.output_options.only_tag.as_ref().map_or( + true, + |only_tag| { + rule.tags().any(|tag| tag.identifier() == only_tag) + }, + ) + }) + .map(|rule| { + let meta = self.output_options.include_meta.then(|| { + rule.metadata() + .map(|(meta_key, meta_val)| { + let meta_key = meta_key.to_owned(); + let meta_val = serde_json::to_value(meta_val) + .expect( + "Derived Serialize impl should never fail", + ); + + (meta_key, meta_val) + }) + .collect::>() + }); + + let file = path.clone(); + + let tags = self.output_options.include_tags.then(|| { + rule.tags() + .map(|t| t.identifier().to_string()) + .collect::>() + }); + + let strings = self.output_options.include_strings.map( + |strings_limit| { + patterns_to_string_jsons( + rule.patterns(), + strings_limit, + ) + }, + ); + + HitJson { + rule: rule.identifier().to_string(), + meta, + file, + tags, + strings, + } + }); - matches - .entry(path) - .or_default() - .extend(rules_to_json(&self.output_options, scan_results)); + { + let mut lock = self.output_buffer.lock().unwrap(); + lock.extend(hits); + } } fn on_done(&self, output: &Sender) { - let matches = self.matches.lock().unwrap(); - - let json = if self.output_options.count_only { - let json_output = matches - .iter() - .map(|(path, rules)| JsonCountOutput { - path, - count: rules.len(), - }) - .collect::>(); - - serde_json::to_string_pretty(&json_output).unwrap_or_default() - } else { - let json_output = matches - .iter() - .map(|(path, rules)| JsonOutput { path, rules }) - .collect::>(); - - serde_json::to_string_pretty(&json_output).unwrap_or_default() + let hits = { + let mut lock = self.output_buffer.lock().unwrap(); + std::mem::take(&mut *lock) }; + let version = env!("CARGO_PKG_VERSION").to_string(); + + let rendered_json = match self.output_options.count_only { + true => { + let json_output = hits + .iter() + .fold(HashMap::new(), |mut acc, it| { + *acc.entry(&it.file).or_insert(0) += 1; + acc + }) + .into_iter() + .map(|(path, count)| JsonCountOutput { path, count }) + .collect::>(); + + serde_json::to_string_pretty(&json_output) + } + false => { + let output_json = OutputJson { hits, version }; + + serde_json::to_string_pretty(&output_json) + } + } + .expect("Derived Serialize impl should never fail"); - output.send(Message::Info(json)).unwrap(); + output.send(Message::Info(rendered_json)).unwrap(); } } }