Skip to content

Commit

Permalink
feat: detect simple cases of empty intersections
Browse files Browse the repository at this point in the history
  • Loading branch information
Aloso committed Dec 21, 2024
1 parent 06ae013 commit 86ecc24
Show file tree
Hide file tree
Showing 9 changed files with 48 additions and 14 deletions.
14 changes: 9 additions & 5 deletions pomsky-lib/src/diagnose/compile_error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ pub(crate) enum CompileErrorKind {
},
NestedTest,
BadIntersection,
EmptyIntersection,
}

impl CompileErrorKind {
Expand All @@ -114,11 +115,6 @@ impl core::fmt::Display for CompileErrorKind {
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
match self {
CompileErrorKind::ParseError(kind) => write!(f, "Parse error: {kind}"),
CompileErrorKind::BadIntersection => write!(
f,
"Intersecting these expressions is not supported. Only character sets \
can be intersected."
),
CompileErrorKind::Unsupported(feature, flavor) => match feature {
Feature::SpecificUnicodeProp => write!(
f,
Expand Down Expand Up @@ -223,6 +219,14 @@ impl core::fmt::Display for CompileErrorKind {
),
_ => write!(f, "This kind of lookbehind is not supported in the {flavor:?} flavor"),
},
CompileErrorKind::BadIntersection => write!(
f,
"Intersecting these expressions is not supported. Only character sets \
can be intersected."
),
CompileErrorKind::EmptyIntersection => {
write!(f, "Intersection of expressions that do not overlap")
}
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions pomsky-lib/src/diagnose/diagnostic_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ diagnostic_code! {
UnsupportedInLookbehind = 320,
LookbehindNotConstantLength = 321,
BadIntersection = 322,
EmptyIntersection = 323,

// Warning indicating something might not be supported
PossiblyUnsupported = 400,
Expand Down Expand Up @@ -234,6 +235,7 @@ impl<'a> From<&'a CompileErrorKind> for DiagnosticCode {
C::LookbehindNotConstantLength { .. } => Self::LookbehindNotConstantLength,
C::NestedTest => Self::NestedTest,
C::BadIntersection => Self::BadIntersection,
C::EmptyIntersection => Self::EmptyIntersection,
}
}
}
Expand Down
4 changes: 3 additions & 1 deletion pomsky-lib/src/diagnose/diagnostic_kind.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@ impl From<&CompileErrorKind> for DiagnosticKind {
| K::NameUsedMultipleTimes(_)
| K::UnknownVariable { .. }
| K::RelativeRefZero => DiagnosticKind::Resolve,
K::EmptyClassNegated { .. } | K::IllegalNegation { .. } => DiagnosticKind::Invalid,
K::EmptyClassNegated { .. } | K::IllegalNegation { .. } | K::EmptyIntersection => {
DiagnosticKind::Invalid
}
K::CaptureInLet
| K::ReferenceInLet
| K::RecursiveVariable
Expand Down
18 changes: 14 additions & 4 deletions pomsky-lib/src/exprs/char_class/char_set_item.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ impl RegexCompoundCharSet {
self
}

pub(crate) fn add(mut self, other: RegexCharSet) -> Regex {
pub(crate) fn add(mut self, other: RegexCharSet) -> Option<Regex> {
if other.negative && self.intersections.iter().all(|i| i.negative) {
let mut intersections = self.intersections.into_iter();
let mut char_set = intersections.next().expect("Intersection is empty");
Expand All @@ -32,13 +32,19 @@ impl RegexCompoundCharSet {
if self.negative {
char_set = char_set.negate();
}
Regex::CharSet(char_set)
} else {
Some(Regex::CharSet(char_set))
} else if self.may_intersect(&other) {
self.intersections.push(other);
Regex::CompoundCharSet(self)
Some(Regex::CompoundCharSet(self))
} else {
None
}
}

/// Reports whether `other` may overlap with this compound character set.
///
/// Returns `true` as soon as one of the sets in `self.intersections` reports
/// that it may intersect `other`, and `false` when none of them does (which
/// includes the case where `self.intersections` has no elements).
// NOTE(review): a single member that cannot overlap `other` would already make
// the combined intersection empty, so requiring *every* member to possibly
// overlap (`all`) would detect more empty intersections. Confirm whether the
// weaker "at least one member" check is intentional before changing it.
fn may_intersect(&self, other: &RegexCharSet) -> bool {
    for member in self.intersections.iter() {
        if member.may_intersect(other) {
            return true;
        }
    }
    false
}

pub(crate) fn codegen(&self, buf: &mut String, flavor: RegexFlavor) {
if self.negative {
buf.push_str("[^");
Expand Down Expand Up @@ -76,6 +82,10 @@ impl RegexCharSet {
self
}

/// Reports whether this character set and `other` may have characters in common.
///
/// If either side is flagged `negative`, the answer is always `true` and the
/// underlying sets are not inspected. Otherwise the decision is delegated to
/// `may_intersect` on the two inner `set` values.
pub(crate) fn may_intersect(&self, other: &Self) -> bool {
    // A negated set on either side short-circuits to "may intersect".
    if self.negative || other.negative {
        return true;
    }
    self.set.may_intersect(&other.set)
}

pub(crate) fn codegen(&self, buf: &mut String, flavor: RegexFlavor, inside_compound: bool) {
if self.set.len() == 1 {
if let Some(range) = self.set.ranges().next() {
Expand Down
4 changes: 3 additions & 1 deletion pomsky-lib/src/exprs/intersection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ impl Compile for Intersection {
let (first_span, first) = rules.next().expect("Intersection is empty");

let regex = rules.try_fold(first?, |a, (right_span, b)| match as_sets(a, b?) {
Ok((left, right)) => Ok(left.add(right)),
Ok((left, right)) => left
.add(right)
.ok_or_else(|| CompileErrorKind::EmptyIntersection.at(first_span.join(right_span))),
Err(kind) => Err(kind.at(first_span.join(right_span))),
})?;

Expand Down
6 changes: 6 additions & 0 deletions pomsky-lib/src/unicode_set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,12 @@ impl UnicodeSet {
self.add(range);
}
}

/// Reports whether this set and `other` may have code points in common.
///
/// Returns `true` without looking at any ranges when either set has a
/// non-empty `props` collection. Otherwise returns `true` only if at least one
/// range of `other` is reported as contained by `self.ranges`.
// NOTE(review): whether partially overlapping ranges are detected depends on
// how `self.ranges.contains` compares ranges, which is not visible here —
// confirm against the range type's comparison semantics.
pub(crate) fn may_intersect(&self, other: &UnicodeSet) -> bool {
    let both_without_props = self.props.is_empty() && other.props.is_empty();
    if !both_without_props {
        return true;
    }

    for range in other.ranges.iter() {
        if self.ranges.contains(range) {
            return true;
        }
    }
    false
}
}

struct MaxTwoArray<T> {
Expand Down
6 changes: 6 additions & 0 deletions pomsky-lib/tests/testcases/errors/empty_set_negated.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#! expect=error
![s !s]
-----
ERROR: This negated character class matches nothing
HELP: The group is empty because it contains both `space` and `!space`, which together match every code point
SPAN: 1..7
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#! expect=error
'a' & 'b'
-----
ERROR: Intersection of expressions that do not overlap
SPAN: 0..9
3 changes: 0 additions & 3 deletions pomsky-lib/tests/testcases/intersections/strings.txt

This file was deleted.

0 comments on commit 86ecc24

Please sign in to comment.