Skip to content

Commit

Permalink
refactor: improve parser
Browse files Browse the repository at this point in the history
  • Loading branch information
Aloso committed Dec 4, 2024
1 parent 59991c3 commit aeb05d0
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 189 deletions.
24 changes: 1 addition & 23 deletions pomsky-syntax/src/exprs/alternation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use crate::Span;

use super::{Literal, Rule};
use super::Rule;

/// An [alternation](https://www.regular-expressions.info/alternation.html).
/// This is a list of alternatives. Each alternative is a [`Rule`].
Expand All @@ -20,28 +20,6 @@ pub struct Alternation {
}

impl Alternation {
/// Collapses a list of rules into a single [`Rule`].
///
/// * An empty list yields an empty string literal with the default span.
/// * A single rule is returned unchanged.
/// * Otherwise the rules are combined left to right: once the accumulated
///   rule is an alternation, each following rule is appended to it (or, if
///   that rule is itself an alternation, its alternatives are spliced in);
///   if the accumulated rule is not an alternation yet, it and the following
///   rule become a new two-element alternation. Spans are joined along the way.
pub(crate) fn new_expr(rules: Vec<Rule>) -> Rule {
    let mut remaining = rules.into_iter();

    // No rules at all: fall back to an empty literal.
    let Some(first) = remaining.next() else {
        return Rule::Literal(Literal::new("".to_string(), Span::default()));
    };

    remaining.fold(first, |merged, next| match merged {
        Rule::Alternation(mut alt) => {
            match next {
                // Flatten a following alternation into the accumulated one.
                Rule::Alternation(other) => {
                    alt.span = alt.span.join(other.span);
                    alt.rules.extend(other.rules);
                }
                // Any other rule simply becomes one more alternative.
                other => {
                    alt.span = alt.span.join(other.span());
                    alt.rules.push(other);
                }
            }
            Rule::Alternation(alt)
        }
        // The accumulator is not an alternation yet: start one from the pair.
        head => {
            let span = head.span().join(next.span());
            Rule::Alternation(Alternation { rules: vec![head, next], span })
        }
    })
}

#[cfg(feature = "dbg")]
pub(super) fn pretty_print(&self, buf: &mut crate::PrettyPrinter, needs_parens: bool) {
if needs_parens {
Expand Down
27 changes: 0 additions & 27 deletions pomsky-syntax/src/exprs/intersection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,33 +20,6 @@ pub struct Intersection {
}

impl Intersection {
/// Collapses a list of rules into a single [`Rule`], or `None` if the list
/// is empty.
///
/// A single rule is returned as-is. Otherwise the rules are combined left to
/// right: once the accumulated rule is an intersection, each following rule
/// is appended to it (or spliced in, if it is itself an intersection); if
/// the accumulated rule is not an intersection yet, it and the following
/// rule become a new two-element intersection. If the final result is an
/// intersection, its span is additionally joined with `start_span`.
pub(crate) fn new_expr(rules: Vec<Rule>, start_span: Span) -> Option<Rule> {
    let mut remaining = rules.into_iter();
    let first = remaining.next()?;

    let combined = remaining.fold(first, |merged, next| match merged {
        Rule::Intersection(mut inter) => {
            match next {
                // Flatten a following intersection into the accumulated one.
                Rule::Intersection(other) => {
                    inter.span = inter.span.join(other.span);
                    inter.rules.extend(other.rules);
                }
                // Any other rule simply becomes one more operand.
                other => {
                    inter.span = inter.span.join(other.span());
                    inter.rules.push(other);
                }
            }
            Rule::Intersection(inter)
        }
        // The accumulator is not an intersection yet: start one from the pair.
        head => {
            let span = head.span().join(next.span());
            Rule::Intersection(Intersection { rules: vec![head, next], span })
        }
    });

    // Only an intersection result has its span joined with `start_span`;
    // any other rule is passed through untouched.
    Some(match combined {
        Rule::Intersection(mut inter) => {
            inter.span = inter.span.join(start_span);
            Rule::Intersection(inter)
        }
        other => other,
    })
}

#[cfg(feature = "dbg")]
pub(super) fn pretty_print(&self, buf: &mut crate::PrettyPrinter, needs_parens: bool) {
if needs_parens {
Expand Down
21 changes: 8 additions & 13 deletions pomsky-syntax/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@ pub enum Token {
/// `$` (end boundary)
Dollar,
/// `%` (`\b` boundary)
BWord,
Percent,
/// `<` (word start)
AngleLeft,
/// `>` (word end)
AngleRight,

/// `*` (`*?` repetition)
Star,
Expand Down Expand Up @@ -42,28 +46,19 @@ pub enum Token {

/// `[` (open character class)
OpenBracket,

/// `-` (unicode range)
Dash,

/// `]` (close character class)
CloseBracket,

/// `-` (unicode range)
Dash,
/// `.` (any code point except newline)
Dot,

/// `>>` (positive lookahead)
LookAhead,

/// `<<` (positive lookbehind)
LookBehind,

/// `<` (word start)
AngleLeft,

/// `>` (word end)
AngleRight,

/// `::` (back reference)
DoubleColon,

Expand Down Expand Up @@ -100,7 +95,7 @@ impl core::fmt::Display for Token {
f.write_str(match self {
Token::Caret => "`^`",
Token::Dollar => "`$`",
Token::BWord => "`%`",
Token::Percent => "`%`",
Token::Star => "`*`",
Token::Plus => "`+`",
Token::QuestionMark => "`?`",
Expand Down
59 changes: 37 additions & 22 deletions pomsky-syntax/src/lexer/tokenize.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,42 @@ macro_rules! reserved_word_pattern {
);
}

/// Lookup table from an ASCII code point to the token for punctuation that
/// forms a token all by itself. The table is indexed by the character's
/// numeric value; every slot not assigned below stays `None`.
///
/// The table has 128 slots so that every ASCII value (`0..=127`, including
/// U+007F DEL) is a valid index. With only 127 slots, index 127 would be out
/// of bounds and indexing with it would panic.
static SINGLE_TOKEN_LOOKUP: [Option<Token>; 128] = const {
    let mut table = [const { None }; 128];
    table[b'^' as usize] = Some(Token::Caret);
    table[b'$' as usize] = Some(Token::Dollar);
    table[b'%' as usize] = Some(Token::Percent);
    table[b'<' as usize] = Some(Token::AngleLeft);
    table[b'>' as usize] = Some(Token::AngleRight);
    table[b'*' as usize] = Some(Token::Star);
    table[b'+' as usize] = Some(Token::Plus);
    table[b'?' as usize] = Some(Token::QuestionMark);
    table[b'|' as usize] = Some(Token::Pipe);
    table[b'&' as usize] = Some(Token::Ampersand);
    table[b':' as usize] = Some(Token::Colon);
    table[b')' as usize] = Some(Token::CloseParen);
    table[b'{' as usize] = Some(Token::OpenBrace);
    table[b'}' as usize] = Some(Token::CloseBrace);
    table[b',' as usize] = Some(Token::Comma);
    table[b'!' as usize] = Some(Token::Not);
    table[b'[' as usize] = Some(Token::OpenBracket);
    table[b']' as usize] = Some(Token::CloseBracket);
    table[b'-' as usize] = Some(Token::Dash);
    table[b'.' as usize] = Some(Token::Dot);
    table[b';' as usize] = Some(Token::Semicolon);
    table[b'=' as usize] = Some(Token::Equals);
    table
};

/// Returns the token for a character that forms a token all by itself, as
/// recorded in `SINGLE_TOKEN_LOOKUP`, or `None` if the character has no
/// entry there.
fn lookup_single(c: char) -> Option<Token> {
    // Checked indexing keeps this panic-free whatever the table's length:
    // a code point past the end of the table (ASCII or not) simply has no
    // single-character token. A hand-written `c < 128` guard can disagree
    // with the table size and let U+007F (DEL) index out of bounds.
    SINGLE_TOKEN_LOOKUP.get(c as usize).copied().flatten()
}

pub(crate) fn tokenize(mut input: &str) -> Vec<(Token, Span)> {
let mut result = vec![];
let mut offset = 0;
Expand All @@ -64,28 +100,7 @@ pub(crate) fn tokenize(mut input: &str) -> Vec<(Token, Span)> {
if input.starts_with("<<") => (2, Token::LookBehind);
if input.starts_with("::") => (2, Token::DoubleColon);

if c == '^' => (1, Token::Caret);
if c == '$' => (1, Token::Dollar);
if c == '<' => (1, Token::AngleLeft);
if c == '>' => (1, Token::AngleRight);
if c == '%' => (1, Token::BWord);
if c == '*' => (1, Token::Star);
if c == '+' => (1, Token::Plus);
if c == '?' => (1, Token::QuestionMark);
if c == '|' => (1, Token::Pipe);
if c == '&' => (1, Token::Ampersand);
if c == ':' => (1, Token::Colon);
if c == ')' => (1, Token::CloseParen);
if c == '{' => (1, Token::OpenBrace);
if c == '}' => (1, Token::CloseBrace);
if c == ',' => (1, Token::Comma);
if c == '!' => (1, Token::Not);
if c == '[' => (1, Token::OpenBracket);
if c == '-' => (1, Token::Dash);
if c == ']' => (1, Token::CloseBracket);
if c == '.' => (1, Token::Dot);
if c == ';' => (1, Token::Semicolon);
if c == '=' => (1, Token::Equals);
if let Some(token) = lookup_single(c) => (1, token);

if c == '\'' => match input[1..].find('\'') {
Some(len_inner) => (len_inner + 2, Token::String),
Expand Down
Loading

0 comments on commit aeb05d0

Please sign in to comment.