Skip to content

Commit

Permalink
fix: double underline process
Browse files Browse the repository at this point in the history
  • Loading branch information
sbwtw committed Nov 2, 2024
1 parent b194d98 commit afb345e
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 23 deletions.
62 changes: 40 additions & 22 deletions lib/src/parser/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -344,39 +344,46 @@ impl<'input> StLexer<'input> {
}
}

fn parse_identifier(&mut self, mut tok: Token, ch: char) -> Option<LexerResult> {
let mut str = String::from(ch);
let mut underline = ch == '_';
/// Identifier, Keywords or literal with type annotation prefix
fn parse_words(&mut self, mut tok: Token, ch: char) -> Option<LexerResult> {
let mut str = String::with_capacity(32);
str.push(ch);

// if ch == '_' && !self.options.allow_multiple_underline{
// if let Some(c) = self.buffer.peek1() {
// if self.is_valid_identifier_character(c) {
// return
// }
// }
// }

loop {
let next = self.buffer.peek1();
match next {
Some('_') => {
if underline && !self.options.allow_multiple_underline {
return Some(Err(LexicalError::UnexpectedCharacter(
self.buffer.current_line(),
self.buffer.current_offset(),
'_',
)));
}

underline = true;
if let Some('_') = next {
// The next character is an underscore; if the one after it is an underscore as well, stop here without consuming either.
if !self.options.allow_multiple_underline && self.buffer.peek(2) == Some('_') {
break;
}
_ => underline = false,
}

match next {
Some(c) if self.is_valid_identifier_character(c) => {
self.buffer.consume1();
str.push(c);
}
x => {
tok.length = str.len();
tok.kind = self.keywords_or_identifier(str);
return Some(Ok(tok));
}
x => break,
}
}

tok.length = str.len();
tok.kind = self.keywords_or_identifier(str);
if self.buffer.peek1() != Some('#') || !tok.kind.is_type() {
return Some(Ok(tok));
}

// The current token is a type keyword followed by '#', i.e. the type-annotation prefix of a literal such as `sint#123`.
self.buffer.consume1();
todo!()
}

fn parse_whitespace(&mut self, mut tok: Token) -> LexerResult {
Expand Down Expand Up @@ -455,6 +462,7 @@ impl<'input> StLexer<'input> {
todo!()
}

#[inline]
fn is_valid_identifier_character(&self, ch: char) -> bool {
if self.options.allow_unicode_identifier {
ch.is_alphabetic() || ch.is_ascii_alphanumeric() || matches!(ch, '_')
Expand All @@ -463,6 +471,7 @@ impl<'input> StLexer<'input> {
}
}

#[inline]
fn is_valid_identifier_first_character(&self, ch: char) -> bool {
if self.options.allow_unicode_identifier {
ch.is_alphabetic() || matches!(ch, '_')
Expand Down Expand Up @@ -566,7 +575,7 @@ impl<'input> StLexer<'input> {
Some(c) if c.is_ascii_digit() => self.parse_number(tok, c),
Some(c) if self.is_valid_identifier_first_character(c) => {
self.buffer.consume1();
self.parse_identifier(tok, c)
self.parse_words(tok, c)
}
Some(c) => {
self.buffer.consume1();
Expand Down Expand Up @@ -693,10 +702,19 @@ mod test {

#[test]
fn test_multiple_underline() {
let s = "a_b";
let mut lexer = StLexerBuilder::new().build_str(s);

let c = lexer.next().unwrap().unwrap();
assert!(matches!(c.kind, TokenKind::Identifier(_)));
assert!(matches!(c.length, 3));

let s = "a__b";
let mut lexer = StLexerBuilder::new().build_str(s);

assert!(matches!(lexer.next(), Some(Err(_))));
let c = lexer.next().unwrap().unwrap();
assert!(matches!(c.kind, TokenKind::Identifier(_)));
assert!(matches!(c.length, 1));

let s = "a__b";
let lexer_opt = StLexerOptions::default().allow_multiple_underline(true);
Expand Down
17 changes: 16 additions & 1 deletion lib/src/parser/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,8 @@ pub enum TokenKind {
Bool,
/// 'SINT', 8 bits signed
SInt,
/// 'USINT', 8 bits unsigned
USInt,
/// 'BYTE', 8 bits unsigned
Byte,
/// 'INT', 16 bits signed
Expand Down Expand Up @@ -199,7 +201,19 @@ pub enum TokenKind {

impl TokenKind {
pub fn is_type(&self) -> bool {
matches!(self, TokenKind::Int | TokenKind::Bool)
matches!(
self,
TokenKind::Bit
| TokenKind::Bool
| TokenKind::Byte
| TokenKind::SInt
| TokenKind::USInt
| TokenKind::Int
| TokenKind::UInt
| TokenKind::Real
| TokenKind::LReal
| TokenKind::String
)
}

pub fn kind_match(&self, rhs: &TokenKind) -> bool {
Expand Down Expand Up @@ -341,6 +355,7 @@ impl From<&TokenKind> for String {
TokenKind::Bit => "BIT",
TokenKind::Bool => "BOOL",
TokenKind::SInt => "SINT",
TokenKind::USInt => "USINT",
TokenKind::Byte => "BYTE",
TokenKind::UInt => "UINT",
TokenKind::DInt => "DINT",
Expand Down

0 comments on commit afb345e

Please sign in to comment.