diff --git a/a2lmacros/src/a2lspec.rs b/a2lmacros/src/a2lspec.rs
index be08e54..7b1af1f 100644
--- a/a2lmacros/src/a2lspec.rs
+++ b/a2lmacros/src/a2lspec.rs
@@ -21,8 +21,7 @@ pub(crate) fn a2l_specification(tokens: TokenStream) -> TokenStream {
 
     let types = build_typelist(structs, enums);
 
-    let mut typesvec: Vec<(&String, &DataItem)> =
-        types.iter().map(|(key, val)| (key, val)).collect();
+    let mut typesvec: Vec<(&String, &DataItem)> = types.iter().collect();
     typesvec.sort_by(|a, b| a.0.cmp(b.0));
 
     let mut result = TokenStream::new();
diff --git a/a2lmacros/src/a2mlspec.rs b/a2lmacros/src/a2mlspec.rs
index e759278..e8374b8 100644
--- a/a2lmacros/src/a2mlspec.rs
+++ b/a2lmacros/src/a2mlspec.rs
@@ -37,8 +37,7 @@ pub(crate) fn a2ml_specification(tokens: TokenStream) -> TokenStream {
     result.extend(generate_a2ml_constant(&spec));
 
     let outtypes = fixup_output_datatypes(&spec);
-    let mut typesvec: Vec<(&String, &DataItem)> =
-        outtypes.iter().map(|(key, val)| (key, val)).collect();
+    let mut typesvec: Vec<(&String, &DataItem)> = outtypes.iter().collect();
     typesvec.sort_by(|a, b| a.0.cmp(b.0));
 
     for (typename, a2mltype) in typesvec {
diff --git a/src/lib.rs b/src/lib.rs
index a7c2206..4443ac3 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -32,6 +32,7 @@ pub use a2ml::{GenericIfData, GenericIfDataTaggedItem};
 pub use specification::*;
 
 #[derive(Debug, Error)]
+#[non_exhaustive]
 pub enum A2lError {
     /// FileOpenError: An IoError that occurred while loading a file
     #[error("Failed to load {filename}: {ioerror}")]
@@ -192,7 +193,7 @@ fn load_impl(
         });
     }
 
-    let firstline = tokenresult.tokens.get(0).map(|tok| tok.line).unwrap_or(1);
+    let firstline = tokenresult.tokens.first().map(|tok| tok.line).unwrap_or(1);
     // create a context for the parser
     let context = ParseContext {
         element: "A2L_FILE".to_string(),
@@ -278,7 +279,7 @@ pub fn load_fragment(a2ldata: &str) -> Result<Module, A2lError> {
     // tokenize the input data
     let tokenresult = tokenizer::tokenize("(fragment)".to_string(), 0, &fixed_a2ldata)
         .map_err(|tokenizer_error| A2lError::TokenizerError { tokenizer_error })?;
-    let firstline = tokenresult.tokens.get(0).map(|tok| tok.line).unwrap_or(1);
+    let firstline = tokenresult.tokens.first().map(|tok| tok.line).unwrap_or(1);
     let context = ParseContext {
         element: "MODULE".to_string(),
         fileid: 0,
diff --git a/src/parser.rs b/src/parser.rs
index 9f8cd71..94fccb4 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -4,6 +4,8 @@ use crate::a2ml::*;
 use crate::tokenizer::*;
 use crate::A2lError;
 
+const MAX_IDENT: usize = 1024;
+
 struct TokenIter<'a> {
     tokens: &'a [A2lToken],
     pos: usize,
@@ -31,6 +33,7 @@ pub struct ParseContext {
     pub inside_block: bool,
 }
 #[derive(Debug, Error)]
+#[non_exhaustive]
 pub enum ParserError {
     #[error("{filename}:{error_line}: expected token of type {expected_ttype:?}, got {actual_ttype:?} (\"{actual_text}\") inside block {element} starting on line {block_line}")]
     UnexpectedTokenType {
@@ -178,6 +181,16 @@ pub enum ParserError {
         block: String,
     },
 
+    #[error(
+        "{filename}:{error_line}: The string '{ident}' in block {block} is not a valid identifier"
+    )]
+    InvalidIdentifier {
+        filename: String,
+        error_line: u32,
+        ident: String,
+        block: String,
+    },
+
     #[error("{filename}:{error_line}: A2ML parser reports {errmsg}")]
     A2mlError {
         filename: String,
@@ -467,6 +480,15 @@ impl<'a> ParserState<'a> {
     pub fn get_identifier(&mut self, context: &ParseContext) -> Result<String, ParserError> {
         let token = self.expect_token(context, A2lTokenType::Identifier)?;
         let text = self.get_token_text(token);
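+        // identifiers may not begin with a digit or be overly long; error_or_log() makes this fatal only in strict mode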
+        if text.as_bytes()[0].is_ascii_digit() || text.len() > MAX_IDENT {
+            self.error_or_log(ParserError::InvalidIdentifier {
+                filename: self.filenames[context.fileid].to_owned(),
+                error_line: self.last_token_position,
+                block: context.element.to_owned(),
+                ident: text.to_owned(),
+            })?
+        }
         Ok(String::from(text))
     }
 
@@ -722,7 +744,11 @@ impl<'a> ParserState<'a> {
                     if text == item_tag {
                         break;
                     } else {
-                        return Err(ParserError::incorrect_end_tag(self, &errcontext, text));
+                        return Err(ParserError::incorrect_end_tag(
+                            self,
+                            &errcontext,
+                            text,
+                        ));
                     }
                 }
             } else {
@@ -1009,4 +1035,35 @@ mod tests {
         let result = unescape_string(r#"\t"#);
         assert_eq!(result, "\t");
     }
+
+    #[test]
+    fn parsing_identifiers_test() {
+        let input_text = r##"ident 0ident 123"##;
+        let tokenresult =
+            super::super::tokenizer::tokenize("test_input".to_owned(), 0, input_text);
+        assert!(tokenresult.is_ok());
+
+        let tokenresult = tokenresult.unwrap();
+        let mut log_msgs = Vec::<String>::new();
+        let mut parser = ParserState::new(&tokenresult, &mut log_msgs, true);
+        let context = ParseContext {
+            element: "TEST".to_string(),
+            fileid: 0,
+            line: 0,
+            inside_block: true,
+        };
+
+        // identifier: ident
+        let res = parser.get_identifier(&context);
+        assert!(res.is_ok());
+        let val = res.unwrap();
+        assert_eq!(val, "ident");
+
+        // bad identifier - identifiers may not start with a number
+        let res = parser.get_identifier(&context);
+        assert!(matches!(res, Err(ParserError::InvalidIdentifier { .. })));
+
+        // not an identifier
+        let res = parser.get_identifier(&context);
+        assert!(res.is_err());
+    }
 }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 998726d..8f80e14 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -4,6 +4,7 @@ use thiserror::Error;
 use super::loader;
 
 #[derive(Debug, Error)]
+#[non_exhaustive]
 pub enum TokenizerError {
     #[error("{filename}:{line}: Failed to load included file {incname}")]
     IncludeFileError {
@@ -314,27 +315,43 @@ fn tokenize_core(
             while bytepos < datalen && is_numchar(filebytes[bytepos]) {
                 bytepos += 1;
             }
-            let number = &filebytes[startpos..bytepos];
-            if number == b"-" {
-                return Err(TokenizerError::InvalidNumericalConstant {
-                    filename,
+            if bytepos == datalen || !is_identchar(filebytes[bytepos]) {
+                let number = &filebytes[startpos..bytepos];
+                if number == b"-" {
+                    return Err(TokenizerError::InvalidNumericalConstant {
+                        filename,
+                        line,
+                        tokentext: "-".to_owned(),
+                    });
+                } else if number == b"0x" {
+                    return Err(TokenizerError::InvalidNumericalConstant {
+                        filename,
+                        line,
+                        tokentext: "0x".to_owned(),
+                    });
+                }
+                tokens.push(A2lToken {
+                    ttype: A2lTokenType::Number,
+                    startpos,
+                    endpos: bytepos,
+                    fileid,
                     line,
-                    tokentext: "-".to_owned(),
                 });
-            } else if number == b"0x" {
-                return Err(TokenizerError::InvalidNumericalConstant {
-                    filename,
+            } else if bytepos < datalen && is_identchar(filebytes[bytepos]) {
+                // this is actually an identifier that starts with a number, which is not standard compliant
+                // it is still worth recognizing, in order to give better error messages, and the error can be bypassed if strict is false
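+                // consume the remaining identifier characters so that a single Identifier token is emitted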
&data).expect("Error"); assert_eq!(tokresult.tokens.len(), 1); assert_eq!(tokresult.tokens[0].ttype, A2lTokenType::Number); + + let data = String::from("0ident"); + let tokresult = tokenize("testcase".to_string(), 0, &data).expect("Error"); + assert_eq!(tokresult.tokens.len(), 1); + assert_eq!(tokresult.tokens[0].ttype, A2lTokenType::Identifier); } #[test] diff --git a/src/writer.rs b/src/writer.rs index 5ecbbf9..3b8c531 100644 --- a/src/writer.rs +++ b/src/writer.rs @@ -1,6 +1,5 @@ use std::cmp::Ordering; use std::collections::HashSet; -use std::convert::TryInto; use std::fmt::Write; use crate::specification::BlockInfo; @@ -84,7 +83,7 @@ impl Writer { where T: std::convert::Into, { - let value_conv = value.try_into().unwrap(); + let value_conv = value.into(); self.add_whitespace(offset); if value_conv == 0f64 { self.outstring.push('0');