detect invalid identifiers that start with a number #24

Merged: 1 commit (Nov 13, 2023)
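In outline: the tokenizer now recognizes digit-led strings such as `0ident` as (non-compliant) identifiers rather than numbers, and the parser's `get_identifier` rejects identifiers that start with a digit or exceed a length limit, reporting a new `InvalidIdentifier` error. A minimal standalone sketch of that validity rule (hypothetical helper, not the crate's API; `MAX_IDENT` mirrors the constant added in parser.rs below):

```rust
// Illustrative sketch only: is_valid_a2l_ident and main() are not part of the
// crate's API; MAX_IDENT mirrors the constant added to parser.rs below.
const MAX_IDENT: usize = 1024;

fn is_valid_a2l_ident(text: &str) -> bool {
    match text.as_bytes().first() {
        // must not start with a digit, and must not be unreasonably long
        Some(first) => !first.is_ascii_digit() && text.len() <= MAX_IDENT,
        // an empty string is not an identifier either
        None => false,
    }
}

fn main() {
    assert!(is_valid_a2l_ident("ident"));
    assert!(!is_valid_a2l_ident("0ident"));
    assert!(!is_valid_a2l_ident("123"));
}
```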
a2lmacros/src/a2lspec.rs (3 changes: 1 addition & 2 deletions)
@@ -21,8 +21,7 @@ pub(crate) fn a2l_specification(tokens: TokenStream) -> TokenStream {

let types = build_typelist(structs, enums);

-let mut typesvec: Vec<(&String, &DataItem)> =
-    types.iter().map(|(key, val)| (key, val)).collect();
+let mut typesvec: Vec<(&String, &DataItem)> = types.iter().collect();
typesvec.sort_by(|a, b| a.0.cmp(b.0));

let mut result = TokenStream::new();
a2lmacros/src/a2mlspec.rs (3 changes: 1 addition & 2 deletions)
@@ -37,8 +37,7 @@ pub(crate) fn a2ml_specification(tokens: TokenStream) -> TokenStream {
result.extend(generate_a2ml_constant(&spec));
let outtypes = fixup_output_datatypes(&spec);

-let mut typesvec: Vec<(&String, &DataItem)> =
-    outtypes.iter().map(|(key, val)| (key, val)).collect();
+let mut typesvec: Vec<(&String, &DataItem)> = outtypes.iter().collect();
typesvec.sort_by(|a, b| a.0.cmp(b.0));

for (typename, a2mltype) in typesvec {
src/lib.rs (5 changes: 3 additions & 2 deletions)
@@ -32,6 +32,7 @@ pub use a2ml::{GenericIfData, GenericIfDataTaggedItem};
pub use specification::*;

#[derive(Debug, Error)]
+#[non_exhaustive]
pub enum A2lError {
/// FileOpenError: An IoError that occurred while loading a file
#[error("Failed to load {filename}: {ioerror}")]
@@ -192,7 +193,7 @@ fn load_impl(
});
}

-let firstline = tokenresult.tokens.get(0).map(|tok| tok.line).unwrap_or(1);
+let firstline = tokenresult.tokens.first().map(|tok| tok.line).unwrap_or(1);
// create a context for the parser
let context = ParseContext {
element: "A2L_FILE".to_string(),
@@ -278,7 +279,7 @@ pub fn load_fragment(a2ldata: &str) -> Result<Module, A2lError> {
// tokenize the input data
let tokenresult = tokenizer::tokenize("(fragment)".to_string(), 0, &fixed_a2ldata)
.map_err(|tokenizer_error| A2lError::TokenizerError { tokenizer_error })?;
-let firstline = tokenresult.tokens.get(0).map(|tok| tok.line).unwrap_or(1);
+let firstline = tokenresult.tokens.first().map(|tok| tok.line).unwrap_or(1);
let context = ParseContext {
element: "MODULE".to_string(),
fileid: 0,
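Both lib.rs edits are small cleanups: `#[non_exhaustive]` lets new error variants be added later without a breaking change, and `slice::first()` replaces the equivalent `get(0)`. A quick standalone illustration of the latter (not crate code):

```rust
// slice::first() and slice::get(0) are equivalent; both return Option<&T>.
fn main() {
    let tokens = [10u32, 20, 30];
    assert_eq!(tokens.first(), tokens.get(0));

    // unwrap_or(1) yields the same default line number for an empty token list
    let empty: [u32; 0] = [];
    assert_eq!(empty.first().copied().unwrap_or(1), 1);
}
```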
src/parser.rs (58 changes: 57 additions & 1 deletion)
@@ -4,6 +4,8 @@ use crate::a2ml::*;
use crate::tokenizer::*;
use crate::A2lError;

+const MAX_IDENT: usize = 1024;

struct TokenIter<'a> {
tokens: &'a [A2lToken],
pos: usize,
@@ -31,6 +33,7 @@ pub struct ParseContext {
pub inside_block: bool,
}
#[derive(Debug, Error)]
+#[non_exhaustive]
pub enum ParserError {
#[error("{filename}:{error_line}: expected token of type {expected_ttype:?}, got {actual_ttype:?} (\"{actual_text}\") inside block {element} starting on line {block_line}")]
UnexpectedTokenType {
@@ -178,6 +181,16 @@ pub enum ParserError {
block: String,
},

+#[error(
+    "{filename}:{error_line}: The string '{ident}' in block {block} is not a valid identifier"
+)]
+InvalidIdentifier {
+    filename: String,
+    error_line: u32,
+    ident: String,
+    block: String,
+},

#[error("{filename}:{error_line}: A2ML parser reports {errmsg}")]
A2mlError {
filename: String,
@@ -467,6 +480,14 @@ impl<'a> ParserState<'a> {
pub fn get_identifier(&mut self, context: &ParseContext) -> Result<String, ParserError> {
let token = self.expect_token(context, A2lTokenType::Identifier)?;
let text = self.get_token_text(token);
+if text.as_bytes()[0].is_ascii_digit() || text.len() > MAX_IDENT {
+    self.error_or_log(ParserError::InvalidIdentifier {
+        filename: self.filenames[context.fileid].to_owned(),
+        error_line: self.last_token_position,
+        block: context.element.to_owned(),
+        ident: text.to_owned(),
+    })?
+}
Ok(String::from(text))
}

@@ -722,7 +743,11 @@ impl<'a> ParserState<'a> {
if text == item_tag {
break;
} else {
-return Err(ParserError::incorrect_end_tag(self, &errcontext, text));
+return Err(ParserError::incorrect_end_tag(
+    self,
+    &errcontext,
+    text,
+));
}
}
} else {
@@ -1009,4 +1034,35 @@ mod tests {
let result = unescape_string(r#"\t"#);
assert_eq!(result, "\t");
}

+#[test]
+fn parsing_identifiers_test() {
+    let input_text = r##"ident 0ident 123"##;
+    let tokenresult = super::super::tokenizer::tokenize("test_input".to_owned(), 0, input_text);
+    assert!(tokenresult.is_ok());
+
+    let tokenresult = tokenresult.unwrap();
+    let mut log_msgs = Vec::<A2lError>::new();
+    let mut parser = ParserState::new(&tokenresult, &mut log_msgs, true);
+    let context = ParseContext {
+        element: "TEST".to_string(),
+        fileid: 0,
+        line: 0,
+        inside_block: true,
+    };
+
+    // identifier: ident
+    let res = parser.get_identifier(&context);
+    assert!(res.is_ok());
+    let val = res.unwrap();
+    assert_eq!(val, "ident");
+
+    // bad identifier - identifiers may not start with a number
+    let res = parser.get_identifier(&context);
+    assert!(matches!(res, Err(ParserError::InvalidIdentifier { .. })));
+
+    // not an identifier
+    let res = parser.get_identifier(&context);
+    assert!(res.is_err());
+}
}
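The new check calls `self.error_or_log(...)?`, which is not shown in this diff; presumably it follows the usual strict/permissive pattern referenced by the tokenizer comment below: fail hard when strict, otherwise log and continue. A rough sketch of that pattern under those assumptions (simplified types, not the real `ParserState`):

```rust
// Simplified sketch of a strict/permissive error helper; the real
// ParserState::error_or_log has a different signature and lives in parser.rs.
struct Parser {
    strict: bool,
    log: Vec<String>,
}

impl Parser {
    fn error_or_log(&mut self, err: String) -> Result<(), String> {
        if self.strict {
            // strict mode: abort parsing with the error
            Err(err)
        } else {
            // permissive mode: record the problem and keep going
            self.log.push(err);
            Ok(())
        }
    }
}

fn main() {
    let mut p = Parser { strict: false, log: Vec::new() };
    assert!(p.error_or_log("invalid identifier '0ident'".to_string()).is_ok());
    assert_eq!(p.log.len(), 1);
}
```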
src/tokenizer.rs (53 changes: 37 additions & 16 deletions)
@@ -4,6 +4,7 @@ use thiserror::Error;
use super::loader;

#[derive(Debug, Error)]
+#[non_exhaustive]
pub enum TokenizerError {
#[error("{filename}:{line}: Failed to load included file {incname}")]
IncludeFileError {
@@ -314,27 +315,42 @@ fn tokenize_core(
while bytepos < datalen && is_numchar(filebytes[bytepos]) {
bytepos += 1;
}
-let number = &filebytes[startpos..bytepos];
-if number == b"-" {
-    return Err(TokenizerError::InvalidNumericalConstant {
-        filename,
-        line,
-        tokentext: "-".to_owned(),
-    });
-} else if number == b"0x" {
-    return Err(TokenizerError::InvalidNumericalConstant {
-        filename,
-        line,
-        tokentext: "0x".to_owned(),
-    });
-}
-tokens.push(A2lToken {
-    ttype: A2lTokenType::Number,
-    startpos,
-    endpos: bytepos,
-    fileid,
-    line,
-});
+if bytepos == datalen || !is_identchar(filebytes[bytepos]) {
+    let number = &filebytes[startpos..bytepos];
+    if number == b"-" {
+        return Err(TokenizerError::InvalidNumericalConstant {
+            filename,
+            line,
+            tokentext: "-".to_owned(),
+        });
+    } else if number == b"0x" {
+        return Err(TokenizerError::InvalidNumericalConstant {
+            filename,
+            line,
+            tokentext: "0x".to_owned(),
+        });
+    }
+    tokens.push(A2lToken {
+        ttype: A2lTokenType::Number,
+        startpos,
+        endpos: bytepos,
+        fileid,
+        line,
+    });
+} else if bytepos < datalen && is_identchar(filebytes[bytepos]) {
+    // this is actually an identifier that starts with a number, which is not standard compliant
+    // it is still worth recognizing, in order to give better error messages; the error can also be bypassed if strict is false
+    while bytepos < datalen && is_identchar(filebytes[bytepos]) {
+        bytepos += 1;
+    }
+    tokens.push(A2lToken {
+        ttype: A2lTokenType::Identifier,
+        startpos,
+        endpos: bytepos,
+        fileid,
+        line,
+    });
+}
separated = false;
} else {
let endpos = if startpos + 10 < datalen {
@@ -683,6 +699,11 @@ mod tests {
let tokresult = tokenize("testcase".to_string(), 0, &data).expect("Error");
assert_eq!(tokresult.tokens.len(), 1);
assert_eq!(tokresult.tokens[0].ttype, A2lTokenType::Number);

+let data = String::from("0ident");
+let tokresult = tokenize("testcase".to_string(), 0, &data).expect("Error");
+assert_eq!(tokresult.tokens.len(), 1);
+assert_eq!(tokresult.tokens[0].ttype, A2lTokenType::Identifier);
}

#[test]
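The tokenizer change is the core of the fix: after consuming numeric characters, it now peeks at the next byte. If that byte is still an identifier character, the whole run is consumed and emitted as an `Identifier` token instead of a `Number`, so the parser can report a precise `InvalidIdentifier` error (or accept the identifier when strict checking is off). A standalone sketch of that lookahead; `is_numchar`/`is_identchar` here are simplified stand-ins, not the crate's definitions:

```rust
#[derive(Debug, PartialEq)]
enum TokenKind {
    Number,
    Identifier,
}

// simplified stand-ins for the crate's is_numchar / is_identchar
fn is_numchar(c: u8) -> bool {
    c.is_ascii_hexdigit() || matches!(c, b'x' | b'X' | b'.' | b'+' | b'-' | b'e' | b'E')
}

fn is_identchar(c: u8) -> bool {
    c.is_ascii_alphanumeric() || matches!(c, b'_' | b'.' | b'[' | b']')
}

// classify a token whose first byte already matched is_numchar
fn classify(bytes: &[u8]) -> (TokenKind, usize) {
    let mut pos = 0;
    while pos < bytes.len() && is_numchar(bytes[pos]) {
        pos += 1;
    }
    if pos < bytes.len() && is_identchar(bytes[pos]) {
        // trailing identifier characters: emit a (non-compliant) Identifier
        // so the parser can produce a precise InvalidIdentifier diagnostic
        while pos < bytes.len() && is_identchar(bytes[pos]) {
            pos += 1;
        }
        (TokenKind::Identifier, pos)
    } else {
        (TokenKind::Number, pos)
    }
}

fn main() {
    assert_eq!(classify(b"123 "), (TokenKind::Number, 3));
    assert_eq!(classify(b"0ident "), (TokenKind::Identifier, 6));
}
```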
src/writer.rs (3 changes: 1 addition & 2 deletions)
@@ -1,6 +1,5 @@
use std::cmp::Ordering;
use std::collections::HashSet;
-use std::convert::TryInto;
use std::fmt::Write;

use crate::specification::BlockInfo;
@@ -84,7 +83,7 @@ impl Writer {
where
T: std::convert::Into<f64>,
{
-let value_conv = value.try_into().unwrap();
+let value_conv = value.into();
self.add_whitespace(offset);
if value_conv == 0f64 {
self.outstring.push('0');
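Since the method's bound is `T: std::convert::Into<f64>`, the conversion is infallible, so `value.into()` replaces `value.try_into().unwrap()` and the `TryInto` import can be dropped. A minimal illustration of why the bound makes the unwrap unnecessary (standalone, not crate code):

```rust
// With T: Into<f64> the conversion cannot fail, so no TryInto/unwrap is needed.
fn to_f64<T: Into<f64>>(value: T) -> f64 {
    value.into()
}

fn main() {
    assert_eq!(to_f64(3u16), 3.0);
    assert_eq!(to_f64(2.5f32), 2.5);
}
```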