From 223c95f6fcfcd4ee5740834555076fe5bf636a50 Mon Sep 17 00:00:00 2001 From: Denys Zadorozhnyi Date: Tue, 21 Jan 2025 18:10:41 +0200 Subject: [PATCH] fix: relax `Ident` and `ProcedureName` validation, use checked constructors in deserialization. --- assembly/src/ast/ident.rs | 5 +++- assembly/src/ast/procedure/name.rs | 20 +++++-------- assembly/src/library/path.rs | 45 ++++++++++-------------------- 3 files changed, 26 insertions(+), 44 deletions(-) diff --git a/assembly/src/ast/ident.rs b/assembly/src/ast/ident.rs index 399f63a18..7c8750743 100644 --- a/assembly/src/ast/ident.rs +++ b/assembly/src/ast/ident.rs @@ -108,7 +108,10 @@ impl Ident { if !source.starts_with(|c: char| c.is_ascii_alphabetic()) { return Err(IdentError::InvalidStart); } - if !source.chars().all(|c| c.is_ascii_alphabetic() || matches!(c, '_' | '0'..='9')) { + if !source + .chars() + .all(|c| c.is_ascii_alphanumeric() || matches!(c, '_' | ':' | '/' | '@' | '.')) + { return Err(IdentError::InvalidChars { ident: source.into() }); } Ok(()) diff --git a/assembly/src/ast/procedure/name.rs b/assembly/src/ast/procedure/name.rs index 9eb616429..3506cb931 100644 --- a/assembly/src/ast/procedure/name.rs +++ b/assembly/src/ast/procedure/name.rs @@ -333,10 +333,10 @@ impl FromStr for ProcedureName { break Err(IdentError::InvalidChars { ident: s.into() }); } }, - Some((_, c)) if c.is_ascii_lowercase() || c == '_' || c == '$' => { + Some((_, c)) if c.is_ascii_lowercase() || c == '_' || c == '$' || c == '.' => { if chars.as_str().contains(|c| match c { c if c.is_ascii_alphanumeric() => false, - '_' | '$' => false, + '_' | '$' | '.' => false, _ => true, }) { Err(IdentError::InvalidChars { ident: s.into() }) @@ -360,8 +360,8 @@ impl Serializable for ProcedureName { impl Deserializable for ProcedureName { fn read_from(source: &mut R) -> Result { let str: String = source.read()?; - let proc_name = - ProcedureName::new_unchecked(Ident::new_unchecked(Span::unknown(Arc::from(str)))); + let proc_name = ProcedureName::new(str) + .map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; Ok(proc_name) } } @@ -377,15 +377,9 @@ impl proptest::prelude::Arbitrary for ProcedureName { use proptest::prelude::*; // see https://doc.rust-lang.org/rustc/symbol-mangling/v0.html#symbol-grammar-summary let all_possible_chars_in_mangled_name = - "$abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_."; - let mangled_rustc_name = ProcedureName::new_unchecked(Ident::new_unchecked(Span::new( - SourceSpan::UNKNOWN, - all_possible_chars_in_mangled_name.into(), - ))); - let plain = ProcedureName::new_unchecked(Ident::new_unchecked(Span::new( - SourceSpan::UNKNOWN, - "userfunc".into(), - ))); + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_.$"; + let mangled_rustc_name = ProcedureName::new(all_possible_chars_in_mangled_name).unwrap(); + let plain = ProcedureName::new("user_func").unwrap(); prop_oneof![Just(mangled_rustc_name), Just(plain)].boxed() } diff --git a/assembly/src/library/path.rs b/assembly/src/library/path.rs index b822d7186..a07e92d14 100644 --- a/assembly/src/library/path.rs +++ b/assembly/src/library/path.rs @@ -10,7 +10,6 @@ use core::{ }; use smallvec::smallvec; -use vm_core::debuginfo::SourceSpan; use crate::{ ast::{Ident, IdentError}, @@ -506,26 +505,23 @@ impl Deserializable for LibraryPath { let path = source.read_slice(len)?; let path = str::from_utf8(path).map_err(|e| DeserializationError::InvalidValue(e.to_string()))?; - let path = LibraryPath::new(path).unwrap_or_else(|_| { + LibraryPath::new(path).or_else(|_| { // Try to parse at least the namespace match LibraryNamespace::strip_path_prefix(path) { Ok((ns, rest)) => { - let module_id = Ident::new_unchecked(Span::new( - SourceSpan::default(), - Arc::from(rest.to_string()), - )); - LibraryPath::new_from_components(ns, [module_id]) + let module_id = Ident::new(rest).map_err(|e| { + DeserializationError::InvalidValue(format!("Invalid module id: {}", e)) + })?; + Ok(LibraryPath::new_from_components(ns, [module_id])) }, Err(_) => { - let module_id = Ident::new_unchecked(Span::new( - SourceSpan::default(), - Arc::from(path.to_string()), - )); - LibraryPath::new_from_components(LibraryNamespace::Anon, [module_id]) + let module_id = Ident::new(path).map_err(|e| { + DeserializationError::InvalidValue(format!("Invalid module id: {}", e)) + })?; + Ok(LibraryPath::new_from_components(LibraryNamespace::Anon, [module_id])) }, } - }); - Ok(path) + }) } } @@ -560,23 +556,18 @@ impl proptest::prelude::Arbitrary for LibraryPath { fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy { use proptest::prelude::*; + let wasm_cm_style = LibraryPath::new_from_components( LibraryNamespace::Anon, - [Ident::new_unchecked(Span::new( - SourceSpan::UNKNOWN, - "namespace:package/interface@1.0.0".into(), - ))], + [Ident::new("namespace:package/interface@1.0.0").unwrap()], ); let path_len_2 = LibraryPath::new_from_components( - LibraryNamespace::User("userns".into()), - [Ident::new_unchecked(Span::new(SourceSpan::UNKNOWN, "usermodule".into()))], + LibraryNamespace::User("user_ns".into()), + [Ident::new("user_module").unwrap()], ); let path_len_3 = LibraryPath::new_from_components( LibraryNamespace::User("userns".into()), - [ - Ident::new_unchecked(Span::new(SourceSpan::UNKNOWN, "userns2".into())), - Ident::new_unchecked(Span::new(SourceSpan::UNKNOWN, "usermodule".into())), - ], + [Ident::new("user_path1").unwrap(), Ident::new("user_module").unwrap()], ); prop_oneof![Just(wasm_cm_style), Just(path_len_2), Just(path_len_3)].boxed() } @@ -634,12 +625,6 @@ mod tests { let path = LibraryPath::new("foo::1bar"); assert_matches!(path, Err(PathError::InvalidComponent(IdentError::InvalidStart))); - let path = LibraryPath::new("foo::b@r"); - assert_matches!( - path, - Err(PathError::InvalidComponent(IdentError::InvalidChars { ident: _ })) - ); - let path = LibraryPath::new("#foo::bar"); assert_matches!( path,