Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce the nom-language crate #1792

Merged
merged 8 commits into from
Dec 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ default-features = false
[dev-dependencies]
doc-comment = "0.3"
proptest = "=1.0.0"

nom-language = { path = "./nom-language" }

[package.metadata.docs.rs]
features = ["alloc", "std", "docsrs"]
Expand All @@ -66,6 +66,10 @@ name = "css"
[[test]]
name = "custom_errors"

[[test]]
name = "expression_ast"
required-features = ["alloc"]

[[test]]
name = "float"

Expand Down Expand Up @@ -142,4 +146,4 @@ coveralls = { repository = "Geal/nom", branch = "main", service = "github" }
maintenance = { status = "actively-developed" }

[workspace]
members = [".", "benchmarks/"]
members = [".", "benchmarks/", "nom-language"]
1 change: 1 addition & 0 deletions benchmarks/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,4 @@ harness = false

[dev-dependencies]
codspeed-criterion-compat = "2.4.1"
nom-language = { path = "../nom-language" }
3 changes: 2 additions & 1 deletion benchmarks/benches/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@ use nom::{
bytes::{tag, take},
character::{anychar, char, multispace0, none_of},
combinator::{map, map_opt, map_res, value, verify},
error::{Error, ErrorKind, FromExternalError, ParseError, VerboseError},
error::{Error, ErrorKind, FromExternalError, ParseError},
multi::{fold, separated_list0},
number::double,
number::recognize_float,
sequence::{delimited, preceded, separated_pair},
Check, Complete, Emit, IResult, Mode, OutputM, Parser,
};
use nom_language::error::VerboseError;

use std::{collections::HashMap, marker::PhantomData, num::ParseIntError};

Expand Down
1 change: 1 addition & 0 deletions doc/choosing_a_combinator.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ The following parsers could be found on [docs.rs number section](https://docs.rs

- [`escaped`](https://docs.rs/nom/latest/nom/bytes/complete/fn.escaped.html): Matches a byte string with escaped characters
- [`escaped_transform`](https://docs.rs/nom/latest/nom/bytes/complete/fn.escaped_transform.html): Matches a byte string with escaped characters, and returns a new string with the escaped characters replaced
- [`precedence`](https://docs.rs/nom-language/latest/nom_language/precedence/fn.precedence.html): Parses an expression with regards to operator precedence

## Binary format parsing

Expand Down
3 changes: 2 additions & 1 deletion examples/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ use nom::{
bytes::complete::{escaped, tag, take_while},
character::complete::{alphanumeric1 as alphanumeric, char, one_of},
combinator::{cut, map, opt, value},
error::{context, convert_error, ContextError, ErrorKind, ParseError, VerboseError},
error::{context, ContextError, ErrorKind, ParseError},
multi::separated_list0,
number::complete::double,
sequence::{delimited, preceded, separated_pair, terminated},
Err, IResult, Parser,
};
use nom_language::error::{convert_error, VerboseError};
use std::collections::HashMap;
use std::str;

Expand Down
3 changes: 2 additions & 1 deletion examples/s_expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ use nom::{
bytes::complete::tag,
character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of},
combinator::{cut, map, map_res, opt},
error::{context, VerboseError},
error::context,
multi::many,
sequence::{delimited, preceded, terminated},
IResult, Parser,
};
use nom_language::error::VerboseError;

/// We start by defining the types that define the shape of data that we want.
/// In this case, we want something tree-like
Expand Down
11 changes: 11 additions & 0 deletions nom-language/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[package]
name = "nom-language"
version = "0.0.1"
authors = ["[email protected]"]
description = "Language parsing focused combinators for the nom parser library"
edition = "2021"
license = "MIT"
repository = "https://github.com/rust-bakery/nom"

[dependencies]
nom = { path = "..", version = "8.0.0-alpha2" }
262 changes: 262 additions & 0 deletions nom-language/src/error.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,262 @@
use std::fmt;

use nom::{
error::{ContextError, ErrorKind, FromExternalError, ParseError},
ErrorConvert,
};

/// This error type accumulates errors and their position when backtracking
/// through a parse tree. With some post processing,
/// it can be used to display user friendly error messages
///
/// `I` is the input type. Every recorded entry pairs a piece of input with
/// the [`VerboseErrorKind`] describing what was reported at that position.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct VerboseError<I> {
/// List of errors accumulated by `VerboseError`, containing the affected
/// part of input data, and some context
pub errors: Vec<(I, VerboseErrorKind)>,
}

#[derive(Clone, Debug, Eq, PartialEq)]
/// Error context for `VerboseError`
///
/// One value of this enum is stored next to each input position in
/// `VerboseError::errors`.
pub enum VerboseErrorKind {
/// Static string added by the `context` function
Context(&'static str),
/// Indicates which character was expected by the `char` function
Char(char),
/// Error kind given by various nom parsers
Nom(ErrorKind),
}

impl<I> ParseError<I> for VerboseError<I> {
  /// Starts a new trace holding a single `Nom(kind)` entry located at `input`.
  fn from_error_kind(input: I, kind: ErrorKind) -> Self {
    VerboseError {
      errors: vec![(input, VerboseErrorKind::Nom(kind))],
    }
  }

  /// Pushes a `Nom(kind)` entry located at `input` onto the end of `other`'s
  /// trace and returns the extended trace.
  fn append(input: I, kind: ErrorKind, mut other: Self) -> Self {
    other.errors.push((input, VerboseErrorKind::Nom(kind)));
    other
  }

  /// Starts a new trace recording that the character `c` was expected at
  /// `input`.
  fn from_char(input: I, c: char) -> Self {
    VerboseError {
      errors: vec![(input, VerboseErrorKind::Char(c))],
    }
  }
}

impl<I> ContextError<I> for VerboseError<I> {
  /// Pushes a `Context(ctx)` entry located at `input` onto the end of
  /// `other`'s trace and returns the extended trace.
  fn add_context(input: I, ctx: &'static str, mut other: Self) -> Self {
    other.errors.push((input, VerboseErrorKind::Context(ctx)));
    other
  }
}

impl<I, E> FromExternalError<I, E> for VerboseError<I> {
  /// Create a new error from an input position and an external error
  ///
  /// The external error `_e` is not stored: the result is exactly what
  /// `from_error_kind(input, kind)` produces.
  fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self {
    Self::from_error_kind(input, kind)
  }
}

impl<I: fmt::Display> fmt::Display for VerboseError<I> {
  /// Writes a `Parse error:` header line followed by one line per recorded
  /// entry, in the order the entries are stored in `errors`.
  fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
    writeln!(f, "Parse error:")?;
    for (input, error) in &self.errors {
      match error {
        VerboseErrorKind::Nom(e) => writeln!(f, "{:?} at: {}", e, input)?,
        VerboseErrorKind::Char(c) => writeln!(f, "expected '{}' at: {}", c, input)?,
        VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?,
      }
    }

    Ok(())
  }
}

// Empty impl body: no `std::error::Error` methods are overridden for `VerboseError`.
impl<I: fmt::Debug + fmt::Display> std::error::Error for VerboseError<I> {}

impl From<VerboseError<&[u8]>> for VerboseError<Vec<u8>> {
fn from(value: VerboseError<&[u8]>) -> Self {

Check warning on line 80 in nom-language/src/error.rs

View check run for this annotation

Codecov / codecov/patch

nom-language/src/error.rs#L80

Added line #L80 was not covered by tests
VerboseError {
errors: value

Check warning on line 82 in nom-language/src/error.rs

View check run for this annotation

Codecov / codecov/patch

nom-language/src/error.rs#L82

Added line #L82 was not covered by tests
.errors
.into_iter()
.map(|(i, e)| (i.to_owned(), e))
.collect(),
}
}
}

impl From<VerboseError<&str>> for VerboseError<String> {
fn from(value: VerboseError<&str>) -> Self {

Check warning on line 92 in nom-language/src/error.rs

View check run for this annotation

Codecov / codecov/patch

nom-language/src/error.rs#L92

Added line #L92 was not covered by tests
VerboseError {
errors: value

Check warning on line 94 in nom-language/src/error.rs

View check run for this annotation

Codecov / codecov/patch

nom-language/src/error.rs#L94

Added line #L94 was not covered by tests
.errors
.into_iter()
.map(|(i, e)| (i.to_owned(), e))
.collect(),
}
}
}

impl<I> ErrorConvert<VerboseError<I>> for VerboseError<(I, usize)> {
fn convert(self) -> VerboseError<I> {

Check warning on line 104 in nom-language/src/error.rs

View check run for this annotation

Codecov / codecov/patch

nom-language/src/error.rs#L104

Added line #L104 was not covered by tests
VerboseError {
errors: self.errors.into_iter().map(|(i, e)| (i.0, e)).collect(),

Check warning on line 106 in nom-language/src/error.rs

View check run for this annotation

Codecov / codecov/patch

nom-language/src/error.rs#L106

Added line #L106 was not covered by tests
}
}
}

impl<I> ErrorConvert<VerboseError<(I, usize)>> for VerboseError<I> {
fn convert(self) -> VerboseError<(I, usize)> {

Check warning on line 112 in nom-language/src/error.rs

View check run for this annotation

Codecov / codecov/patch

nom-language/src/error.rs#L112

Added line #L112 was not covered by tests
VerboseError {
errors: self.errors.into_iter().map(|(i, e)| ((i, 0), e)).collect(),

Check warning on line 114 in nom-language/src/error.rs

View check run for this annotation

Codecov / codecov/patch

nom-language/src/error.rs#L114

Added line #L114 was not covered by tests
}
}
}

/// Transforms a `VerboseError` into a trace with input position information
///
/// Every recorded error becomes one entry, numbered from 0 in the order the
/// entries are stored. When `input` is empty the entry only states what was
/// expected. Otherwise it shows the 1-based line number, the text of that
/// line, and a `^` caret right-aligned to the column at which the error's
/// input slice begins.
///
/// The errors contain references to input data that must come from `input`,
/// because nom calculates byte offsets between them
pub fn convert_error<I: core::ops::Deref<Target = str>>(input: I, e: VerboseError<I>) -> String {
  use nom::Offset;
  use std::fmt::Write;

  let mut result = String::new();

  for (i, (substring, kind)) in e.errors.iter().enumerate() {
    let offset = input.offset(substring);

    if input.is_empty() {
      // No line or column can be shown for empty input, so only the
      // expectation itself is reported.
      match kind {
        VerboseErrorKind::Char(c) => {
          write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c)
        }
        VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s),
        VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e),
      }
    } else {
      let prefix = &input.as_bytes()[..offset];

      // Count the number of newlines in the first `offset` bytes of input
      let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1;

      // Find the line that includes the subslice:
      // Find the *last* newline before the substring starts
      let line_begin = prefix
        .iter()
        .rev()
        .position(|&b| b == b'\n')
        .map(|pos| offset - pos)
        .unwrap_or(0);

      // Find the full line after that newline
      let line = input[line_begin..]
        .lines()
        .next()
        .unwrap_or(&input[line_begin..])
        .trim_end();

      // The (1-indexed) column number is the offset of our substring into that line
      let column_number = line.offset(substring) + 1;

      match kind {
        VerboseErrorKind::Char(c) => {
          // An empty `substring` means the failure happened at the very end
          // of the input, so there is no "found" character to report.
          if let Some(actual) = substring.chars().next() {
            write!(
              &mut result,
              "{i}: at line {line_number}:\n\
               {line}\n\
               {caret:>column$}\n\
               expected '{expected}', found {actual}\n\n",
              i = i,
              line_number = line_number,
              line = line,
              caret = '^',
              column = column_number,
              expected = c,
              actual = actual,
            )
          } else {
            write!(
              &mut result,
              "{i}: at line {line_number}:\n\
               {line}\n\
               {caret:>column$}\n\
               expected '{expected}', got end of input\n\n",
              i = i,
              line_number = line_number,
              line = line,
              caret = '^',
              column = column_number,
              expected = c,
            )
          }
        }
        VerboseErrorKind::Context(s) => write!(
          &mut result,
          "{i}: at line {line_number}, in {context}:\n\
           {line}\n\
           {caret:>column$}\n\n",
          i = i,
          line_number = line_number,
          context = s,
          line = line,
          caret = '^',
          column = column_number,
        ),
        VerboseErrorKind::Nom(e) => write!(
          &mut result,
          "{i}: at line {line_number}, in {nom_err:?}:\n\
           {line}\n\
           {caret:>column$}\n\n",
          i = i,
          line_number = line_number,
          nom_err = e,
          line = line,
          caret = '^',
          column = column_number,
        ),
      }
    }
    // Because `write!` to a `String` is infallible, this `unwrap` is fine.
    .unwrap();
  }

  result
}

/// Regression test: `convert_error` must not panic when the parsed input is
/// empty, and must produce the dedicated "got empty input" message.
#[test]
fn convert_error_panic() {
  use nom::character::complete::char;
  use nom::Err;
  use nom::IResult;

  let input = "";

  let result: IResult<_, _, VerboseError<&str>> = char('x')(input);
  let err = match result.unwrap_err() {
    Err::Error(e) => e,
    _ => unreachable!(),
  };

  // Actually run the conversion: building the parse error alone never
  // reaches the code path this test is named after.
  let msg = convert_error(input, err);
  assert_eq!(msg, "0: expected 'x', got empty input\n\n");
}

/// Checks the trace produced when a parser fails exactly at the end of a
/// non-empty input.
#[test]
fn issue_1027_convert_error_panic_nonempty() {
  use nom::{character::complete::char, sequence::pair, Err, IResult, Parser};

  let input = "a";
  let expected = "0: at line 1:\na\n ^\nexpected \'b\', got end of input\n\n";

  // The first parser consumes the whole input, so the second one fails
  // with nothing left to read.
  let outcome: IResult<_, _, VerboseError<&str>> = pair(char('a'), char('b')).parse(input);
  let trace = if let Err::Error(inner) = outcome.unwrap_err() {
    inner
  } else {
    unreachable!()
  };

  assert_eq!(convert_error(input, trace), expected);
}
9 changes: 9 additions & 0 deletions nom-language/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
//! # Language parsing combinators for the nom parser combinators library
//!
//! nom is a parser combinator library with a focus on safe parsing,
//! streaming patterns, and zero copy.
//! While nom provides general purpose combinators, this crate is targeted
//! at language parsing.

pub mod error;
pub mod precedence;
Loading
Loading