From 424e3972f3ec7c5780dee5cec9479268b43572cb Mon Sep 17 00:00:00 2001 From: Tom Kunc Date: Tue, 3 May 2022 00:05:00 +1000 Subject: [PATCH 01/11] Initial commit for feedback. --- doc/nom-guide/.gitignore | 1 + doc/nom-guide/book.toml | 6 ++ doc/nom-guide/scripts/build.sh | 6 ++ doc/nom-guide/src/SUMMARY.md | 17 ++++ doc/nom-guide/src/chapter_1.md | 82 ++++++++++++++++++++ doc/nom-guide/src/chapter_2.md | 105 +++++++++++++++++++++++++ doc/nom-guide/src/chapter_3.md | 124 ++++++++++++++++++++++++++++++ doc/nom-guide/src/chapter_4.md | 1 + doc/nom-guide/src/introduction.md | 31 ++++++++ doc/nom-guide/src/todo.md | 1 + 10 files changed, 374 insertions(+) create mode 100644 doc/nom-guide/.gitignore create mode 100644 doc/nom-guide/book.toml create mode 100755 doc/nom-guide/scripts/build.sh create mode 100644 doc/nom-guide/src/SUMMARY.md create mode 100644 doc/nom-guide/src/chapter_1.md create mode 100644 doc/nom-guide/src/chapter_2.md create mode 100644 doc/nom-guide/src/chapter_3.md create mode 100644 doc/nom-guide/src/chapter_4.md create mode 100644 doc/nom-guide/src/introduction.md create mode 100644 doc/nom-guide/src/todo.md diff --git a/doc/nom-guide/.gitignore b/doc/nom-guide/.gitignore new file mode 100644 index 00000000..7585238e --- /dev/null +++ b/doc/nom-guide/.gitignore @@ -0,0 +1 @@ +book diff --git a/doc/nom-guide/book.toml b/doc/nom-guide/book.toml new file mode 100644 index 00000000..500119ad --- /dev/null +++ b/doc/nom-guide/book.toml @@ -0,0 +1,6 @@ +[book] +authors = ["Tom Kunc"] +language = "en" +multilingual = false +src = "src" +title = "The Nom Guide (Nominomicon)" diff --git a/doc/nom-guide/scripts/build.sh b/doc/nom-guide/scripts/build.sh new file mode 100755 index 00000000..1a0764a2 --- /dev/null +++ b/doc/nom-guide/scripts/build.sh @@ -0,0 +1,6 @@ +#!/bin/bash +BOOK_ROOT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.." +cd $BOOK_ROOT_PATH + +[[ ! 
-e $BOOK_ROOT_PATH/../../target ]] && (cd ../../ && cargo build) +mdbook test -L $(cd ../../ && pwd)/target/debug/deps/ diff --git a/doc/nom-guide/src/SUMMARY.md b/doc/nom-guide/src/SUMMARY.md new file mode 100644 index 00000000..65983d6c --- /dev/null +++ b/doc/nom-guide/src/SUMMARY.md @@ -0,0 +1,17 @@ +## Summary + +[Introduction](./introduction.md) + +- [Chapter 1: The Nom Way](./chapter_1.md) +- [Chapter 2: Tags and Character Classes](./chapter_2.md) +- [Chapter 3: Alternatives and Composition](./chapter_3.md) +- [Chapter 4: Custom Outputs from Functions](./chapter_4.md) +- [Chapter 5: Parsing Functions](./todo.md) +- [Chapter 6: Repeated Inputs](./todo.md) +- [Chapter 7: Simple Exercises](./todo.md) +- [Chapter 8: Custom Errors in Functions](./todo.md) +- [Chapter 9: Modifiers](./todo.md) +- [Chapter 10: Characters vs. Bytes](./todo.md) +- [Chapter 11: Streaming vs. Complete](./todo.md) +- [Chapter 12: Complex Exercises](./todo.md) + diff --git a/doc/nom-guide/src/chapter_1.md b/doc/nom-guide/src/chapter_1.md new file mode 100644 index 00000000..bb9a0004 --- /dev/null +++ b/doc/nom-guide/src/chapter_1.md @@ -0,0 +1,82 @@ +# Chapter 1: The Nom Way + +First of all, we need to understand the way that regexes and nom think about +parsing. + +A regex, in a sense, controls its whole input. Given a single input, +it decides that either some text **did** match the regex, or it **didn't**. + +```text + ┌────────┐ ┌─► Some text that matched the regex + my input───►│my regex├──►either──┤ + └────────┘ └─► None +``` + +As we mentioned above, Nom parsers are designed to be combined. +This makes the assumption that a regex controls its entire input +more difficult to maintain. So, there are three important changes +required to our mental model of a regex. + +1. Rather than just returning the text that matched + the regex, Nom tells you *both* what it parsed, and what is left + to parse. + +2. 
Additionally, to help with combining parsers, Nom also gives you + error information about your parser. We'll talk about this more later, + for now let's assume it's "basically" the same as the `None` we have above. + + Points 1 and 2 are illustrated in the diagram below: + +```text + ┌─► Ok( + │ text that the parser didn't touch, + │ text that matched the regex + │ ) + ┌─────────┐ │ + my input───►│my parser├──►either──┤ + └─────────┘ └─► Err(...) +``` + +3. Lastly, Nom parsers are normally anchored to the beginning of their input. + In other words, if you converted a Nom parser to regex, it would generally + begin with `/^/`. This is sensible, because it means that nom parsers must + (conceptually) be sequential -- your parser isn't going to jump + ahead and start parsing the middle of the line. + + +To represent this model of the world, nom uses the `IResult<I, O>` type. +The `Ok` variant has a tuple of `(remaining_input: I, output: O)`; +The `Err` variant stores an error. You can import that from: + +```rust +# extern crate nom; +use nom::IResult; +``` + +The simplest parser we can write is one which successfully does nothing. +In other words, the regex `/^/`. + +This parser should take in an `&str`. + - Since it is supposed to succeed, we know it will return the Ok Variant. + - Since it does nothing to our input, the remaining input is the same as the input. + - Since it doesn't do anything, it also should just return the unit type. + + +In other words, this code should be equivalent to the regex `/^/`. 
+ +```rust +# extern crate nom; +# use nom::IResult; + +pub fn do_nothing_parser(input: &str) -> IResult<&str, ()> { + Ok((input, ())) +} + +match do_nothing_parser("my_input") { + Ok((remaining_input, output)) => { + assert_eq!(remaining_input, "my_input"); + assert_eq!(output, ()); + }, + Err(_) => unreachable!() +} +``` diff --git a/doc/nom-guide/src/chapter_2.md b/doc/nom-guide/src/chapter_2.md new file mode 100644 index 00000000..2d66c64b --- /dev/null +++ b/doc/nom-guide/src/chapter_2.md @@ -0,0 +1,105 @@ +# Chapter 2: Tags and Character Classes + +The simplest _useful_ regex you can write is one which +has no special characters, it just matches a string. + +Imagine, for example, the regex `/abc/`. It simply matches when the string +`"abc"` occurs. + +In `nom`, we call a simple collection of bytes a tag. Because +these are so common, there already exists a function called `tag()`. +This function returns a parser for a given string. + +
+ **Warning**: `nom` has multiple different definitions of `tag`, make sure you use this one for the
+ moment!
+
+ +```rust +# extern crate nom; +pub use nom::bytes::complete::tag; +``` + +For example, the regex `/abc/` (really, the regex `/^abc/`) +could be represented as `tag("abc")`. + +Note, that the function `tag` will return +another function, namely, a parser for the tag you requested. + +Below, we see a function using this: + +```rust +# extern crate nom; +# pub use nom::bytes::complete::tag; +# pub use nom::IResult; + +fn parse_input(input: &str) -> IResult<&str, &str> { + // note that this is really creating a function, the parser for abc + // vvvvv + // which is then called here, returning an IResult<&str, &str> + // vvvvv + tag("abc")(input) +} + + let ok_input = "abcWorld"; + + match parse_input(ok_input) { + Ok((leftover_input, output)) => { + assert_eq!(leftover_input, "World"); + assert_eq!(output, "abc"); + }, + Err(_) => unreachable!() + } + + let err_input = "defWorld"; + match parse_input(err_input) { + Ok((leftover_input, output)) => unreachable!(), + Err(_) => assert!(true), + } +``` + +If you'd like to, you can also check case insensitive `/tag/i` +with the `tag_case_insensitive`. + +## Character Classes + +Tags are incredibly useful, but they are also incredibly restrictive. +The other end of Nom's functionality is pre-written parsers that allow us to accept any of a group of characters, +rather than just accepting characters in a defined sequence. + +Here is a selection of them: + +- [`alpha0`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha0.html): Recognizes zero or more lowercase and uppercase alphabetic characters: `/[a-zA-Z]/`. [`alpha1`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha1.html) does the same but returns at least one character +- [`alphanumeric0`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric0.html): Recognizes zero or more numerical and alphabetic characters: `/[0-9a-zA-Z]/`. 
[`alphanumeric1`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric1.html) does the same but returns at least one character +- [`digit0`](https://docs.rs/nom/latest/nom/character/complete/fn.digit0.html): Recognizes zero or more numerical characters: `/[0-9]/`. [`digit1`](https://docs.rs/nom/latest/nom/character/complete/fn.digit1.html) does the same but returns at least one character +- [`multispace0`](https://docs.rs/nom/latest/nom/character/complete/fn.multispace0.html): Recognizes zero or more spaces, tabs, carriage returns and line feeds. [`multispace1`](https://docs.rs/nom/latest/nom/character/complete/fn.multispace1.html) does the same but returns at least one character +- [`space0`](https://docs.rs/nom/latest/nom/character/complete/fn.space0.html): Recognizes zero or more spaces and tabs. [`space1`](https://docs.rs/nom/latest/nom/character/complete/fn.space1.html) does the same but returns at least one character +- [`line_ending`](https://docs.rs/nom/latest/nom/character/complete/fn.line_ending.html): Recognizes an end of line (both `\n` and `\r\n`) +- [`newline`](https://docs.rs/nom/latest/nom/character/complete/fn.newline.html): Matches a newline character `\n` +- [`tab`](https://docs.rs/nom/latest/nom/character/complete/fn.tab.html): Matches a tab character `\t` + + +We can use these in +```rust +# extern crate nom; +# pub use nom::IResult; +pub use nom::character::complete::alpha0; +fn parser(input: &str) -> IResult<&str, &str> { + alpha0(input) +} + + let ok_input = "abc123"; + match parser(ok_input) { + Ok((remaining, letters)) => { + assert_eq!(remaining, "123"); + assert_eq!(letters, "abc"); + }, + Err(_) => unreachable!() + } + +``` + +One important note is that, due to the type signature of these functions, +it is generally best to use them within a function that returns an `IResult`. + +*TODO* : Better explaination of why. 
diff --git a/doc/nom-guide/src/chapter_3.md b/doc/nom-guide/src/chapter_3.md new file mode 100644 index 00000000..03cfcfd1 --- /dev/null +++ b/doc/nom-guide/src/chapter_3.md @@ -0,0 +1,124 @@ +# Chapter 3: Alternatives and Composition + +In the last chapter, we saw how to convert a simple regex into a nom parser. +In this chapter, we explore features two other very important features of Nom, +alternatives, and composition. + +## Alternatives + +In regex, we can write `/(^abc|^def)/`, which means "match either `/^abc/` or `/^def/`". +Nom gives us a similar ability through the `alt()` combinator. + +```rust +# extern crate nom; +use nom::branch::alt; +``` + +The `alt()` combinator will execute each parser in a tuple until it finds one +that does not error. If all error, then by default you are given the error from +the last error. +We can see a basic example of `alt()` below. + +```rust +# extern crate nom; +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::IResult; + +fn parse_abc_or_def(input: &str) -> IResult<&str, &str> { + alt(( + tag("abc"), + tag("def") + ))(input) +} + + match parse_abc_or_def("abcWorld") { + Ok((leftover_input, output)) => { + assert_eq!(leftover_input, "World"); + assert_eq!(output, "abc"); + }, + Err(_) => unreachable!() + } + + match parse_abc_or_def("ghiWorld") { + Ok((leftover_input, output)) => unreachable!(), + Err(_) => assert!(true), + } +``` + +## Composition + +Now that we can create more interesting regexes, we can compose them together. 
+The simplest way to do this is just to evaluate them in sequence: + +```rust +# extern crate nom; +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::IResult; + +fn parse_abc(input: &str) -> IResult<&str, &str> { + tag("abc")(input) +} +fn parse_def_or_ghi(input: &str) -> IResult<&str, &str> { + alt(( + tag("def"), + tag("ghi") + ))(input) +} + + let input = "abcghi"; + if let Ok((remainder, abc)) = parse_abc(input) { + if let Ok((remainder, def_or_ghi)) = parse_def_or_ghi(remainder) { + println!("first parsed: {abc}; then parsed: {def_or_ghi};"); + } + } + +``` + +Composing tags is such a common requirement that, in fact, Nom has a few built in +combinators to do it. The simplest of these is `tuple()`. The `tuple()` combinator takes a tuple of parsers, +and either returns `Ok` with a tuple of all of their successful parses, or it +returns the `Err` of the first failed parser. + +```rust +# extern crate nom; +use nom::branch::alt; +use nom::bytes::complete::{tag}; +use nom::character::complete::{digit1}; +use nom::IResult; + +fn parse_numbers_or_abc(input: &str) -> IResult<&str, &str> { + alt(( + tag("abc"), + digit1 + ))(input) +} + + + let input = "abc"; + let parsed_input = parse_numbers_or_abc(input); + match parsed_input { + Ok((_, matched_str)) => assert_eq!(matched_str, "abc"), + Err(_) => unreachable!() + } + + + let input = "def"; + let parsed_input = parse_numbers_or_abc(input); + match parsed_input { + Ok(_) => unreachable!(), + Err(_) => assert!(true) + } +``` + + +## Extra Nom Tools + +After using `alt()` and `tuple()`, you might also be interested in the `permutation()` parser, which +requires all of the parsers it contains to succeed, but in any order. 
+ +```rust +# extern crate nom; +use nom::branch::permutation; +``` diff --git a/doc/nom-guide/src/chapter_4.md b/doc/nom-guide/src/chapter_4.md new file mode 100644 index 00000000..989179cc --- /dev/null +++ b/doc/nom-guide/src/chapter_4.md @@ -0,0 +1 @@ +# Chapter 4: Custom Outputs from Functions diff --git a/doc/nom-guide/src/introduction.md b/doc/nom-guide/src/introduction.md new file mode 100644 index 00000000..19733e4c --- /dev/null +++ b/doc/nom-guide/src/introduction.md @@ -0,0 +1,31 @@ +# The Nom Guide + +Welcome to The Nom Guide (or, the nominomicon); a guide to using the Nom parser for great good. +This guide is written to take you from an understanding of Regular Expressions, to an understanding +of Nom. + +This guide assumes that you are: + - Wanting to learn Nom, + - Already familiar with regular expressions (at least, somewhat), and + - Already familiar with Rust. + +Nom is a parser-combinator library. In other words, it gives you tools to define: + - "parsers" (a function that takes an input, and gives back an output), and + - "combinators" (functions that take parsers, and _combine_ them together!). + +By combining parsers with combinators, you can build complex parsers up from +simpler ones. These complex parsers are enough to understand HTML, mkv or Python! + +Before we set off, it's important to list some caveats: + - This guide is for Nom7. Nom has undergone significant changes, so if + you are searching for documentation or StackOverflow answers, you may + find older documentation. Some common indicators that it is an old version are: + - Documentation older than 21st August, 2021 + - Use of the `named!` macro + - Use of `CompleteStr` or `CompleteByteArray`. + - Nom can parse (almost) anything; but this guide will focus entirely on parsing + complete `&str` into things. + +And finally, some nomenclature: + - In this guide, regexes will be denoted inside slashes (for example `/abc/`) + to distinguish them from regular strings. 
diff --git a/doc/nom-guide/src/todo.md b/doc/nom-guide/src/todo.md new file mode 100644 index 00000000..9bdc15ed --- /dev/null +++ b/doc/nom-guide/src/todo.md @@ -0,0 +1 @@ +# To Be Completed From fd4a9d3261259be19ce4e5e8a9378538458da98e Mon Sep 17 00:00:00 2001 From: Tom Kunc Date: Fri, 1 Jul 2022 21:07:09 +1000 Subject: [PATCH 02/11] Add chapters 4 thru 7 --- doc/nom-guide/scripts/build.sh | 5 + doc/nom-guide/src/SUMMARY.md | 14 ++- doc/nom-guide/src/chapter_1.md | 91 ++++++++--------- doc/nom-guide/src/chapter_2.md | 84 +++++++++------- doc/nom-guide/src/chapter_3.md | 104 +++++++++++-------- doc/nom-guide/src/chapter_4.md | 162 +++++++++++++++++++++++++++++- doc/nom-guide/src/chapter_5.md | 64 ++++++++++++ doc/nom-guide/src/chapter_6.md | 39 +++++++ doc/nom-guide/src/chapter_7.md | 10 ++ doc/nom-guide/src/introduction.md | 17 ++-- 10 files changed, 439 insertions(+), 151 deletions(-) create mode 100644 doc/nom-guide/src/chapter_5.md create mode 100644 doc/nom-guide/src/chapter_6.md create mode 100644 doc/nom-guide/src/chapter_7.md diff --git a/doc/nom-guide/scripts/build.sh b/doc/nom-guide/scripts/build.sh index 1a0764a2..f08c34e2 100755 --- a/doc/nom-guide/scripts/build.sh +++ b/doc/nom-guide/scripts/build.sh @@ -1,6 +1,11 @@ #!/bin/bash +command="build" + +[[ "$1" == "serve" ]] && command="serve" + BOOK_ROOT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.." cd $BOOK_ROOT_PATH [[ ! 
-e $BOOK_ROOT_PATH/../../target ]] && (cd ../../ && cargo build) mdbook test -L $(cd ../../ && pwd)/target/debug/deps/ +mdbook $command diff --git a/doc/nom-guide/src/SUMMARY.md b/doc/nom-guide/src/SUMMARY.md index 65983d6c..e38d56ae 100644 --- a/doc/nom-guide/src/SUMMARY.md +++ b/doc/nom-guide/src/SUMMARY.md @@ -6,12 +6,10 @@ - [Chapter 2: Tags and Character Classes](./chapter_2.md) - [Chapter 3: Alternatives and Composition](./chapter_3.md) - [Chapter 4: Custom Outputs from Functions](./chapter_4.md) -- [Chapter 5: Parsing Functions](./todo.md) -- [Chapter 6: Repeated Inputs](./todo.md) -- [Chapter 7: Simple Exercises](./todo.md) -- [Chapter 8: Custom Errors in Functions](./todo.md) -- [Chapter 9: Modifiers](./todo.md) -- [Chapter 10: Characters vs. Bytes](./todo.md) -- [Chapter 11: Streaming vs. Complete](./todo.md) -- [Chapter 12: Complex Exercises](./todo.md) +- [Chapter 5: Repeating with Predicates](./chapter_5.md) +- [Chapter 6: Repeating Parsers](./chapter_6.md) +- [Chapter 7: Using Errors from Outside Nom](./chapter_7.md) +- [Chapter 8: Streaming vs. Complete](./todo.md) +- [Chapter 9: Characters vs. Bytes](./todo.md) +- [Chapter 10: Exercises and Further Reading](./todo.md) diff --git a/doc/nom-guide/src/chapter_1.md b/doc/nom-guide/src/chapter_1.md index bb9a0004..d4813923 100644 --- a/doc/nom-guide/src/chapter_1.md +++ b/doc/nom-guide/src/chapter_1.md @@ -1,82 +1,75 @@ # Chapter 1: The Nom Way -First of all, we need to understand the way that regexes and nom think about -parsing. - -A regex, in a sense, controls its whole input. Given a single input, -it decides that either some text **did** match the regex, or it **didn't**. - -```text - ┌────────┐ ┌─► Some text that matched the regex - my input───►│my regex├──►either──┤ - └────────┘ └─► None -``` - -As we mentioned above, Nom parsers are designed to be combined. -This makes the assumption that a regex controls its entire input -more difficult to maintain. 
So, there are three important changes -required to our mental model of a regex. - -1. Rather than just returning the text that matched - the regex, Nom tells you *both* what it parsed, and what is left - to parse. - -2. Additionally, to help with combining parsers, Nom also gives you - error information about your parser. We'll talk about this more later, - for now let's assume it's "basically" the same as the `None` we have above. - - Points 1 and 2 are illustrated in the diagram below: +First of all, we need to understand the way that nom thinks about parsing. +As discussed in the introduction, nom lets us build simple parsers, and +then combine them (using "combinators"). + +Let's discuss what a "parser" actually does. A parser takes an input and returns +a result, where: + - `Ok` indicates the parser successfully found what it was looking for; or + - `Err` indicates the parser could not find what it was looking for. + +Parsers do more than just return a binary "success"/"failure" code. If +the parser was successful, then it will return a tuple. The first field of the +tuple will contain everything the parser did not process. The second will contain +everything the parser processed. The idea is that a parser can happily parse the first +*part* of an input, without being able to parse the whole thing. + +If the parser failed, then there are multiple errors that could be returned. +For simplicity, however, in the next chapters we will leave these unexplored. ```text ┌─► Ok( - │ text that the parser didn't touch, - │ text that matched the regex + │ what the parser didn't touch, + │ what matched the regex │ ) ┌─────────┐ │ my input───►│my parser├──►either──┤ └─────────┘ └─► Err(...) ``` -3. Lastly, Nom parsers are normally anchored to the beginning of their input. - In other words, if you converted a Nom parser to regex, it would generally - begin with `/^/`. 
This is sensible, because it means that nom parsers must - (conceptually) be sequential -- your parser isn't going to jump - ahead and start parsing the middle of the line. - To represent this model of the world, nom uses the `IResult<I, O>` type. The `Ok` variant has a tuple of `(remaining_input: I, output: O)`; -The `Err` variant stores an error. You can import that from: +whereas the `Err` variant stores an error. + +You can import that from: ```rust # extern crate nom; use nom::IResult; ``` +You'll note that `I` and `O` are parameterized -- while most of the examples in this book +will be with `&str` (i.e. parsing a string), they do not have to be strings, nor do they +have to be the same type (consider the simple example where `I = &str`, and `O = u64` -- this +parses a string into an unsigned integer.) + +Let's write our first parser! The simplest parser we can write is one which successfully does nothing. -In other words, the regex `/^/`. -This parser should take in an `&str`. - - Since it is supposed to succeed, we know it will return the Ok Variant. - - Since it does nothing to our input, the remaining input is the same as the input. - - Since it doesn't do anything, it also should just return the unit type. +This parser should take in an `&str`: + - Since it is supposed to succeed, we know it will return the Ok Variant. + - Since it does nothing to our input, the remaining input is the same as the input. + - Since it doesn't parse anything, it also should just return an empty string. -In other words, this code should be equivalent to the regex `/^/`. 
```rust # extern crate nom; # use nom::IResult; +# use std::error::Error; -pub fn do_nothing_parser(input: &str) -> IResult<&str, ()> { - Ok((input, ())) +pub fn do_nothing_parser(input: &str) -> IResult<&str, &str> { + Ok((input, "")) } -match do_nothing_parser("my_input") { - Ok((remaining_input, output)) => { - assert_eq!(remaining_input, "my_input"); - assert_eq!(output, ()); - }, - Err(_) => unreachable!() +fn main() -> Result<(), Box<dyn Error>> { + let (remaining_input, output) = do_nothing_parser("my_input")?; + assert_eq!(remaining_input, "my_input"); + assert_eq!(output, ""); +# Ok(()) } ``` + +It's that easy! diff --git a/doc/nom-guide/src/chapter_2.md b/doc/nom-guide/src/chapter_2.md index 2d66c64b..043a37bb 100644 --- a/doc/nom-guide/src/chapter_2.md +++ b/doc/nom-guide/src/chapter_2.md @@ -1,37 +1,50 @@ # Chapter 2: Tags and Character Classes -The simplest _useful_ regex you can write is one which +The simplest _useful_ parser you can write is one which has no special characters, it just matches a string. -Imagine, for example, the regex `/abc/`. It simply matches when the string -`"abc"` occurs. - In `nom`, we call a simple collection of bytes a tag. Because these are so common, there already exists a function called `tag()`. This function returns a parser for a given string. -
  **Warning**: `nom` has multiple different definitions of `tag`, make sure you use this one for the
  moment!
-
-```rust +```rust,ignore # extern crate nom; pub use nom::bytes::complete::tag; ``` -For example, the regex `/abc/` (really, the regex `/^abc/`) -could be represented as `tag("abc")`. +For example, code to parse the string `"abc"` could be represented as `tag("abc")`. + +If you have not programmed in a language where functions are values, the type signature of the +tag function might be a surprise: + +```rust,ignore +pub fn tag<T, Input, Error: ParseError<Input>>( + tag: T +) -> impl Fn(Input) -> IResult<Input, Input, Error> where + Input: InputTake + Compare<T>, + T: InputLength + Clone, +``` + +Or, for the case where `Input` and `T` are both `&str`, and simplifying slightly: -Note, that the function `tag` will return -another function, namely, a parser for the tag you requested. +```rust,ignore +fn tag(tag: &str) -> (impl Fn(&str) -> IResult<&str, Error>) +``` + +In other words, this function `tag` *returns a function*. The function it returns is a +parser, taking a `&str` and returning an `IResult`. Functions creating parsers and +returning them is a common pattern in Nom, so it is useful to call out. -Below, we see a function using this: +Below, we have implemented a function that uses `tag`. 
```rust # extern crate nom; # pub use nom::bytes::complete::tag; # pub use nom::IResult; +# use std::error::Error; fn parse_input(input: &str) -> IResult<&str, &str> { // note that this is really creating a function, the parser for abc @@ -41,25 +54,18 @@ fn parse_input(input: &str) -> IResult<&str, &str> { tag("abc")(input) } - let ok_input = "abcWorld"; - - match parse_input(ok_input) { - Ok((leftover_input, output)) => { - assert_eq!(leftover_input, "World"); - assert_eq!(output, "abc"); - }, - Err(_) => unreachable!() - } - - let err_input = "defWorld"; - match parse_input(err_input) { - Ok((leftover_input, output)) => unreachable!(), - Err(_) => assert!(true), - } +fn main() -> Result<(), Box<dyn Error>> { + let (leftover_input, output) = parse_input("abcWorld")?; + assert_eq!(leftover_input, "World"); + assert_eq!(output, "abc"); + + assert!(parse_input("defWorld").is_err()); +# Ok(()) +} ``` -If you'd like to, you can also check case insensitive `/tag/i` -with the `tag_case_insensitive`. +If you'd like to, you can also check tags without case-sensitivity +with the [`tag_no_case`](https://docs.rs/nom/latest/nom/bytes/complete/fn.tag_no_case.html) function. ## Character Classes @@ -83,23 +89,23 @@ We can use these in ```rust # extern crate nom; # pub use nom::IResult; +# use std::error::Error; pub use nom::character::complete::alpha0; fn parser(input: &str) -> IResult<&str, &str> { alpha0(input) } - let ok_input = "abc123"; - match parser(ok_input) { - Ok((remaining, letters)) => { - assert_eq!(remaining, "123"); - assert_eq!(letters, "abc"); - }, - Err(_) => unreachable!() - } - +fn main() -> Result<(), Box<dyn Error>> { + let (remaining, letters) = parser("abc123")?; + assert_eq!(remaining, "123"); + assert_eq!(letters, "abc"); + +# Ok(()) +} ``` One important note is that, due to the type signature of these functions, it is generally best to use them within a function that returns an `IResult`. -*TODO* : Better explaination of why. 
+If you don't, some of the information around the type of the `tag` function must be +manually specified, which can lead to verbose code or confusing errors. diff --git a/doc/nom-guide/src/chapter_3.md b/doc/nom-guide/src/chapter_3.md index 03cfcfd1..f42a9233 100644 --- a/doc/nom-guide/src/chapter_3.md +++ b/doc/nom-guide/src/chapter_3.md @@ -1,12 +1,16 @@ # Chapter 3: Alternatives and Composition -In the last chapter, we saw how to convert a simple regex into a nom parser. -In this chapter, we explore features two other very important features of Nom, -alternatives, and composition. +In the last chapter, we saw how to create simple parsers using the `tag` function; +and some of Nom's prebuilt parsers. + +In this chapter, we explore two other widely used features of Nom: +alternatives and composition. ## Alternatives -In regex, we can write `/(^abc|^def)/`, which means "match either `/^abc/` or `/^def/`". +Sometimes, we might want to choose between two parsers; and we're happy with +either being used. + Nom gives us a similar ability through the `alt()` combinator. ```rust @@ -17,6 +21,7 @@ use nom::branch::alt; The `alt()` combinator will execute each parser in a tuple until it finds one that does not error. If all error, then by default you are given the error from the last error. + We can see a basic example of `alt()` below. ```rust @@ -24,6 +29,7 @@ We can see a basic example of `alt()` below. 
use nom::branch::alt; use nom::bytes::complete::tag; use nom::IResult; +# use std::error::Error; fn parse_abc_or_def(input: &str) -> IResult<&str, &str> { alt(( @@ -32,18 +38,14 @@ fn parse_abc_or_def(input: &str) -> IResult<&str, &str> { ))(input) } - match parse_abc_or_def("abcWorld") { - Ok((leftover_input, output)) => { - assert_eq!(leftover_input, "World"); - assert_eq!(output, "abc"); - }, - Err(_) => unreachable!() - } - - match parse_abc_or_def("ghiWorld") { - Ok((leftover_input, output)) => unreachable!(), - Err(_) => assert!(true), - } +fn main() -> Result<(), Box<dyn Error>> { + let (leftover_input, output) = parse_abc_or_def("abcWorld")?; + assert_eq!(leftover_input, "World"); + assert_eq!(output, "abc"); + + assert!(parse_abc_or_def("ghiWorld").is_err()); +# Ok(()) +} ``` ## Composition @@ -56,6 +58,7 @@ The simplest way to do this is just to evaluate them in sequence: use nom::branch::alt; use nom::bytes::complete::tag; use nom::IResult; +# use std::error::Error; fn parse_abc(input: &str) -> IResult<&str, &str> { tag("abc")(input) @@ -67,13 +70,14 @@ fn parse_def_or_ghi(input: &str) -> IResult<&str, &str> { ))(input) } +fn main() -> Result<(), Box<dyn Error>> { let input = "abcghi"; - if let Ok((remainder, abc)) = parse_abc(input) { - if let Ok((remainder, def_or_ghi)) = parse_def_or_ghi(remainder) { - println!("first parsed: {abc}; then parsed: {def_or_ghi};"); - } - } + let (remainder, abc) = parse_abc(input)?; + let (remainder, def_or_ghi) = parse_def_or_ghi(remainder)?; + println!("first parsed: {abc}; then parsed: {def_or_ghi};"); +# Ok(()) +} ``` Composing tags is such a common requirement that, in fact, Nom has a few built in @@ -81,44 +85,58 @@ combinators to do it. The simplest of these is `tuple()`. The `tuple()` combinat and either returns `Ok` with a tuple of all of their successful parses, or it returns the `Err` of the first failed parser.
+```rust +# extern crate nom; +use nom::sequence::tuple; +``` + + ```rust # extern crate nom; use nom::branch::alt; -use nom::bytes::complete::{tag}; +use nom::sequence::tuple; +use nom::bytes::complete::tag_no_case; use nom::character::complete::{digit1}; use nom::IResult; +# use std::error::Error; -fn parse_numbers_or_abc(input: &str) -> IResult<&str, &str> { +fn parse_base(input: &str) -> IResult<&str, &str> { alt(( - tag("abc"), - digit1 + tag_no_case("a"), + tag_no_case("t"), + tag_no_case("c"), + tag_no_case("g") ))(input) } +fn parse_pair(input: &str) -> IResult<&str, (&str, &str)> { + // the many_m_n combinator might also be appropriate here. + tuple(( + parse_base, + parse_base, + ))(input) +} - let input = "abc"; - let parsed_input = parse_numbers_or_abc(input); - match parsed_input { - Ok((_, matched_str)) => assert_eq!(matched_str, "abc"), - Err(_) => unreachable!() - } +fn main() -> Result<(), Box<dyn Error>> { + let (remaining, parsed) = parse_pair("aTcG")?; + assert_eq!(parsed, ("a", "T")); + assert_eq!(remaining, "cG"); + assert!(parse_pair("Dct").is_err()); - let input = "def"; - let parsed_input = parse_numbers_or_abc(input); - match parsed_input { - Ok(_) => unreachable!(), - Err(_) => assert!(true) - } +# Ok(()) +} ``` ## Extra Nom Tools -After using `alt()` and `tuple()`, you might also be interested in the `permutation()` parser, which -requires all of the parsers it contains to succeed, but in any order.
+After using `alt()` and `tuple()`, you might also be interested in a few other parsers that do similar things: -```rust -# extern crate nom; -use nom::branch::permutation; -``` +| combinator | usage | input | output | comment | +|---|---|---|---|---| +| [delimited](https://docs.rs/nom/latest/nom/sequence/fn.delimited.html) | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `Ok(("cd", "ab"))` || +| [preceded](https://docs.rs/nom/latest/nom/sequence/fn.preceded.html) | `preceded(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "XY"))` || +| [terminated](https://docs.rs/nom/latest/nom/sequence/fn.terminated.html) | `terminated(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "ab"))` || +| [pair](https://docs.rs/nom/latest/nom/sequence/fn.pair.html) | `pair(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", ("ab", "XY")))` || +| [separated_pair](https://docs.rs/nom/latest/nom/sequence/fn.separated_pair.html) | `separated_pair(tag("hello"), char(','), tag("world"))` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` || diff --git a/doc/nom-guide/src/chapter_4.md b/doc/nom-guide/src/chapter_4.md index 989179cc..20bde8bd 100644 --- a/doc/nom-guide/src/chapter_4.md +++ b/doc/nom-guide/src/chapter_4.md @@ -1 +1,161 @@ -# Chapter 4: Custom Outputs from Functions +# Chapter 4: Parsers With Custom Return Types + +So far, we have seen mostly functions that take an `&str`, and return a +`IResult<&str, &str>`. Splitting strings into smaller strings is certainly useful, +but it's not the only thing Nom is capable of! + +A useful operation when parsing is to convert between types; for example +parsing from `&str` to another primitive, like `bool`. + +All we need to do for our parser to return a different type is to change +the second type parameter of `IResult` to the desired return type. +For example, to return a bool, return a `IResult<&str, bool>`. 
+ +Recall that the first type parameter of the `IResult` is the input +type, so even if you're returning something different, if your input +is a `&str`, the first type argument of `IResult` should be also. + +Until you have read the chapter on Errors, we strongly suggest avoiding +the use of parsers built into Rust (like `str.parse`); as they require +special handling to work well with Nom. + +That said, one Nom-native way of doing a type conversion is to use the +[`value`](https://docs.rs/nom/latest/nom/combinator/fn.value.html) combinator +to convert from a successful parse to a particular value. + +The following code converts from a string containing `"true"` or `"false"`, +to the corresponding `bool`. + +```rust +# extern crate nom; +# use std::error::Error; +use nom::IResult; +use nom::bytes::complete::tag; +use nom::combinator::value; +use nom::branch::alt; + +fn parse_bool(input: &str) -> IResult<&str, bool> { + // either, parse `"true"` -> `true`; `"false"` -> `false`, or error. + alt(( + value(true, tag("true")), + value(false, tag("false")), + ))(input) +} + +fn main() -> Result<(), Box<dyn Error>> { + // Parses the `"true"` out. + let (remaining, parsed) = parse_bool("true|false")?; + assert_eq!(parsed, true); + assert_eq!(remaining, "|false"); + + // If we forget about the "|", we get an error. + let parsing_error = parse_bool(remaining); + assert!(parsing_error.is_err()); + + // Skipping the first byte gives us `false`! + let (remaining, parsed) = parse_bool(&remaining[1..])?; + assert_eq!(parsed, false); + assert_eq!(remaining, ""); + + + +# Ok(()) +} +``` + +## Nom's in-built parser functions + +Nom has a wide array of parsers built in. Here is a list of +[parsers which recognize specific characters](https://docs.rs/nom/latest/nom/character/complete/index.html). + +Some of them we have seen before in Chapter 2, but now we also can try out the parsers that return different +types, like `i32`. An example of this parser is shown in the next section.
+ +## Building a More Complex Example + +A more complex example of parsing custom types might be parsing a 2D coordinate. + +Let us try to figure out how to design this. + + - We know that we want to take a string, like `"(3, -2)"`, and convert it into + a `Coordinate` struct. + - We can split this into three parts: + +```ignore +(vvvvvvvvvvvvv) # The outer brackets. + vvvv , vvvv # The comma, separating values. + 3 -2 # The actual integers. +``` + + - So, we will need three parsers, to deal with this: + 1. A parser for integers, which will deal with the raw numbers. + 2. A parser for the comma-separated pair, which will split it up into integers. + 3. A parser for the outer brackets. + + - We can see below how we achieve this: + +```rust +# extern crate nom; +# use std::error::Error; +use nom::IResult; +use nom::bytes::complete::tag; +use nom::sequence::{separated_pair, delimited}; + +// This is the type we will parse into. +#[derive(Debug,PartialEq)] +pub struct Coordinate { + pub x: i32, + pub y: i32, +} + +// 1. Nom has an in-built i32 parser. +use nom::character::complete::i32; + +// 2. Use the `separated_pair` parser to combine two parsers (in this case, +// both `i32`), ignoring something in-between. +fn parse_integer_pair(input: &str) -> IResult<&str, (i32, i32)> { + separated_pair( + i32, + tag(", "), + i32 + )(input) +} + +// 3. Use the `delimited` parser to apply a parser, ignoring the results +// of two surrounding parsers. +fn parse_coordinate(input: &str) -> IResult<&str, Coordinate> { + let (remaining, (x, y)) = delimited( + tag("("), + parse_integer_pair, + tag(")") + )(input)?; + + // Note: we could construct this by implementing `From` on `Coordinate`, + // We don't, just so it's obvious what's happening.
+ Ok((remaining, Coordinate {x, y})) + +} + +fn main() -> Result<(), Box<dyn Error>> { + let (_, parsed) = parse_coordinate("(3, 5)")?; + assert_eq!(parsed, Coordinate {x: 3, y: 5}); + + let (_, parsed) = parse_coordinate("(2, -4)")?; + assert_eq!(parsed, Coordinate {x: 2, y: -4}); + + let parsing_error = parse_coordinate("(3,)"); + assert!(parsing_error.is_err()); + + let parsing_error = parse_coordinate("(,3)"); + assert!(parsing_error.is_err()); + + let parsing_error = parse_coordinate("Ferris"); + assert!(parsing_error.is_err()); + + +# Ok(()) +} +``` + +As an exercise, you might want to explore how to make this parser deal gracefully with +whitespace in the input. diff --git a/doc/nom-guide/src/chapter_5.md b/doc/nom-guide/src/chapter_5.md new file mode 100644 index 00000000..b23dd589 --- /dev/null +++ b/doc/nom-guide/src/chapter_5.md @@ -0,0 +1,64 @@ +# Chapter 5: Repeating with Predicates + +Just as, when programming, the humble while loop unlocks many useful +features; in Nom, repeating a parser multiple times can be incredibly useful. + +There are, however, two ways of including repeating functionality into Nom -- +parsers which are governed by a predicate; and combinators which repeat +a parser. + +## Parsers which use a predicate + +A `predicate` is a function which returns a boolean value (i.e. given some input, +it returns `true` or `false`). These are incredibly common when parsing -- for instance, +a predicate `is_vowel` might decide whether a character is an English vowel (a, e, i, o or u). + +These can be used to make parsers that Nom hasn't built in. For instance, the below +parser will take as many vowels as possible. + +There are a few different categories of predicate parsers that are worth mentioning: + + - For bytes, there are three different categories of parser: `take_till`, `take_until`, and `take_while`. + `take_till` will continue consuming input until its input meets the predicate.
+ `take_while` will continue consuming input until its input *does not* meet the predicate. + `take_until` looks a lot like a predicate parser, but simply consumes until the first + occurrence of the pattern of bytes. + - Some parsers have a "twin" with a `1` at the end of their name -- for example, `take_while` + has `take_while1`. The difference between them is that `take_while` could return an empty + slice if the first byte does not satisfy a predicate. `take_while1` returns an error if + the predicate is not met. + - As a special case, `take_while_m_n` is like `take_while`, but guarantees that it will consume + at least `m` bytes, and no more than `n` bytes. + + +```rust +# extern crate nom; +# use std::error::Error; +use nom::IResult; +use nom::bytes::complete::{tag, take_until, take_while}; +use nom::character::{is_space}; +use nom::sequence::{terminated}; + +fn parse_sentence(input: &str) -> IResult<&str, &str> { + terminated(take_until("."), take_while(|c| c == '.' || c == ' '))(input) +} + +fn main() -> Result<(), Box<dyn Error>> { + let (remaining, parsed) = parse_sentence("I am Tom. I write Rust.")?; + assert_eq!(parsed, "I am Tom"); + assert_eq!(remaining, "I write Rust."); + + let parsing_error = parse_sentence("Not a sentence (no period at the end)"); + assert!(parsing_error.is_err()); + + +# Ok(()) +} +``` + For detailed examples, see their documentation, shown below: + +| combinator | usage | input | output | comment | +|---|---|---|---|---| + | [take_while](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_while.html) | `take_while(is_alphabetic)` | `"abc123"` | `Ok(("123", "abc"))` |Returns the longest list of bytes for which the provided function returns true. `take_while1` does the same, but must return at least one character.
`take_while_m_n` does the same, but must return between `m` and `n` characters.| +| [take_till](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_till.html) | `take_till(is_alphabetic)` | `"123abc"` | `Ok(("abc", "123"))` |Returns the longest list of bytes or characters until the provided function returns true. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(\|c\| !f(c))`| +| [take_until](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_until.html) | `take_until("world")` | `"Hello world"` | `Ok(("world", "Hello "))` |Returns the longest list of bytes or characters until the provided tag is found. `take_until1` does the same, but must return at least one character| diff --git a/doc/nom-guide/src/chapter_6.md b/doc/nom-guide/src/chapter_6.md new file mode 100644 index 00000000..4f807ab3 --- /dev/null +++ b/doc/nom-guide/src/chapter_6.md @@ -0,0 +1,39 @@ +# Chapter 6: Repeating Parsers + +A single parser which repeats a predicate is useful, but more useful still is a combinator that +repeats a parser. Nom has multiple combinators which operate on this principle; the most obvious of +which is `many0`, which applies a parser as many times as possible; and returns a vector of +the results of those parses. 
Here is an example: + +```rust +# extern crate nom; +# use std::error::Error; +use nom::IResult; +use nom::multi::many0; +use nom::bytes::complete::tag; + +fn parser(s: &str) -> IResult<&str, Vec<&str>> { + many0(tag("abc"))(s) +} + +fn main() { + assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); + assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); + assert_eq!(parser("123123"), Ok(("123123", vec![]))); + assert_eq!(parser(""), Ok(("", vec![]))); +} +``` + +There are many different parsers to choose from: + +| combinator | usage | input | output | comment | +|---|---|---|---|---| +| [count](https://docs.rs/nom/latest/nom/multi/fn.count.html) | `count(take(2), 3)` | `"abcdefgh"` | `Ok(("gh", vec!["ab", "cd", "ef"]))` |Applies the child parser a specified number of times| +| [many0](https://docs.rs/nom/latest/nom/multi/fn.many0.html) | `many0(tag("ab"))` | `"abababc"` | `Ok(("c", vec!["ab", "ab", "ab"]))` |Applies the parser 0 or more times and returns the list of results in a Vec. `many1` does the same operation but must return at least one element| +| [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| +| [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. 
Returns a tuple containing the list of results from the first in a Vec and the result of the second| +| [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must return at least one element| +| [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time| +| [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return values| +| [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times| + diff --git a/doc/nom-guide/src/chapter_7.md b/doc/nom-guide/src/chapter_7.md new file mode 100644 index 00000000..0645753e --- /dev/null +++ b/doc/nom-guide/src/chapter_7.md @@ -0,0 +1,10 @@ +# Chapter 7: Using Errors from Outside Nom + +[Nom has other documentation about errors, so in place of this chapter, read this page.](https://github.com/Geal/nom/blob/main/doc/error_management.md) + +## Particular Notes + + - It's particularly useful to use the `map_res` function. It allows you to + convert an external error to a Nom error. For an example, + see [the Nom example on the front page](https://github.com/Geal/nom#example).
+ diff --git a/doc/nom-guide/src/introduction.md b/doc/nom-guide/src/introduction.md index 19733e4c..7152ac6d 100644 --- a/doc/nom-guide/src/introduction.md +++ b/doc/nom-guide/src/introduction.md @@ -1,12 +1,11 @@ -# The Nom Guide +# The Nominomicon -Welcome to The Nom Guide (or, the nominomicon); a guide to using the Nom parser for great good. -This guide is written to take you from an understanding of Regular Expressions, to an understanding -of Nom. +Welcome to Nominomicon; a guide to using the Nom parser for great good. +This guide will give you an introduction to the theory and practice of +using Nom. -This guide assumes that you are: +This guide assumes only that you are: - Wanting to learn Nom, - - Already familiar with regular expressions (at least, somewhat), and - Already familiar with Rust. Nom is a parser-combinator library. In other words, it gives you tools to define: @@ -23,9 +22,5 @@ Before we set off, it's important to list some caveats: - Documentation older than 21st August, 2021 - Use of the `named!` macro - Use of `CompleteStr` or `CompleteByteArray`. - - Nom can parse (almost) anything; but this guide will focus entirely on parsing + - Nom can parse (almost) anything; but this guide will focus almost entirely on parsing complete `&str` into things. - -And finally, some nomenclature: - - In this guide, regexes will be denoted inside slashes (for example `/abc/`) - to distinguish them from regular strings. From 94252a492e0075d9cabc5b1cdc78f285d18afc97 Mon Sep 17 00:00:00 2001 From: Tom Kunc Date: Fri, 1 Jul 2022 21:11:57 +1000 Subject: [PATCH 03/11] Update Xiretza's tuple issue --- doc/nom-guide/src/chapter_1.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/nom-guide/src/chapter_1.md b/doc/nom-guide/src/chapter_1.md index d4813923..6b0d1f7f 100644 --- a/doc/nom-guide/src/chapter_1.md +++ b/doc/nom-guide/src/chapter_1.md @@ -29,7 +29,7 @@ For simplicity, however, in the next chapters we will leave these unexplored. 
``` -To represent this model of the world, nom uses the `IResult<(I, O)>` type. +To represent this model of the world, nom uses the `IResult<I, O>` type. The `Ok` variant has a tuple of `(remaining_input: I, output: O)`; whereas the `Err` variant stores an error. From 1b45c9b663e516fa7583b3e76fbe88beba678ed6 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 20 Feb 2023 09:59:56 -0600 Subject: [PATCH 04/11] docs(tutorial): Initial layout --- doc/nom-guide/.gitignore | 1 - doc/nom-guide/book.toml | 6 ------ doc/nom-guide/scripts/build.sh | 11 ----------- doc/nom-guide/src/SUMMARY.md | 15 --------------- doc/nom-guide/src/todo.md | 1 - {doc/nom-guide/src => src/_tutorial}/chapter_1.md | 0 {doc/nom-guide/src => src/_tutorial}/chapter_2.md | 0 {doc/nom-guide/src => src/_tutorial}/chapter_3.md | 0 {doc/nom-guide/src => src/_tutorial}/chapter_4.md | 0 {doc/nom-guide/src => src/_tutorial}/chapter_5.md | 0 {doc/nom-guide/src => src/_tutorial}/chapter_6.md | 0 {doc/nom-guide/src => src/_tutorial}/chapter_7.md | 0 .../src/introduction.md => src/_tutorial/mod.md | 0 13 files changed, 34 deletions(-) delete mode 100644 doc/nom-guide/.gitignore delete mode 100644 doc/nom-guide/book.toml delete mode 100755 doc/nom-guide/scripts/build.sh delete mode 100644 doc/nom-guide/src/SUMMARY.md delete mode 100644 doc/nom-guide/src/todo.md rename {doc/nom-guide/src => src/_tutorial}/chapter_1.md (100%) rename {doc/nom-guide/src => src/_tutorial}/chapter_2.md (100%) rename {doc/nom-guide/src => src/_tutorial}/chapter_3.md (100%) rename {doc/nom-guide/src => src/_tutorial}/chapter_4.md (100%) rename {doc/nom-guide/src => src/_tutorial}/chapter_5.md (100%) rename {doc/nom-guide/src => src/_tutorial}/chapter_6.md (100%) rename {doc/nom-guide/src => src/_tutorial}/chapter_7.md (100%) rename doc/nom-guide/src/introduction.md => src/_tutorial/mod.md (100%) diff --git a/doc/nom-guide/.gitignore b/doc/nom-guide/.gitignore deleted file mode 100644 index 7585238e..00000000 --- a/doc/nom-guide/.gitignore +++
/dev/null @@ -1 +0,0 @@ -book diff --git a/doc/nom-guide/book.toml b/doc/nom-guide/book.toml deleted file mode 100644 index 500119ad..00000000 --- a/doc/nom-guide/book.toml +++ /dev/null @@ -1,6 +0,0 @@ -[book] -authors = ["Tom Kunc"] -language = "en" -multilingual = false -src = "src" -title = "The Nom Guide (Nominomicon)" diff --git a/doc/nom-guide/scripts/build.sh b/doc/nom-guide/scripts/build.sh deleted file mode 100755 index f08c34e2..00000000 --- a/doc/nom-guide/scripts/build.sh +++ /dev/null @@ -1,11 +0,0 @@ -#!/bin/bash -command="build" - -[[ "$1" == "serve" ]] && command="serve" - -BOOK_ROOT_PATH="$( cd "$(dirname "$0")" >/dev/null 2>&1 ; pwd -P )/.." -cd $BOOK_ROOT_PATH - -[[ ! -e $BOOK_ROOT_PATH/../../target ]] && (cd ../../ && cargo build) -mdbook test -L $(cd ../../ && pwd)/target/debug/deps/ -mdbook $command diff --git a/doc/nom-guide/src/SUMMARY.md b/doc/nom-guide/src/SUMMARY.md deleted file mode 100644 index e38d56ae..00000000 --- a/doc/nom-guide/src/SUMMARY.md +++ /dev/null @@ -1,15 +0,0 @@ -## Summary - -[Introduction](./introduction.md) - -- [Chapter 1: The Nom Way](./chapter_1.md) -- [Chapter 2: Tags and Character Classes](./chapter_2.md) -- [Chapter 3: Alternatives and Composition](./chapter_3.md) -- [Chapter 4: Custom Outputs from Functions](./chapter_4.md) -- [Chapter 5: Repeating with Predicates](./chapter_5.md) -- [Chapter 6: Repeating Parsers](./chapter_6.md) -- [Chapter 7: Using Errors from Outside Nom](./chapter_7.md) -- [Chapter 8: Streaming vs. Complete](./todo.md) -- [Chapter 9: Characters vs. 
Bytes](./todo.md) -- [Chapter 10: Exercises and Further Reading](./todo.md) - diff --git a/doc/nom-guide/src/todo.md b/doc/nom-guide/src/todo.md deleted file mode 100644 index 9bdc15ed..00000000 --- a/doc/nom-guide/src/todo.md +++ /dev/null @@ -1 +0,0 @@ -# To Be Completed diff --git a/doc/nom-guide/src/chapter_1.md b/src/_tutorial/chapter_1.md similarity index 100% rename from doc/nom-guide/src/chapter_1.md rename to src/_tutorial/chapter_1.md diff --git a/doc/nom-guide/src/chapter_2.md b/src/_tutorial/chapter_2.md similarity index 100% rename from doc/nom-guide/src/chapter_2.md rename to src/_tutorial/chapter_2.md diff --git a/doc/nom-guide/src/chapter_3.md b/src/_tutorial/chapter_3.md similarity index 100% rename from doc/nom-guide/src/chapter_3.md rename to src/_tutorial/chapter_3.md diff --git a/doc/nom-guide/src/chapter_4.md b/src/_tutorial/chapter_4.md similarity index 100% rename from doc/nom-guide/src/chapter_4.md rename to src/_tutorial/chapter_4.md diff --git a/doc/nom-guide/src/chapter_5.md b/src/_tutorial/chapter_5.md similarity index 100% rename from doc/nom-guide/src/chapter_5.md rename to src/_tutorial/chapter_5.md diff --git a/doc/nom-guide/src/chapter_6.md b/src/_tutorial/chapter_6.md similarity index 100% rename from doc/nom-guide/src/chapter_6.md rename to src/_tutorial/chapter_6.md diff --git a/doc/nom-guide/src/chapter_7.md b/src/_tutorial/chapter_7.md similarity index 100% rename from doc/nom-guide/src/chapter_7.md rename to src/_tutorial/chapter_7.md diff --git a/doc/nom-guide/src/introduction.md b/src/_tutorial/mod.md similarity index 100% rename from doc/nom-guide/src/introduction.md rename to src/_tutorial/mod.md From 012a377b082a7910737c5fcf8b3de4eadabf6e12 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 20 Feb 2023 10:03:33 -0600 Subject: [PATCH 05/11] docs(tutorial): Prep for ft change --- src/_tutorial/chapter_1.md | 150 ++++++++--------- src/_tutorial/chapter_2.md | 222 ++++++++++++------------- src/_tutorial/chapter_3.md | 284 
++++++++++++++++---------------- src/_tutorial/chapter_4.md | 322 ++++++++++++++++++------------------- src/_tutorial/chapter_5.md | 128 +++++++-------- src/_tutorial/chapter_6.md | 78 ++++----- src/_tutorial/chapter_7.md | 20 +-- src/_tutorial/mod.md | 52 +++--- 8 files changed, 628 insertions(+), 628 deletions(-) diff --git a/src/_tutorial/chapter_1.md b/src/_tutorial/chapter_1.md index 6b0d1f7f..fd339089 100644 --- a/src/_tutorial/chapter_1.md +++ b/src/_tutorial/chapter_1.md @@ -1,75 +1,75 @@ -# Chapter 1: The Nom Way - -First of all, we need to understand the way that nom thinks about parsing. -As discussed in the introduction, nom lets us build simple parsers, and -then combine them (using "combinators"). - -Let's discuss what a "parser" actually does. A parser takes an input and returns -a result, where: - - `Ok` indicates the parser successfully found what it was looking for; or - - `Err` indicates the parser could not find what it was looking for. - -Parsers do more than just return a binary "success"/"failure" code. If -the parser was successful, then it will return a tuple. The first field of the -tuple will contain everything the parser did not process. The second will contain -everything the parser processed. The idea is that a parser can happily parse the first -*part* of an input, without being able to parse the whole thing. - -If the parser failed, then there are multiple errors that could be returned. -For simplicity, however, in the next chapters we will leave these unexplored. - -```text - ┌─► Ok( - │ what the parser didn't touch, - │ what matched the regex - │ ) - ┌─────────┐ │ - my input───►│my parser├──►either──┤ - └─────────┘ └─► Err(...) -``` - - -To represent this model of the world, nom uses the `IResult<I, O>` type. -The `Ok` variant has a tuple of `(remaining_input: I, output: O)`; -whereas the `Err` variant stores an error.
- -You can import that from: - -```rust -# extern crate nom; -use nom::IResult; -``` - -You'll note that `I` and `O` are parameterized -- while most of the examples in this book -will be with `&str` (i.e. parsing a string); they do not have to be strings; nor do they -have to be the same type (consider the simple example where `I = &str`, and `O = u64` -- this -parses a string into an unsigned integer.) - -Let's write our first parser! -The simplest parser we can write is one which successfully does nothing. - -This parser should take in an `&str`: - - - Since it is supposed to succeed, we know it will return the Ok Variant. - - Since it does nothing to our input, the remaining input is the same as the input. - - Since it doesn't parse anything, it also should just return an empty string. - - -```rust -# extern crate nom; -# use nom::IResult; -# use std::error::Error; - -pub fn do_nothing_parser(input: &str) -> IResult<&str, &str> { - Ok((input, "")) -} - -fn main() -> Result<(), Box<dyn Error>> { - let (remaining_input, output) = do_nothing_parser("my_input")?; - assert_eq!(remaining_input, "my_input"); - assert_eq!(output, ""); -# Ok(()) -} -``` - -It's that easy! +//! # Chapter 1: The Nom Way +//! +//! First of all, we need to understand the way that nom thinks about parsing. +//! As discussed in the introduction, nom lets us build simple parsers, and +//! then combine them (using "combinators"). +//! +//! Let's discuss what a "parser" actually does. A parser takes an input and returns +//! a result, where: +//! - `Ok` indicates the parser successfully found what it was looking for; or +//! - `Err` indicates the parser could not find what it was looking for. +//! +//! Parsers do more than just return a binary "success"/"failure" code. If +//! the parser was successful, then it will return a tuple. The first field of the +//! tuple will contain everything the parser did not process. The second will contain +//! everything the parser processed.
The idea is that a parser can happily parse the first +//! *part* of an input, without being able to parse the whole thing. +//! +//! If the parser failed, then there are multiple errors that could be returned. +//! For simplicity, however, in the next chapters we will leave these unexplored. +//! +//! ```text +//! ┌─► Ok( +//! │ what the parser didn't touch, +//! │ what matched the regex +//! │ ) +//! ┌─────────┐ │ +//! my input───►│my parser├──►either──┤ +//! └─────────┘ └─► Err(...) +//! ``` +//! +//! +//! To represent this model of the world, nom uses the `IResult<I, O>` type. +//! The `Ok` variant has a tuple of `(remaining_input: I, output: O)`; +//! whereas the `Err` variant stores an error. +//! +//! You can import that from: +//! +//! ```rust +//! # extern crate nom; +//! use nom::IResult; +//! ``` +//! +//! You'll note that `I` and `O` are parameterized -- while most of the examples in this book +//! will be with `&str` (i.e. parsing a string); they do not have to be strings; nor do they +//! have to be the same type (consider the simple example where `I = &str`, and `O = u64` -- this +//! parses a string into an unsigned integer.) +//! +//! Let's write our first parser! +//! The simplest parser we can write is one which successfully does nothing. +//! +//! This parser should take in an `&str`: +//! +//! - Since it is supposed to succeed, we know it will return the Ok Variant. +//! - Since it does nothing to our input, the remaining input is the same as the input. +//! - Since it doesn't parse anything, it also should just return an empty string. +//! +//! +//! ```rust +//! # extern crate nom; +//! # use nom::IResult; +//! # use std::error::Error; +//! +//! pub fn do_nothing_parser(input: &str) -> IResult<&str, &str> { +//! Ok((input, "")) +//! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! let (remaining_input, output) = do_nothing_parser("my_input")?; +//! assert_eq!(remaining_input, "my_input"); +//! assert_eq!(output, ""); +//! # Ok(()) +//! } +//!
``` +//! +//! It's that easy! diff --git a/src/_tutorial/chapter_2.md b/src/_tutorial/chapter_2.md index 043a37bb..aa567c9f 100644 --- a/src/_tutorial/chapter_2.md +++ b/src/_tutorial/chapter_2.md @@ -1,111 +1,111 @@ -# Chapter 2: Tags and Character Classes - -The simplest _useful_ parser you can write is one which -has no special characters, it just matches a string. - -In `nom`, we call a simple collection of bytes a tag. Because -these are so common, there already exists a function called `tag()`. -This function returns a parser for a given string. - - **Warning**: `nom` has multiple different definitions of `tag`, make sure you use this one for the - moment! - -```rust,ignore -# extern crate nom; -pub use nom::bytes::complete::tag; -``` - -For example, code to parse the string `"abc"` could be represented as `tag("abc")`. - -If you have not programmed in a language where functions are values, the type signature of the -`tag` function might be a surprise: - -```rust,ignore -pub fn tag<T, Input, Error: ParseError<Input>>( - tag: T -) -> impl Fn(Input) -> IResult<Input, Input, Error> where - Input: InputTake + Compare<T>, - T: InputLength + Clone, -``` - -Or, for the case where `Input` and `T` are both `&str`, and simplifying slightly: - -```rust,ignore -fn tag(tag: &str) -> (impl Fn(&str) -> IResult<&str, Error>) -``` - -In other words, this function `tag` *returns a function*. The function it returns is a -parser, taking a `&str` and returning an `IResult`. Functions creating parsers and -returning them is a common pattern in Nom, so it is useful to call out. - -Below, we have implemented a function that uses `tag`.
- -```rust -# extern crate nom; -# pub use nom::bytes::complete::tag; -# pub use nom::IResult; -# use std::error::Error; - -fn parse_input(input: &str) -> IResult<&str, &str> { - // note that this is really creating a function, the parser for abc - // vvvvv - // which is then called here, returning an IResult<&str, &str> - // vvvvv - tag("abc")(input) -} - -fn main() -> Result<(), Box> { - let (leftover_input, output) = parse_input("abcWorld")?; - assert_eq!(leftover_input, "World"); - assert_eq!(output, "abc"); - - assert!(parse_input("defWorld").is_err()); -# Ok(()) -} -``` - -If you'd like to, you can also check tags without case-sensitivity -with the [`tag_no_case`](https://docs.rs/nom/latest/nom/bytes/complete/fn.tag_no_case.html) function. - -## Character Classes - -Tags are incredibly useful, but they are also incredibly restrictive. -The other end of Nom's functionality is pre-written parsers that allow us to accept any of a group of characters, -rather than just accepting characters in a defined sequence. - -Here is a selection of them: - -- [`alpha0`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha0.html): Recognizes zero or more lowercase and uppercase alphabetic characters: `/[a-zA-Z]/`. [`alpha1`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha1.html) does the same but returns at least one character -- [`alphanumeric0`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric0.html): Recognizes zero or more numerical and alphabetic characters: `/[0-9a-zA-Z]/`. [`alphanumeric1`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric1.html) does the same but returns at least one character -- [`digit0`](https://docs.rs/nom/latest/nom/character/complete/fn.digit0.html): Recognizes zero or more numerical characters: `/[0-9]/`. 
[`digit1`](https://docs.rs/nom/latest/nom/character/complete/fn.digit1.html) does the same but returns at least one character -- [`multispace0`](https://docs.rs/nom/latest/nom/character/complete/fn.multispace0.html): Recognizes zero or more spaces, tabs, carriage returns and line feeds. [`multispace1`](https://docs.rs/nom/latest/nom/character/complete/fn.multispace1.html) does the same but returns at least one character -- [`space0`](https://docs.rs/nom/latest/nom/character/complete/fn.space0.html): Recognizes zero or more spaces and tabs. [`space1`](https://docs.rs/nom/latest/nom/character/complete/fn.space1.html) does the same but returns at least one character -- [`line_ending`](https://docs.rs/nom/latest/nom/character/complete/fn.line_ending.html): Recognizes an end of line (both `\n` and `\r\n`) -- [`newline`](https://docs.rs/nom/latest/nom/character/complete/fn.newline.html): Matches a newline character `\n` -- [`tab`](https://docs.rs/nom/latest/nom/character/complete/fn.tab.html): Matches a tab character `\t` - - -We can use these in -```rust -# extern crate nom; -# pub use nom::IResult; -# use std::error::Error; -pub use nom::character::complete::alpha0; -fn parser(input: &str) -> IResult<&str, &str> { - alpha0(input) -} - -fn main() -> Result<(), Box> { - let (remaining, letters) = parser("abc123")?; - assert_eq!(remaining, "123"); - assert_eq!(letters, "abc"); - -# Ok(()) -} -``` - -One important note is that, due to the type signature of these functions, -it is generally best to use them within a function that returns an `IResult`. - -If you don't, some of the information around the type of the `tag` function must be -manually specified, which can lead to verbose code or confusing errors. +//! # Chapter 2: Tags and Character Classes +//! +//! The simplest _useful_ parser you can write is one which +//! has no special characters, it just matches a string. +//! +//! In `nom`, we call a simple collection of bytes a tag. Because +//! 
these are so common, there already exists a function called `tag()`. +//! This function returns a parser for a given string. +//! +//! **Warning**: `nom` has multiple different definitions of `tag`, make sure you use this one for the +//! moment! +//! +//! ```rust,ignore +//! # extern crate nom; +//! pub use nom::bytes::complete::tag; +//! ``` +//! +//! For example, code to parse the string `"abc"` could be represented as `tag("abc")`. +//! +//! If you have not programmed in a language where functions are values, the type signature of the +//! `tag` function might be a surprise: +//! +//! ```rust,ignore +//! pub fn tag<T, Input, Error: ParseError<Input>>( +//! tag: T +//! ) -> impl Fn(Input) -> IResult<Input, Input, Error> where +//! Input: InputTake + Compare<T>, +//! T: InputLength + Clone, +//! ``` +//! +//! Or, for the case where `Input` and `T` are both `&str`, and simplifying slightly: +//! +//! ```rust,ignore +//! fn tag(tag: &str) -> (impl Fn(&str) -> IResult<&str, Error>) +//! ``` +//! +//! In other words, this function `tag` *returns a function*. The function it returns is a +//! parser, taking a `&str` and returning an `IResult`. Functions creating parsers and +//! returning them is a common pattern in Nom, so it is useful to call out. +//! +//! Below, we have implemented a function that uses `tag`. +//! +//! ```rust +//! # extern crate nom; +//! # pub use nom::bytes::complete::tag; +//! # pub use nom::IResult; +//! # use std::error::Error; +//! +//! fn parse_input(input: &str) -> IResult<&str, &str> { +//! // note that this is really creating a function, the parser for abc +//! // vvvvv +//! // which is then called here, returning an IResult<&str, &str> +//! // vvvvv +//! tag("abc")(input) +//! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! let (leftover_input, output) = parse_input("abcWorld")?; +//! assert_eq!(leftover_input, "World"); +//! assert_eq!(output, "abc"); +//! +//! assert!(parse_input("defWorld").is_err()); +//! # Ok(()) +//! } +//! ``` +//! +//! 
If you'd like to, you can also check tags without case-sensitivity +//! with the [`tag_no_case`](https://docs.rs/nom/latest/nom/bytes/complete/fn.tag_no_case.html) function. +//! +//! ## Character Classes +//! +//! Tags are incredibly useful, but they are also incredibly restrictive. +//! The other end of Nom's functionality is pre-written parsers that allow us to accept any of a group of characters, +//! rather than just accepting characters in a defined sequence. +//! +//! Here is a selection of them: +//! +//! - [`alpha0`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha0.html): Recognizes zero or more lowercase and uppercase alphabetic characters: `/[a-zA-Z]/`. [`alpha1`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha1.html) does the same but returns at least one character +//! - [`alphanumeric0`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric0.html): Recognizes zero or more numerical and alphabetic characters: `/[0-9a-zA-Z]/`. [`alphanumeric1`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric1.html) does the same but returns at least one character +//! - [`digit0`](https://docs.rs/nom/latest/nom/character/complete/fn.digit0.html): Recognizes zero or more numerical characters: `/[0-9]/`. [`digit1`](https://docs.rs/nom/latest/nom/character/complete/fn.digit1.html) does the same but returns at least one character +//! - [`multispace0`](https://docs.rs/nom/latest/nom/character/complete/fn.multispace0.html): Recognizes zero or more spaces, tabs, carriage returns and line feeds. [`multispace1`](https://docs.rs/nom/latest/nom/character/complete/fn.multispace1.html) does the same but returns at least one character +//! - [`space0`](https://docs.rs/nom/latest/nom/character/complete/fn.space0.html): Recognizes zero or more spaces and tabs. [`space1`](https://docs.rs/nom/latest/nom/character/complete/fn.space1.html) does the same but returns at least one character +//! 
- [`line_ending`](https://docs.rs/nom/latest/nom/character/complete/fn.line_ending.html): Recognizes an end of line (both `\n` and `\r\n`) +//! - [`newline`](https://docs.rs/nom/latest/nom/character/complete/fn.newline.html): Matches a newline character `\n` +//! - [`tab`](https://docs.rs/nom/latest/nom/character/complete/fn.tab.html): Matches a tab character `\t` +//! +//! +//! We can use these in a parser of our own, like so: +//! ```rust +//! # extern crate nom; +//! # pub use nom::IResult; +//! # use std::error::Error; +//! pub use nom::character::complete::alpha0; +//! fn parser(input: &str) -> IResult<&str, &str> { +//! alpha0(input) +//! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! let (remaining, letters) = parser("abc123")?; +//! assert_eq!(remaining, "123"); +//! assert_eq!(letters, "abc"); +//! +//! # Ok(()) +//! } +//! ``` +//! +//! One important note is that, due to the type signature of these functions, +//! it is generally best to use them within a function that returns an `IResult`. +//! +//! If you don't, some of the information around the type of the `tag` function must be +//! manually specified, which can lead to verbose code or confusing errors. diff --git a/src/_tutorial/chapter_3.md b/src/_tutorial/chapter_3.md index f42a9233..bd8880a6 100644 --- a/src/_tutorial/chapter_3.md +++ b/src/_tutorial/chapter_3.md @@ -1,142 +1,142 @@ -# Chapter 3: Alternatives and Composition - -In the last chapter, we saw how to create simple parsers using the `tag` function; -and some of Nom's prebuilt parsers. - -In this chapter, we explore two other widely used features of Nom: -alternatives and composition. - -## Alternatives - -Sometimes, we might want to choose between two parsers; and we're happy with -either being used. - -Nom gives us a similar ability through the `alt()` combinator. - -```rust -# extern crate nom; -use nom::branch::alt; -``` - -The `alt()` combinator will execute each parser in a tuple until it finds one -that does not error. 
If all error, then by default you are given the error from -the last error. - -We can see a basic example of `alt()` below. - -```rust -# extern crate nom; -use nom::branch::alt; -use nom::bytes::complete::tag; -use nom::IResult; -# use std::error::Error; - -fn parse_abc_or_def(input: &str) -> IResult<&str, &str> { - alt(( - tag("abc"), - tag("def") - ))(input) -} - -fn main() -> Result<(), Box> { - let (leftover_input, output) = parse_abc_or_def("abcWorld")?; - assert_eq!(leftover_input, "World"); - assert_eq!(output, "abc"); - - assert!(parse_abc_or_def("ghiWorld").is_err()); -# Ok(()) -} -``` - -## Composition - -Now that we can create more interesting regexes, we can compose them together. -The simplest way to do this is just to evaluate them in sequence: - -```rust -# extern crate nom; -use nom::branch::alt; -use nom::bytes::complete::tag; -use nom::IResult; -# use std::error::Error; - -fn parse_abc(input: &str) -> IResult<&str, &str> { - tag("abc")(input) -} -fn parse_def_or_ghi(input: &str) -> IResult<&str, &str> { - alt(( - tag("def"), - tag("ghi") - ))(input) -} - -fn main() -> Result<(), Box> { - let input = "abcghi"; - let (remainder, abc) = parse_abc(input)?; - let (remainder, def_or_ghi) = parse_def_or_ghi(remainder)?; - println!("first parsed: {abc}; then parsed: {def_or_ghi};"); - -# Ok(()) -} -``` - -Composing tags is such a common requirement that, in fact, Nom has a few built in -combinators to do it. The simplest of these is `tuple()`. The `tuple()` combinator takes a tuple of parsers, -and either returns `Ok` with a tuple of all of their successful parses, or it -returns the `Err` of the first failed parser. 
- -```rust -# extern crate nom; -use nom::sequence::tuple; -``` - - -```rust -# extern crate nom; -use nom::branch::alt; -use nom::sequence::tuple; -use nom::bytes::complete::tag_no_case; -use nom::character::complete::{digit1}; -use nom::IResult; -# use std::error::Error; - -fn parse_base(input: &str) -> IResult<&str, &str> { - alt(( - tag_no_case("a"), - tag_no_case("t"), - tag_no_case("c"), - tag_no_case("g") - ))(input) -} - -fn parse_pair(input: &str) -> IResult<&str, (&str, &str)> { - // the many_m_n combinator might also be appropriate here. - tuple(( - parse_base, - parse_base, - ))(input) -} - -fn main() -> Result<(), Box> { - let (remaining, parsed) = parse_pair("aTcG")?; - assert_eq!(parsed, ("a", "T")); - assert_eq!(remaining, "cG"); - - assert!(parse_pair("Dct").is_err()); - -# Ok(()) -} -``` - - -## Extra Nom Tools - -After using `alt()` and `tuple()`, you might also be interested in a few other parsers that do similar things: - -| combinator | usage | input | output | comment | -|---|---|---|---|---| -| [delimited](https://docs.rs/nom/latest/nom/sequence/fn.delimited.html) | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `Ok(("cd", "ab"))` || -| [preceded](https://docs.rs/nom/latest/nom/sequence/fn.preceded.html) | `preceded(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "XY"))` || -| [terminated](https://docs.rs/nom/latest/nom/sequence/fn.terminated.html) | `terminated(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "ab"))` || -| [pair](https://docs.rs/nom/latest/nom/sequence/fn.pair.html) | `pair(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", ("ab", "XY")))` || -| [separated_pair](https://docs.rs/nom/latest/nom/sequence/fn.separated_pair.html) | `separated_pair(tag("hello"), char(','), tag("world"))` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` || +//! # Chapter 3: Alternatives and Composition +//! +//! In the last chapter, we saw how to create simple parsers using the `tag` function; +//! and some of Nom's prebuilt parsers. +//! 
+//! In this chapter, we explore two other widely used features of Nom: +//! alternatives and composition. +//! +//! ## Alternatives +//! +//! Sometimes, we might want to choose between two parsers; and we're happy with +//! either being used. +//! +//! Nom gives us this ability through the `alt()` combinator. +//! +//! ```rust +//! # extern crate nom; +//! use nom::branch::alt; +//! ``` +//! +//! The `alt()` combinator will execute each parser in a tuple until it finds one +//! that does not error. If all error, then by default you are given the error from +//! the last parser. +//! +//! We can see a basic example of `alt()` below. +//! +//! ```rust +//! # extern crate nom; +//! use nom::branch::alt; +//! use nom::bytes::complete::tag; +//! use nom::IResult; +//! # use std::error::Error; +//! +//! fn parse_abc_or_def(input: &str) -> IResult<&str, &str> { +//! alt(( +//! tag("abc"), +//! tag("def") +//! ))(input) +//! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! let (leftover_input, output) = parse_abc_or_def("abcWorld")?; +//! assert_eq!(leftover_input, "World"); +//! assert_eq!(output, "abc"); +//! +//! assert!(parse_abc_or_def("ghiWorld").is_err()); +//! # Ok(()) +//! } +//! ``` +//! +//! ## Composition +//! +//! Now that we can create more interesting parsers, we can compose them together. +//! The simplest way to do this is just to evaluate them in sequence: +//! +//! ```rust +//! # extern crate nom; +//! use nom::branch::alt; +//! use nom::bytes::complete::tag; +//! use nom::IResult; +//! # use std::error::Error; +//! +//! fn parse_abc(input: &str) -> IResult<&str, &str> { +//! tag("abc")(input) +//! } +//! fn parse_def_or_ghi(input: &str) -> IResult<&str, &str> { +//! alt(( +//! tag("def"), +//! tag("ghi") +//! ))(input) +//! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! let input = "abcghi"; +//! let (remainder, abc) = parse_abc(input)?; +//! let (remainder, def_or_ghi) = parse_def_or_ghi(remainder)?; +//! 
println!("first parsed: {abc}; then parsed: {def_or_ghi};"); +//! +//! # Ok(()) +//! } +//! ``` +//! +//! Composing tags is such a common requirement that, in fact, Nom has a few built-in +//! combinators to do it. The simplest of these is `tuple()`. The `tuple()` combinator takes a tuple of parsers, +//! and either returns `Ok` with a tuple of all of their successful parses, or it +//! returns the `Err` of the first failed parser. +//! +//! ```rust +//! # extern crate nom; +//! use nom::sequence::tuple; +//! ``` +//! +//! +//! ```rust +//! # extern crate nom; +//! use nom::branch::alt; +//! use nom::sequence::tuple; +//! use nom::bytes::complete::tag_no_case; +//! use nom::character::complete::{digit1}; +//! use nom::IResult; +//! # use std::error::Error; +//! +//! fn parse_base(input: &str) -> IResult<&str, &str> { +//! alt(( +//! tag_no_case("a"), +//! tag_no_case("t"), +//! tag_no_case("c"), +//! tag_no_case("g") +//! ))(input) +//! } +//! +//! fn parse_pair(input: &str) -> IResult<&str, (&str, &str)> { +//! // the many_m_n combinator might also be appropriate here. +//! tuple(( +//! parse_base, +//! parse_base, +//! ))(input) +//! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! let (remaining, parsed) = parse_pair("aTcG")?; +//! assert_eq!(parsed, ("a", "T")); +//! assert_eq!(remaining, "cG"); +//! +//! assert!(parse_pair("Dct").is_err()); +//! +//! # Ok(()) +//! } +//! ``` +//! +//! +//! ## Extra Nom Tools +//! +//! After using `alt()` and `tuple()`, you might also be interested in a few other parsers that do similar things: +//! +//! | combinator | usage | input | output | comment | +//! |---|---|---|---|---| +//! | [delimited](https://docs.rs/nom/latest/nom/sequence/fn.delimited.html) | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `Ok(("cd", "ab"))` || +//! | [preceded](https://docs.rs/nom/latest/nom/sequence/fn.preceded.html) | `preceded(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "XY"))` || +//! 
| [terminated](https://docs.rs/nom/latest/nom/sequence/fn.terminated.html) | `terminated(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "ab"))` || +//! | [pair](https://docs.rs/nom/latest/nom/sequence/fn.pair.html) | `pair(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", ("ab", "XY")))` || +//! | [separated_pair](https://docs.rs/nom/latest/nom/sequence/fn.separated_pair.html) | `separated_pair(tag("hello"), char(','), tag("world"))` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` || diff --git a/src/_tutorial/chapter_4.md b/src/_tutorial/chapter_4.md index 20bde8bd..d223dd2d 100644 --- a/src/_tutorial/chapter_4.md +++ b/src/_tutorial/chapter_4.md @@ -1,161 +1,161 @@ -# Chapter 4: Parsers With Custom Return Types - -So far, we have seen mostly functions that take an `&str`, and return a -`IResult<&str, &str>`. Splitting strings into smaller strings is certainly useful, -but it's not the only thing Nom is capable of! - -A useful operation when parsing is to convert between types; for example -parsing from `&str` to another primitive, like `bool`. - -All we need to do for our parser to return a different type is to change -the second type parameter of `IResult` to the desired return type. -For example, to return a bool, return a `IResult<&str, bool>`. - -Recall that the first type parameter of the `IResult` is the input -type, so even if you're returning something different, if your input -is a `&str`, the first type argument of `IResult` should be also. - -Until you have read the chapter on Errors, we strongly suggest avoiding -the use of parsers built into Rust (like `str.parse`); as they require -special handling to work well with Nom. - -That said, one Nom-native way of doing a type conversion is to use the -[`value`](https://docs.rs/nom/latest/nom/combinator/fn.value.html) combinator -to convert from a successful parse to a particular value. - -The following code converts from a string containing `"true"` or `"false"`, -to the corresponding `bool`. 
- -```rust -# extern crate nom; -# use std::error::Error; -use nom::IResult; -use nom::bytes::complete::tag; -use nom::combinator::value; -use nom::branch::alt; - -fn parse_bool(input: &str) -> IResult<&str, bool> { - // either, parse `"true"` -> `true`; `"false"` -> `false`, or error. - alt(( - value(true, tag("true")), - value(false, tag("false")), - ))(input) -} - -fn main() -> Result<(), Box> { - // Parses the `"true"` out. - let (remaining, parsed) = parse_bool("true|false")?; - assert_eq!(parsed, true); - assert_eq!(remaining, "|false"); - - // If we forget about the "|", we get an error. - let parsing_error = parse_bool(remaining); - assert!(parsing_error.is_err()); - - // Skipping the first byte gives us `false`! - let (remaining, parsed) = parse_bool(&remaining[1..])?; - assert_eq!(parsed, false); - assert_eq!(remaining, ""); - - - -# Ok(()) -} -``` - -## Nom's in-built parser functions - -Nom has a wide array of parsers built in. Here is a list of -[parsers which recognize specific characters](https://docs.rs/nom/latest/nom/character/complete/index.html). - -Some of them we have seen before in Chapter 2, but now we also can try out the parsers that return different -types, like `i32`. An example of this parser is shown in the next section. - -## Building a More Complex Example - -A more complex example of parsing custom types might be parsing a 2D coordinate. - -Let us try to figure out how to design this. - - - We know that we want to take a string, like `"(3, -2)"`, and convert into - a `Coordinate` struct. - - We can split this into three parts: - -```ignore -(vvvvvvvvvvvvv) # The outer brackets. - vvvv , vvvv # The comma, separating values. - 3 -2 # The actual integers. -``` - - - So, we will need three parsers, to deal with this: - 1. A parser for integers, which will deal with the raw numbers. - 2. A parser for comma seperated pair, which will split it up into integers. - 3. A parser for the outer brackets. 
- - - We can see below how we achieve this: - -```rust -# extern crate nom; -# use std::error::Error; -use nom::IResult; -use nom::bytes::complete::tag; -use nom::sequence::{separated_pair, delimited}; - -// This is the type we will parse into. -#[derive(Debug,PartialEq)] -pub struct Coordinate { - pub x: i32, - pub y: i32, -} - -// 1. Nom has an in-built i32 parser. -use nom::character::complete::i32; - -// 2. Use the `separated_pair` parser to combine two parsers (in this case, -// both `i32`), ignoring something in-between. -fn parse_integer_pair(input: &str) -> IResult<&str, (i32, i32)> { - separated_pair( - i32, - tag(", "), - i32 - )(input) -} - -// 3. Use the `delimited` parser to apply a parser, ignoring the results -// of two surrounding parsers. -fn parse_coordinate(input: &str) -> IResult<&str, Coordinate> { - let (remaining, (x, y)) = delimited( - tag("("), - parse_integer_pair, - tag(")") - )(input)?; - - // Note: we could construct this by implementing `From` on `Coordinate`, - // We don't, just so it's obvious what's happening. - Ok((remaining, Coordinate {x, y})) - -} - -fn main() -> Result<(), Box> { - let (_, parsed) = parse_coordinate("(3, 5)")?; - assert_eq!(parsed, Coordinate {x: 3, y: 5}); - - let (_, parsed) = parse_coordinate("(2, -4)")?; - assert_eq!(parsed, Coordinate {x: 2, y: -4}); - - let parsing_error = parse_coordinate("(3,)"); - assert!(parsing_error.is_err()); - - let parsing_error = parse_coordinate("(,3)"); - assert!(parsing_error.is_err()); - - let parsing_error = parse_coordinate("Ferris"); - assert!(parsing_error.is_err()); - - -# Ok(()) -} -``` - -As an exercise, you might want to explore how to make this parser deal gracefully with -whitespace in the input. +//! # Chapter 4: Parsers With Custom Return Types +//! +//! So far, we have seen mostly functions that take an `&str`, and return a +//! `IResult<&str, &str>`. Splitting strings into smaller strings is certainly useful, +//! but it's not the only thing Nom is capable of! 
+//! A useful operation when parsing is to convert between types; for example +//! parsing from `&str` to another primitive, like `bool`. +//! +//! All we need to do for our parser to return a different type is to change +//! the second type parameter of `IResult` to the desired return type. +//! For example, to return a `bool`, return an `IResult<&str, bool>`. +//! +//! Recall that the first type parameter of the `IResult` is the input +//! type, so even if you're returning something different, if your input +//! is a `&str`, the first type argument of `IResult` should be also. +//! +//! Until you have read the chapter on Errors, we strongly suggest avoiding +//! the use of parsers built into Rust (like `str.parse`); as they require +//! special handling to work well with Nom. +//! +//! That said, one Nom-native way of doing a type conversion is to use the +//! [`value`](https://docs.rs/nom/latest/nom/combinator/fn.value.html) combinator +//! to convert from a successful parse to a particular value. +//! +//! The following code converts from a string containing `"true"` or `"false"`, +//! to the corresponding `bool`. +//! +//! ```rust +//! # extern crate nom; +//! # use std::error::Error; +//! use nom::IResult; +//! use nom::bytes::complete::tag; +//! use nom::combinator::value; +//! use nom::branch::alt; +//! +//! fn parse_bool(input: &str) -> IResult<&str, bool> { +//! // either, parse `"true"` -> `true`; `"false"` -> `false`, or error. +//! alt(( +//! value(true, tag("true")), +//! value(false, tag("false")), +//! ))(input) +//! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! // Parses the `"true"` out. +//! let (remaining, parsed) = parse_bool("true|false")?; +//! assert_eq!(parsed, true); +//! assert_eq!(remaining, "|false"); +//! +//! // If we forget about the "|", we get an error. +//! let parsing_error = parse_bool(remaining); +//! assert!(parsing_error.is_err()); +//! +//! // Skipping the first byte gives us `false`! +//! 
let (remaining, parsed) = parse_bool(&remaining[1..])?; +//! assert_eq!(parsed, false); +//! assert_eq!(remaining, ""); +//! +//! +//! +//! # Ok(()) +//! } +//! ``` +//! +//! ## Nom's in-built parser functions +//! +//! Nom has a wide array of parsers built in. Here is a list of +//! [parsers which recognize specific characters](https://docs.rs/nom/latest/nom/character/complete/index.html). +//! +//! Some of them we have seen before in Chapter 2, but now we also can try out the parsers that return different +//! types, like `i32`. An example of this parser is shown in the next section. +//! +//! ## Building a More Complex Example +//! +//! A more complex example of parsing custom types might be parsing a 2D coordinate. +//! +//! Let us try to figure out how to design this. +//! +//! - We know that we want to take a string, like `"(3, -2)"`, and convert it into +//! a `Coordinate` struct. +//! - We can split this into three parts: +//! +//! ```ignore +//! (vvvvvvvvvvvvv) # The outer brackets. +//! vvvv , vvvv # The comma, separating values. +//! 3 -2 # The actual integers. +//! ``` +//! +//! - So, we will need three parsers to deal with this: +//! 1. A parser for integers, which will deal with the raw numbers. +//! 2. A parser for a comma-separated pair, which will split it up into integers. +//! 3. A parser for the outer brackets. +//! +//! - We can see below how we achieve this: +//! +//! ```rust +//! # extern crate nom; +//! # use std::error::Error; +//! use nom::IResult; +//! use nom::bytes::complete::tag; +//! use nom::sequence::{separated_pair, delimited}; +//! +//! // This is the type we will parse into. +//! #[derive(Debug,PartialEq)] +//! pub struct Coordinate { +//! pub x: i32, +//! pub y: i32, +//! } +//! +//! // 1. Nom has an in-built i32 parser. +//! use nom::character::complete::i32; +//! +//! // 2. Use the `separated_pair` parser to combine two parsers (in this case, +//! // both `i32`), ignoring something in-between. +//! 
fn parse_integer_pair(input: &str) -> IResult<&str, (i32, i32)> { +//! separated_pair( +//! i32, +//! tag(", "), +//! i32 +//! )(input) +//! } +//! +//! // 3. Use the `delimited` parser to apply a parser, ignoring the results +//! // of two surrounding parsers. +//! fn parse_coordinate(input: &str) -> IResult<&str, Coordinate> { +//! let (remaining, (x, y)) = delimited( +//! tag("("), +//! parse_integer_pair, +//! tag(")") +//! )(input)?; +//! +//! // Note: we could construct this by implementing `From` on `Coordinate`, +//! // We don't, just so it's obvious what's happening. +//! Ok((remaining, Coordinate {x, y})) +//! +//! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! let (_, parsed) = parse_coordinate("(3, 5)")?; +//! assert_eq!(parsed, Coordinate {x: 3, y: 5}); +//! +//! let (_, parsed) = parse_coordinate("(2, -4)")?; +//! assert_eq!(parsed, Coordinate {x: 2, y: -4}); +//! +//! let parsing_error = parse_coordinate("(3,)"); +//! assert!(parsing_error.is_err()); +//! +//! let parsing_error = parse_coordinate("(,3)"); +//! assert!(parsing_error.is_err()); +//! +//! let parsing_error = parse_coordinate("Ferris"); +//! assert!(parsing_error.is_err()); +//! +//! +//! # Ok(()) +//! } +//! ``` +//! +//! As an exercise, you might want to explore how to make this parser deal gracefully with +//! whitespace in the input. diff --git a/src/_tutorial/chapter_5.md b/src/_tutorial/chapter_5.md index b23dd589..ae9bf7b8 100644 --- a/src/_tutorial/chapter_5.md +++ b/src/_tutorial/chapter_5.md @@ -1,64 +1,64 @@ -# Chapter 5: Repeating with Predicates - -Just as, when programming, the humble while loop unlocks many useful -features; in Nom, repeating a parser multiple times can be incredibly useful - -There are, however, two ways of including repeating functionality into Nom -- -parsers which are governed by a predicate; and combinators which repeat -a parser. - -## Parsers which use a predicate - -A `predicate` is a function which returns a boolean value (i.e. 
given some input, -it returns `true` or `false`). These are incredibly common when parsing -- for instance, -a predicate `is_vowel` might decide whether a character is an english vowel (a, e, i, o or u). - -These can be used to make parsers that Nom hasn't built in. For instance, the below -parser will take as many vowels as possible. - -There are a few different categories of predicate parsers that are worth mentioning: - - - For bytes, there are three different categories of parser: `take_till`, `take_until`, and `take_while`. - `take_till` will continue consuming input until its input meets the predicate. - `take_while` will continue consuming input until its input *does not* meet the predicate. - `take_until` looks a lot like a predicate parser, but simply consumes until the first - occurence of the pattern of bytes. - - Some parsers have a "twin" with a `1` at the end of their name -- for example, `take_while` - has `take_while1`. The difference between them is that `take_while` could return an empty - slice if the first byte does not satisfy a predicate. `take_while1` returns an error if - the predicate is not met. - - As a special case, `take_while_m_n` is like `take_while`, but guarantees that it will consume - at least `m` bytes, and no more than `n` bytes. - - -```rust -# extern crate nom; -# use std::error::Error; -use nom::IResult; -use nom::bytes::complete::{tag, take_until, take_while}; -use nom::character::{is_space}; -use nom::sequence::{terminated}; - -fn parse_sentence(input: &str) -> IResult<&str, &str> { - terminated(take_until("."), take_while(|c| c == '.' || c == ' '))(input) -} - -fn main() -> Result<(), Box> { - let (remaining, parsed) = parse_sentence("I am Tom. 
I write Rust.")?; - assert_eq!(parsed, "I am Tom"); - assert_eq!(remaining, "I write Rust."); - - let parsing_error = parse_sentence("Not a sentence (no period at the end)"); - assert!(parsing_error.is_err()); - - -# Ok(()) -} -``` - For detailed examples, see their documentation, shown below: - -| combinator | usage | input | output | comment | -|---|---|---|---|---| - | [take_while](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_while.html) | `take_while(is_alphabetic)` | `"abc123"` | `Ok(("123", "abc"))` |Returns the longest list of bytes for which the provided function returns true. `take_while1` does the same, but must return at least one character. `take_while_m_n` does the same, but must return between `m` and `n` characters.| -| [take_till](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_till.html) | `take_till(is_alphabetic)` | `"123abc"` | `Ok(("abc", "123"))` |Returns the longest list of bytes or characters until the provided function returns true. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(\|c\| !f(c))`| -| [take_until](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_until.html) | `take_until("world")` | `"Hello world"` | `Ok(("world", "Hello "))` |Returns the longest list of bytes or characters until the provided tag is found. `take_until1` does the same, but must return at least one character| +//! # Chapter 5: Repeating with Predicates +//! +//! Just as, when programming, the humble while loop unlocks many useful +//! features; in Nom, repeating a parser multiple times can be incredibly useful +//! +//! There are, however, two ways of including repeating functionality into Nom -- +//! parsers which are governed by a predicate; and combinators which repeat +//! a parser. +//! +//! ## Parsers which use a predicate +//! +//! A `predicate` is a function which returns a boolean value (i.e. given some input, +//! 
it returns `true` or `false`). These are incredibly common when parsing -- for instance, +//! a predicate `is_vowel` might decide whether a character is an English vowel (a, e, i, o or u). +//! +//! These can be used to make parsers that Nom hasn't built in. For instance, the below +//! parser will take as many vowels as possible. +//! +//! There are a few different categories of predicate parsers that are worth mentioning: +//! +//! - For bytes, there are three different categories of parser: `take_till`, `take_until`, and `take_while`. +//! `take_till` will continue consuming input until its input meets the predicate. +//! `take_while` will continue consuming input until its input *does not* meet the predicate. +//! `take_until` looks a lot like a predicate parser, but simply consumes until the first +//! occurrence of the pattern of bytes. +//! - Some parsers have a "twin" with a `1` at the end of their name -- for example, `take_while` +//! has `take_while1`. The difference between them is that `take_while` could return an empty +//! slice if the first byte does not satisfy a predicate. `take_while1` returns an error if +//! the predicate is not met. +//! - As a special case, `take_while_m_n` is like `take_while`, but guarantees that it will consume +//! at least `m` bytes, and no more than `n` bytes. +//! +//! +//! ```rust +//! # extern crate nom; +//! # use std::error::Error; +//! use nom::IResult; +//! use nom::bytes::complete::{tag, take_until, take_while}; +//! use nom::character::{is_space}; +//! use nom::sequence::{terminated}; +//! +//! fn parse_sentence(input: &str) -> IResult<&str, &str> { +//! terminated(take_until("."), take_while(|c| c == '.' || c == ' '))(input) +//! } +//! +//! fn main() -> Result<(), Box<dyn Error>> { +//! let (remaining, parsed) = parse_sentence("I am Tom. I write Rust.")?; +//! assert_eq!(parsed, "I am Tom"); +//! assert_eq!(remaining, "I write Rust."); +//! +//! 
let parsing_error = parse_sentence("Not a sentence (no period at the end)"); +//! assert!(parsing_error.is_err()); +//! +//! +//! # Ok(()) +//! } +//! ``` +//! For detailed examples, see their documentation, shown below: +//! +//! | combinator | usage | input | output | comment | +//! |---|---|---|---|---| +//! | [take_while](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_while.html) | `take_while(is_alphabetic)` | `"abc123"` | `Ok(("123", "abc"))` |Returns the longest list of bytes for which the provided function returns true. `take_while1` does the same, but must return at least one character. `take_while_m_n` does the same, but must return between `m` and `n` characters.| +//! | [take_till](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_till.html) | `take_till(is_alphabetic)` | `"123abc"` | `Ok(("abc", "123"))` |Returns the longest list of bytes or characters until the provided function returns true. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(\|c\| !f(c))`| +//! | [take_until](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_until.html) | `take_until("world")` | `"Hello world"` | `Ok(("world", "Hello "))` |Returns the longest list of bytes or characters until the provided tag is found. `take_until1` does the same, but must return at least one character| diff --git a/src/_tutorial/chapter_6.md b/src/_tutorial/chapter_6.md index 4f807ab3..a67f5933 100644 --- a/src/_tutorial/chapter_6.md +++ b/src/_tutorial/chapter_6.md @@ -1,39 +1,39 @@ -# Chapter 6: Repeating Parsers - -A single parser which repeats a predicate is useful, but more useful still is a combinator that -repeats a parser. Nom has multiple combinators which operate on this principle; the most obvious of -which is `many0`, which applies a parser as many times as possible; and returns a vector of -the results of those parses. 
Here is an example: - -```rust -# extern crate nom; -# use std::error::Error; -use nom::IResult; -use nom::multi::many0; -use nom::bytes::complete::tag; - -fn parser(s: &str) -> IResult<&str, Vec<&str>> { - many0(tag("abc"))(s) -} - -fn main() { - assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); - assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); - assert_eq!(parser("123123"), Ok(("123123", vec![]))); - assert_eq!(parser(""), Ok(("", vec![]))); -} -``` - -There are many different parsers to choose from: - -| combinator | usage | input | output | comment | -|---|---|---|---|---| -| [count](https://docs.rs/nom/latest/nom/multi/fn.count.html) | `count(take(2), 3)` | `"abcdefgh"` | `Ok(("gh", vec!["ab", "cd", "ef"]))` |Applies the child parser a specified number of times| -| [many0](https://docs.rs/nom/latest/nom/multi/fn.many0.html) | `many0(tag("ab"))` | `"abababc"` | `Ok(("c", vec!["ab", "ab", "ab"]))` |Applies the parser 0 or more times and returns the list of results in a Vec. `many1` does the same operation but must return at least one element| -| [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| -| [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. 
Returns a tuple containing the list of results from the first in a Vec and the result of the second| -| [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must returns at least one element| -| [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time| -| [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value| -| [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times| - +//! # Chapter 6: Repeating Parsers +//! +//! A single parser which repeats a predicate is useful, but more useful still is a combinator that +//! repeats a parser. Nom has multiple combinators which operate on this principle; the most obvious of +//! which is `many0`, which applies a parser as many times as possible; and returns a vector of +//! the results of those parses. Here is an example: +//! +//! ```rust +//! # extern crate nom; +//! # use std::error::Error; +//! use nom::IResult; +//! use nom::multi::many0; +//! use nom::bytes::complete::tag; +//! +//! fn parser(s: &str) -> IResult<&str, Vec<&str>> { +//! many0(tag("abc"))(s) +//! } +//! +//! fn main() { +//! assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); +//! assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); +//! 
assert_eq!(parser("123123"), Ok(("123123", vec![]))); +//! assert_eq!(parser(""), Ok(("", vec![]))); +//! } +//! ``` +//! +//! There are many different parsers to choose from: +//! +//! | combinator | usage | input | output | comment | +//! |---|---|---|---|---| +//! | [count](https://docs.rs/nom/latest/nom/multi/fn.count.html) | `count(take(2), 3)` | `"abcdefgh"` | `Ok(("gh", vec!["ab", "cd", "ef"]))` |Applies the child parser a specified number of times| +//! | [many0](https://docs.rs/nom/latest/nom/multi/fn.many0.html) | `many0(tag("ab"))` | `"abababc"` | `Ok(("c", vec!["ab", "ab", "ab"]))` |Applies the parser 0 or more times and returns the list of results in a Vec. `many1` does the same operation but must return at least one element| +//! | [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| +//! | [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| +//! | [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must return at least one element| +//! | [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time| +//!
| [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value| +//! | [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times| +//! diff --git a/src/_tutorial/chapter_7.md b/src/_tutorial/chapter_7.md index 0645753e..409e61a2 100644 --- a/src/_tutorial/chapter_7.md +++ b/src/_tutorial/chapter_7.md @@ -1,10 +1,10 @@ -# Chapter 7: Using Errors from Outside Nom - -[Nom has other documentation about errors, so in place of this chapter, read this page.](https://github.com/Geal/nom/blob/main/doc/error_management.md) - -## Particular Notes - - - It's particularly useful to use the `map_res` function. It allows you to - convert an external error to a Nom error. For an example, - see [the Nom example on the front page](https://github.com/Geal/nom#example). - +//! # Chapter 7: Using Errors from Outside Nom +//! +//! [Nom has other documentation about errors, so in place of this chapter, read this page.](https://github.com/Geal/nom/blob/main/doc/error_management.md) +//! +//! ## Particular Notes +//! +//! - It's particularly useful to use the `map_res` function. It allows you to +//! convert an external error to a Nom error. For an example, +//! see [the Nom example on the front page](https://github.com/Geal/nom#example). +//! diff --git a/src/_tutorial/mod.md b/src/_tutorial/mod.md index 7152ac6d..2ca4c1a8 100644 --- a/src/_tutorial/mod.md +++ b/src/_tutorial/mod.md @@ -1,26 +1,26 @@ -# The Nominomicon - -Welcome to Nominomicon; a guide to using the Nom parser for great good. -This guide will give you an introduction to the theory and practice of -using Nom. 
- -This guide assumes only that you are: - - Wanting to learn Nom, - - Already familiar with Rust. - -Nom is a parser-combinator library. In other words, it gives you tools to define: - - "parsers" (a function that takes an input, and gives back an output), and - - "combinators" (functions that take parsers, and _combine_ them together!). - -By combining parsers with combinators, you can build complex parsers up from -simpler ones. These complex parsers are enough to understand HTML, mkv or Python! - -Before we set off, it's important to list some caveats: - - This guide is for Nom7. Nom has undergone significant changes, so if - you are searching for documentation or StackOverflow answers, you may - find older documentation. Some common indicators that it is an old version are: - - Documentation older than 21st August, 2021 - - Use of the `named!` macro - - Use of `CompleteStr` or `CompleteByteArray`. - - Nom can parse (almost) anything; but this guide will focus almost entirely on parsing - complete `&str` into things. +//! # The Nominomicon +//! +//! Welcome to Nominomicon; a guide to using the Nom parser for great good. +//! This guide will give you an introduction to the theory and practice of +//! using Nom. +//! +//! This guide assumes only that you are: +//! - Wanting to learn Nom, +//! - Already familiar with Rust. +//! +//! Nom is a parser-combinator library. In other words, it gives you tools to define: +//! - "parsers" (a function that takes an input, and gives back an output), and +//! - "combinators" (functions that take parsers, and _combine_ them together!). +//! +//! By combining parsers with combinators, you can build complex parsers up from +//! simpler ones. These complex parsers are enough to understand HTML, mkv or Python! +//! +//! Before we set off, it's important to list some caveats: +//! - This guide is for Nom7. Nom has undergone significant changes, so if +//! you are searching for documentation or StackOverflow answers, you may +//! 
find older documentation. Some common indicators that it is an old version are: +//! - Documentation older than 21st August, 2021 +//! - Use of the `named!` macro +//! - Use of `CompleteStr` or `CompleteByteArray`. +//! - Nom can parse (almost) anything; but this guide will focus almost entirely on parsing +//! complete `&str` into things. From 63e0168fb36b74f32973c00642806db34d4c31ec Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 20 Feb 2023 10:05:51 -0600 Subject: [PATCH 06/11] docs(tutorial): Replace tutorial with nominomicon --- src/_tutorial.rs | 538 ------------------- src/_tutorial/{chapter_1.md => chapter_1.rs} | 0 src/_tutorial/{chapter_2.md => chapter_2.rs} | 0 src/_tutorial/{chapter_3.md => chapter_3.rs} | 0 src/_tutorial/{chapter_4.md => chapter_4.rs} | 0 src/_tutorial/{chapter_5.md => chapter_5.rs} | 0 src/_tutorial/{chapter_6.md => chapter_6.rs} | 0 src/_tutorial/{chapter_7.md => chapter_7.rs} | 0 src/_tutorial/{mod.md => mod.rs} | 0 9 files changed, 538 deletions(-) delete mode 100644 src/_tutorial.rs rename src/_tutorial/{chapter_1.md => chapter_1.rs} (100%) rename src/_tutorial/{chapter_2.md => chapter_2.rs} (100%) rename src/_tutorial/{chapter_3.md => chapter_3.rs} (100%) rename src/_tutorial/{chapter_4.md => chapter_4.rs} (100%) rename src/_tutorial/{chapter_5.md => chapter_5.rs} (100%) rename src/_tutorial/{chapter_6.md => chapter_6.rs} (100%) rename src/_tutorial/{chapter_7.md => chapter_7.rs} (100%) rename src/_tutorial/{mod.md => mod.rs} (100%) diff --git a/src/_tutorial.rs b/src/_tutorial.rs deleted file mode 100644 index 756d4622..00000000 --- a/src/_tutorial.rs +++ /dev/null @@ -1,538 +0,0 @@ -//! ## Why use winnow -//! -//! If you want to write: -//! -//! ### Binary format parsers -//! -//! winnow was designed to properly parse binary formats from the beginning. Compared -//! to the usual handwritten C parsers, winnow parsers are just as fast, free from -//! buffer overflow vulnerabilities, and handle common patterns for you: -//! -//! 
- [TLV](https://en.wikipedia.org/wiki/Type-length-value) -//! - Bit level parsing -//! - Hexadecimal viewer in the debugging macros for easy data analysis -//! - Streaming parsers for network formats and huge files -//! -//! Example projects: -//! -//! - [FLV parser](https://github.com/rust-av/flavors) -//! - [Matroska parser](https://github.com/rust-av/matroska) -//! - [tar parser](https://github.com/Keruspe/tar-parser.rs) -//! -//! ### Text format parsers -//! -//! While winnow was made for binary format at first, it soon grew to work just as -//! well with text formats. From line based formats like CSV, to more complex, nested -//! formats such as JSON, winnow can manage it, and provides you with useful tools: -//! -//! - Fast case insensitive comparison -//! - Recognizers for escaped strings -//! - Regular expressions can be embedded in winnow parsers to represent complex character patterns succinctly -//! - Special care has been given to managing non ASCII characters properly -//! -//! Example projects: -//! -//! - [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser) -//! - [TOML parser](https://github.com/joelself/tomllib) -//! -//! ### Programming language parsers -//! -//! While programming language parsers are usually written manually for more -//! flexibility and performance, winnow can be (and has been successfully) used -//! as a prototyping parser for a language. -//! -//! winnow will get you started quickly with powerful custom error types, that you -//! can use to -//! pinpoint the exact line and column of the error. No need for separate -//! tokenizing, lexing and parsing phases: winnow can automatically handle whitespace -//! parsing, and construct an AST in place. -//! -//! ### Streaming formats -//! -//! While a lot of formats (and the code handling them) assume that they can fit -//! the complete data in memory, there are formats for which we only get a part -//! 
of the data at once, like network formats, or huge files. -//! winnow has been designed for a correct behaviour with partial data: If there is -//! not enough data to decide, winnow will tell you it needs more instead of silently -//! returning a wrong result. Whether your data comes entirely or in chunks, the -//! result should be the same. -//! -//! It allows you to build powerful, deterministic state machines for your protocols. -//! -//! Example projects: -//! -//! - [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser) -//! - [Using winnow with generators](https://github.com/Geal/generator_winnow) -//! -//! ## Parser combinators -//! -//! Parser combinators are an approach to parsers that is very different from -//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and -//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar -//! in a separate file and generating the corresponding code, you use very -//! small functions with very specific purpose, like "take 5 bytes", or -//! "recognize the word 'HTTP'", and assemble them in meaningful patterns -//! like "recognize 'HTTP', then a space, then a version". -//! The resulting code is small, and looks like the grammar you would have -//! written with other parser approaches. -//! -//! This has a few advantages: -//! -//! - The parsers are small and easy to write -//! - The parsers components are easy to reuse (if they're general enough, please add them to winnow!) -//! - The parsers components are easy to test separately (unit tests and property-based tests) -//! - The parser combination code looks close to the grammar you would have written -//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest -//! -//! ## Technical features -//! -//! winnow parsers are for: -//! 
- [x] **byte-oriented**: The basic type is `&[u8]` and parsers will work as much as possible on byte array slices (but are not limited to them) -//! - [x] **bit-oriented**: winnow can address a byte slice as a bit stream -//! - [x] **string-oriented**: The same kind of combinators can apply on UTF-8 strings as well -//! - [x] **zero-copy**: If a parser returns a subset of its input data, it will return a slice of that input, without copying -//! - [x] **streaming**: winnow can work on partial data and detect when it needs more data to produce a correct result -//! - [x] **descriptive errors**: The parsers can aggregate a list of error codes with pointers to the incriminated input slice. Those error lists can be pattern matched to provide useful messages. -//! - [x] **custom error types**: You can provide a specific type to improve errors returned by parsers -//! - [x] **safe parsing**: winnow leverages Rust's safe memory handling and powerful types, and parsers are routinely fuzzed and tested with real world data. So far, the only flaws found by fuzzing were in code written outside of winnow -//! - [x] **speed**: Benchmarks have shown that winnow parsers often outperform many parser combinators library like Parsec and attoparsec, some regular expression engines and even handwritten C parsers -//! -//! Some benchmarks are available on [Github](https://github.com/rosetta-rs/parser-rosetta-rs). -//! -//! ## Parser combinators -//! -//! Parser combinators are an approach to parsers that is very different from -//! software like [lex](https://en.wikipedia.org/wiki/Lex_(software)) and -//! [yacc](https://en.wikipedia.org/wiki/Yacc). Instead of writing the grammar -//! in a separate syntax and generating the corresponding code, you use very small -//! functions with very specific purposes, like "take 5 bytes", or "recognize the -//! word 'HTTP'", and assemble them in meaningful patterns like "recognize -//! 'HTTP', then a space, then a version". -//! 
The resulting code is small, and looks like the grammar you would have -//! written with other parser approaches. -//! -//! This gives us a few advantages: -//! -//! - The parsers are small and easy to write -//! - The parsers components are easy to reuse (if they're general enough, please add them to winnow!) -//! - The parsers components are easy to test separately (unit tests and property-based tests) -//! - The parser combination code looks close to the grammar you would have written -//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest -//! -//! Here is an example of one such parser, to recognize text between parentheses: -//! -//! ```rust -//! use winnow::{ -//! IResult, -//! sequence::delimited, -//! bytes::take_till1 -//! }; -//! -//! fn parens(input: &str) -> IResult<&str, &str> { -//! delimited('(', take_till1(")"), ')')(input) -//! } -//! ``` -//! -//! It defines a function named `parens` which will recognize a sequence of the -//! character `(`, the longest byte array not containing `)`, then the character -//! `)`, and will return the byte array in the middle. -//! -//! Here is another parser, written without using winnow's combinators this time: -//! -//! ```rust -//! use winnow::{IResult, error::ErrMode, error::Needed}; -//! -//! # fn main() { -//! fn take4(i: &[u8]) -> IResult<&[u8], &[u8]>{ -//! if i.len() < 4 { -//! Err(ErrMode::Incomplete(Needed::new(4))) -//! } else { -//! Ok((&i[4..], &i[0..4])) -//! } -//! } -//! # } -//! ``` -//! -//! This function takes a byte array as input, and tries to consume 4 bytes. -//! Writing all the parsers manually, like this, is dangerous, despite Rust's -//! safety features. There are still a lot of mistakes one can make. That's why -//! winnow provides a list of functions to help in developing parsers. -//! -//! With functions, you would write it like this: -//! -//! ```rust -//! use winnow::{IResult, bytes::take, stream::Partial}; -//! 
fn take4(input: Partial<&str>) -> IResult<Partial<&str>, &str> { -//! take(4u8)(input) -//! } -//! ``` -//! -//! A parser in winnow is a function which, for an input type `I`, an output type `O` -//! and an optional error type `E`, will have the following signature: -//! -//! ```rust,compile_fail -//! fn parser(input: I) -> IResult<I, O, E>; -//! ``` -//! -//! Or like this, if you don't want to specify a custom error type (it will be `(I, ErrorKind)` by default): -//! -//! ```rust,compile_fail -//! fn parser(input: I) -> IResult<I, O>; -//! ``` -//! -//! `IResult` is an alias for the `Result` type: -//! -//! ```rust -//! use winnow::{error::Needed, error::Error}; -//! -//! type IResult<I, O, E = Error<I>> = Result<(I, O), Err<E>>; -//! -//! enum Err<E> { -//! Incomplete(Needed), -//! Error(E), -//! Failure(E), -//! } -//! ``` -//! -//! It can have the following values: -//! -//! - A correct result `Ok((I,O))` with the first element being the remaining of the input (not parsed yet), and the second the output value; -//! - An error `Err(ErrMode::Backtrack(c))` with `c` an error that can be built from the input position and a parser specific error -//! - An error `Err(ErrMode::Incomplete(Needed))` indicating that more input is necessary. `Needed` can indicate how much data is needed -//! - An error `Err(ErrMode::Cut(c))`. It works like the `Backtrack` case, except it indicates an unrecoverable error: We cannot backtrack and test another parser -//! -//! Please refer to the ["choose a combinator" guide][crate::combinator] for an exhaustive list of parsers. -//! -//! ## Making new parsers with function combinators -//! -//! winnow is based on functions that generate parsers, with a signature like -//! this: `(arguments) -> impl Fn(Stream) -> IResult<Stream, Output, Error>`. -//! The arguments of a combinator can be direct values (like `take` which uses -//! a number of bytes or character as argument) or even other parsers (like -//! `delimited` which takes as argument 3 parsers, and returns the result of -//!
the second one if all are successful). -//! -//! Here are some examples: -//! -//! ```rust -//! use winnow::IResult; -//! use winnow::bytes::{tag, take}; -//! fn abcd_parser(i: &str) -> IResult<&str, &str> { -//! tag("abcd")(i) // will consume bytes if the input begins with "abcd" -//! } -//! -//! fn take_10(i: &[u8]) -> IResult<&[u8], &[u8]> { -//! take(10u8)(i) // will consume and return 10 bytes of input -//! } -//! ``` -//! -//! ## Combining parsers -//! -//! There are higher level patterns, like the **`alt`** combinator, which -//! provides a choice between multiple parsers. If one branch fails, it tries -//! the next, and returns the result of the first parser that succeeds: -//! -//! ```rust -//! use winnow::IResult; -//! use winnow::branch::alt; -//! use winnow::bytes::tag; -//! -//! let mut alt_tags = alt((tag("abcd"), tag("efgh"))); -//! -//! assert_eq!(alt_tags(&b"abcdxxx"[..]), Ok((&b"xxx"[..], &b"abcd"[..]))); -//! assert_eq!(alt_tags(&b"efghxxx"[..]), Ok((&b"xxx"[..], &b"efgh"[..]))); -//! assert_eq!(alt_tags(&b"ijklxxx"[..]), Err(winnow::error::ErrMode::Backtrack(winnow::error::Error::new(&b"ijklxxx"[..], winnow::error::ErrorKind::Tag)))); -//! ``` -//! -//! The **`opt`** combinator makes a parser optional. If the child parser returns -//! an error, **`opt`** will still succeed and return None: -//! -//! ```rust -//! use winnow::{IResult, combinator::opt, bytes::tag}; -//! fn abcd_opt(i: &[u8]) -> IResult<&[u8], Option<&[u8]>> { -//! opt(tag("abcd"))(i) -//! } -//! -//! assert_eq!(abcd_opt(&b"abcdxxx"[..]), Ok((&b"xxx"[..], Some(&b"abcd"[..])))); -//! assert_eq!(abcd_opt(&b"efghxxx"[..]), Ok((&b"efghxxx"[..], None))); -//! ``` -//! -//! **`many0`** applies a parser 0 or more times, and returns a vector of the aggregated results: -//! -//! ```rust -//! # #[cfg(feature = "alloc")] -//! # fn main() { -//! use winnow::{IResult, multi::many0, bytes::tag}; -//! use std::str; -//! -//! fn multi(i: &str) -> IResult<&str, Vec<&str>> { -//! 
many0(tag("abcd"))(i) -//! } -//! -//! let a = "abcdef"; -//! let b = "abcdabcdef"; -//! let c = "azerty"; -//! assert_eq!(multi(a), Ok(("ef", vec!["abcd"]))); -//! assert_eq!(multi(b), Ok(("ef", vec!["abcd", "abcd"]))); -//! assert_eq!(multi(c), Ok(("azerty", Vec::new()))); -//! # } -//! # #[cfg(not(feature = "alloc"))] -//! # fn main() {} -//! ``` -//! -//! Here are some basic combinators available: -//! -//! - **`opt`**: Will make the parser optional (if it returns the `O` type, the new parser returns `Option<O>`) -//! - **`many0`**: Will apply the parser 0 or more times (if it returns the `O` type, the new parser returns `Vec<O>`) -//! - **`many1`**: Will apply the parser 1 or more times -//! -//! There are more complex (and more useful) parsers like tuples, which is -//! used to apply a series of parsers then assemble their results. -//! -//! Example with tuples: -//! -//! ```rust -//! # fn main() { -//! use winnow::prelude::*; -//! use winnow::{ -//! error::ErrorKind, error::Error, error::Needed, -//! number::be_u16, -//! bytes::{tag, take}, -//! stream::Partial, -//! }; -//! -//! let mut tpl = (be_u16, take(3u8), tag("fg")); -//! -//! assert_eq!( -//! tpl.parse_next(Partial::new(&b"abcdefgh"[..])), -//! Ok(( -//! Partial::new(&b"h"[..]), -//! (0x6162u16, &b"cde"[..], &b"fg"[..]) -//! )) -//! ); -//! assert_eq!(tpl.parse_next(Partial::new(&b"abcde"[..])), Err(winnow::error::ErrMode::Incomplete(Needed::new(2)))); -//! let input = &b"abcdejk"[..]; -//! assert_eq!(tpl.parse_next(Partial::new(input)), Err(winnow::error::ErrMode::Backtrack(Error::new(Partial::new(&input[5..]), ErrorKind::Tag)))); -//! # } -//! ``` -//! -//! But you can also use a sequence of combinators written in imperative style, -//! thanks to the `?` operator: -//! -//! ```rust -//! # fn main() { -//! use winnow::{IResult, bytes::tag}; -//! -//! #[derive(Debug, PartialEq)] -//! struct A { -//! a: u8, -//! b: u8 -//! } -//! -//! fn ret_int1(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,1)) } -//!
fn ret_int2(i:&[u8]) -> IResult<&[u8], u8> { Ok((i,2)) } -//! -//! fn f(i: &[u8]) -> IResult<&[u8], A> { -//! // if successful, the parser returns `Ok((remaining_input, output_value))` that we can destructure -//! let (i, _) = tag("abcd")(i)?; -//! let (i, a) = ret_int1(i)?; -//! let (i, _) = tag("efgh")(i)?; -//! let (i, b) = ret_int2(i)?; -//! -//! Ok((i, A { a, b })) -//! } -//! -//! let r = f(b"abcdefghX"); -//! assert_eq!(r, Ok((&b"X"[..], A{a: 1, b: 2}))); -//! # } -//! ``` -//! # Making a new parser from scratch -//! -//! Writing a parser is a very fun, interactive process, but sometimes a daunting -//! task. How do you test it? How to see ambiguities in specifications? -//! -//! winnow is designed to abstract data manipulation (counting array offsets, -//! converting to structures, etc) while providing a safe, composable API. It also -//! takes care of making the code easy to test and read, but it can be confusing at -//! first, if you are not familiar with parser combinators, or if you are not used -//! to Rust generic functions. -//! -//! This document is here to help you in getting started with winnow. You can also find -//! [winnow recipes for common short parsing tasks here](crate::_topic). If you need -//! more specific help, please ping `geal` on IRC (libera, geeknode, -//! oftc), go to `#winnow-parsers` on Libera IRC, or on the -//! [Gitter chat room](https://gitter.im/Geal/winnow). -//! -//! # First step: the initial research -//! -//! A big part of the initial work lies in accumulating enough documentation and -//! samples to understand the format. The specification is useful, but specifications -//! represent an "official" point of view, that may not be the real world usage. Any -//! blog post or open source code is useful, because it shows how people understand -//! the format, and how they work around each other's bugs (if you think a -//! specification ensures every implementation is consistent with the others, think again). -//! -//! 
You should get a lot of samples (file or network traces) to test your code. The -//! easy way is to use a small number of samples coming from the same source and -//! develop everything around them, to realize later that they share a very specific -//! bug. -//! -//! # Code organization -//! -//! While it is tempting to insert the parsing code right inside the rest of the -//! logic, it usually results in unmaintainable code, and makes testing challenging. -//! Parser combinators, the parsing technique used in winnow, assemble a lot of small -//! functions to make powerful parsers. This means that those functions only depend -//! on their input, not on an external state. This makes it easy to parse the input -//! partially, and to test those functions independently. -//! -//! Usually, you can separate the parsing functions in their own module, so you -//! could have a `src/lib.rs` file containing this: -//! -//! ```rust,ignore -//! pub mod parser; -//! ``` -//! -//! And the `src/parser.rs` file: -//! -//! ```rust -//! use winnow::IResult; -//! use winnow::number::be_u16; -//! use winnow::bytes::take; -//! -//! pub fn length_value(input: &[u8]) -> IResult<&[u8],&[u8]> { -//! let (input, length) = be_u16(input)?; -//! take(length)(input) -//! } -//! ``` -//! -//! # Writing a first parser -//! -//! Let's parse a simple expression like `(12345)`. winnow parsers are functions that -//! use the `winnow::IResult` type everywhere. As an example, a parser taking a byte -//! slice `&[u8]` and returning a 32 bits unsigned integer `u32` would have this -//! signature: `fn parse_u32(input: &[u8]) -> IResult<&[u8], u32>`. -//! -//! The `IResult` type depends on the input and output types, and an optional custom -//! error type. This enum can either be `Ok((i,o))` containing the remaining input -//! and the output value, or, on the `Err` side, an error or an indication that more -//! data is needed. -//! -//! ```rust -//! # use winnow::error::ErrorKind; -//! 
pub type IResult = Result<(I, O), Err>; -//! -//! #[derive(Debug, PartialEq, Eq, Clone, Copy)] -//! pub enum Needed { -//! Unknown, -//! Size(u32) -//! } -//! -//! #[derive(Debug, Clone, PartialEq)] -//! pub enum Err { -//! Incomplete(Needed), -//! Error(E), -//! Failure(E), -//! } -//! ``` -//! -//! winnow uses this type everywhere. Every combination of parsers will pattern match -//! on this to know if it must return a value, an error, consume more data, etc. -//! But this is done behind the scenes most of the time. -//! -//! Parsers are usually built from the bottom up, by first writing parsers for the -//! smallest elements, then assembling them in more complex parsers by using -//! combinators. -//! -//! As an example, here is how we could build a (non spec compliant) HTTP request -//! line parser: -//! -//! ```rust -//! # use winnow::prelude::*; -//! # use winnow::bytes::take_while1; -//! # use winnow::bytes::tag; -//! # use winnow::sequence::preceded; -//! # use winnow::stream::AsChar; -//! struct Request<'s> { -//! method: &'s [u8], -//! url: &'s [u8], -//! version: &'s [u8], -//! } -//! -//! // combine all previous parsers in one function -//! fn request_line(i: &[u8]) -> IResult<&[u8], Request> { -//! // first implement the basic parsers -//! let method = take_while1(AsChar::is_alpha); -//! let space = |i| take_while1(|c| c == b' ')(i); -//! let url = take_while1(|c| c != b' '); -//! let is_version = |c| c >= b'0' && c <= b'9' || c == b'.'; -//! let version = take_while1(is_version); -//! let line_ending = tag("\r\n"); -//! -//! // combine http and version to extract the version string -//! // preceded will return the result of the second parser -//! // if both succeed -//! let http_version = preceded("HTTP/", version); -//! -//! // A tuple of parsers will evaluate each parser sequentally and return a tuple of the results -//! let (input, (method, _, url, _, version, _)) = -//! (method, space, url, space, http_version, line_ending).parse_next(i)?; -//! 
-//! Ok((input, Request { method, url, version })) -//! } -//! ``` -//! -//! Since it is easy to combine small parsers, I encourage you to write small -//! functions corresponding to specific parts of the format, test them -//! independently, then combine them in more general parsers. -//! -//! # Finding the right combinator -//! -//! winnow has a lot of different combinators, depending on the use case. They are all -//! described in the [reference][crate::combinator]. -//! -//! Basic functions are available. They deal mostly -//! in recognizing character types, like `alphanumeric` or `digit`. They also parse -//! big endian and little endian integers and floats of multiple sizes. -//! -//! Most of the functions are there to combine parsers, and they are generic over -//! the input type. -//! -//! # Testing the parsers -//! -//! Once you have a parser function, a good trick is to test it on a lot of the -//! samples you gathered, and integrate this to your unit tests. To that end, put -//! all of the test files in a folder like `assets` and refer to test files like -//! this: -//! -//! ```rust -//! #[test] -//! fn header_test() { -//! let data = include_bytes!("../assets/axolotl-piano.gif"); -//! println!("bytes:\n{}", &data[0..100].to_hex(8)); -//! let res = header(data); -//! // ... -//! } -//! ``` -//! -//! The `include_bytes!` macro (provided by Rust's standard library) will integrate -//! the file as a byte slice in your code. You can then just refer to the part of -//! the input the parser has to handle via its offset. Here, we take the first 100 -//! bytes of a GIF file to parse its header -//! (complete code [here](https://github.com/Geal/gif.rs/blob/master/src/parser.rs#L305-L309)). -//! -//! If your parser handles textual data, you can just use a lot of strings directly -//! in the test, like this: -//! -//! ```rust -//! #[test] -//! fn factor_test() { -//! assert_eq!(factor("3"), Ok(("", 3))); -//! assert_eq!(factor(" 12"), Ok(("", 12))); -//! 
assert_eq!(factor("537 "), Ok(("", 537))); -//! assert_eq!(factor(" 24 "), Ok(("", 24))); -//! } -//! ``` -//! -//! The more samples and test cases you get, the more you can experiment with your -//! parser design. diff --git a/src/_tutorial/chapter_1.md b/src/_tutorial/chapter_1.rs similarity index 100% rename from src/_tutorial/chapter_1.md rename to src/_tutorial/chapter_1.rs diff --git a/src/_tutorial/chapter_2.md b/src/_tutorial/chapter_2.rs similarity index 100% rename from src/_tutorial/chapter_2.md rename to src/_tutorial/chapter_2.rs diff --git a/src/_tutorial/chapter_3.md b/src/_tutorial/chapter_3.rs similarity index 100% rename from src/_tutorial/chapter_3.md rename to src/_tutorial/chapter_3.rs diff --git a/src/_tutorial/chapter_4.md b/src/_tutorial/chapter_4.rs similarity index 100% rename from src/_tutorial/chapter_4.md rename to src/_tutorial/chapter_4.rs diff --git a/src/_tutorial/chapter_5.md b/src/_tutorial/chapter_5.rs similarity index 100% rename from src/_tutorial/chapter_5.md rename to src/_tutorial/chapter_5.rs diff --git a/src/_tutorial/chapter_6.md b/src/_tutorial/chapter_6.rs similarity index 100% rename from src/_tutorial/chapter_6.md rename to src/_tutorial/chapter_6.rs diff --git a/src/_tutorial/chapter_7.md b/src/_tutorial/chapter_7.rs similarity index 100% rename from src/_tutorial/chapter_7.md rename to src/_tutorial/chapter_7.rs diff --git a/src/_tutorial/mod.md b/src/_tutorial/mod.rs similarity index 100% rename from src/_tutorial/mod.md rename to src/_tutorial/mod.rs From bbd67ea483dd2f4baf37aac79550652f0ccb6e19 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 20 Feb 2023 10:09:59 -0600 Subject: [PATCH 07/11] docs(tutorial): Link all pages together --- src/_tutorial/chapter_1.rs | 36 +++++++++++---------- src/_tutorial/chapter_2.rs | 58 +++++++++++++++++----------------- src/_tutorial/chapter_3.rs | 64 ++++++++++++++++++++------------------ src/_tutorial/chapter_4.rs | 62 ++++++++++++++++++------------------ 
src/_tutorial/chapter_5.rs | 26 +++++++++------- src/_tutorial/chapter_6.rs | 15 ++++----- src/_tutorial/chapter_7.rs | 9 +++--- src/_tutorial/mod.rs | 24 +++++++++----- 8 files changed, 158 insertions(+), 136 deletions(-) diff --git a/src/_tutorial/chapter_1.rs b/src/_tutorial/chapter_1.rs index fd339089..dcb5d7d1 100644 --- a/src/_tutorial/chapter_1.rs +++ b/src/_tutorial/chapter_1.rs @@ -1,9 +1,9 @@ //! # Chapter 1: The Nom Way -//! +//! //! First of all, we need to understand the way that nom thinks about parsing. //! As discussed in the introduction, nom lets us build simple parsers, and //! then combine them (using "combinators"). -//! +//! //! Let's discuss what a "parser" actually does. A parser takes an input and returns //! a result, where: //! - `Ok` indicates the parser successfully found what it was looking for; or @@ -14,10 +14,10 @@ //! tuple will contain everything the parser did not process. The second will contain //! everything the parser processed. The idea is that a parser can happily parse the first //! *part* of an input, without being able to parse the whole thing. -//! +//! //! If the parser failed, then there are multiple errors that could be returned. //! For simplicity, however, in the next chapters we will leave these unexplored. -//! +//! //! ```text //! ┌─► Ok( //! │ what the parser didn't touch, @@ -27,43 +27,43 @@ //! my input───►│my parser├──►either──┤ //! └─────────┘ └─► Err(...) //! ``` -//! -//! +//! +//! //! To represent this model of the world, nom uses the `IResult` type. //! The `Ok` variant has a tuple of `(remaining_input: I, output: O)`; //! whereas the `Err` variant stores an error. -//! +//! //! You can import that from: -//! +//! //! ```rust //! # extern crate nom; //! use nom::IResult; //! ``` -//! +//! //! You'll note that `I` and `O` are parameterized -- while most of the examples in this book //! will be with `&str` (i.e. parsing a string); they do not have to be strings; nor do they //! 
have to be the same type (consider the simple example where `I = &str`, and `O = u64` -- this //! parses a string into an unsigned integer.) -//! +//! //! Let's write our first parser! //! The simplest parser we can write is one which successfully does nothing. -//! +//! //! This parser should take in an `&str`: -//! +//! //! - Since it is supposed to succeed, we know it will return the Ok Variant. //! - Since it does nothing to our input, the remaining input is the same as the input. //! - Since it doesn't parse anything, it also should just return an empty string. -//! -//! +//! +//! //! ```rust //! # extern crate nom; //! # use nom::IResult; //! # use std::error::Error; -//! +//! //! pub fn do_nothing_parser(input: &str) -> IResult<&str, &str> { //! Ok((input, "")) //! } -//! +//! //! fn main() -> Result<(), Box> { //! let (remaining_input, output) = do_nothing_parser("my_input")?; //! assert_eq!(remaining_input, "my_input"); @@ -71,5 +71,7 @@ //! # Ok(()) //! } //! ``` -//! +//! //! It's that easy! +//! +//! [*prev*][super] [*next*][super::chapter_2] diff --git a/src/_tutorial/chapter_2.rs b/src/_tutorial/chapter_2.rs index aa567c9f..b7e419f6 100644 --- a/src/_tutorial/chapter_2.rs +++ b/src/_tutorial/chapter_2.rs @@ -1,80 +1,80 @@ //! # Chapter 2: Tags and Character Classes -//! +//! //! The simplest _useful_ parser you can write is one which //! has no special characters, it just matches a string. -//! +//! //! In `nom`, we call a simple collection of bytes a tag. Because //! these are so common, there already exists a function called `tag()`. //! This function returns a parser for a given string. -//! +//! //! **Warning**: `nom` has multiple different definitions of `tag`, make sure you use this one for the //! moment! -//! +//! //! ```rust,ignore //! # extern crate nom; //! pub use nom::bytes::complete::tag; //! ``` -//! +//! //! For example, code to parse the string `"abc"` could be represented as `tag("abc")`. -//! +//! //! 
If you have not programmed in a language where functions are values, the type signature of the //! tag function might be a surprise: -//! +//! //! ```rust,ignore //! pub fn tag<T, Input, Error: ParseError<Input>>( //! tag: T //! ) -> impl Fn(Input) -> IResult<Input, Input, Error> where //! Input: InputTake + Compare<T>, -//! T: InputLength + Clone, +//! T: InputLength + Clone, //! ``` -//! +//! //! Or, for the case where `Input` and `T` are both `&str`, and simplifying slightly: -//! +//! //! ```rust,ignore //! fn tag(tag: &str) -> (impl Fn(&str) -> IResult<&str, Error>) //! ``` -//! +//! //! In other words, this function `tag` *returns a function*. The function it returns is a -//! parser, taking a `&str` and returning an `IResult`. Functions creating parsers and +//! parser, taking a `&str` and returning an `IResult`. Functions creating parsers and //! returning them is a common pattern in Nom, so it is useful to call out. -//! +//! //! Below, we have implemented a function that uses `tag`. -//! +//! //! ```rust //! # extern crate nom; //! # pub use nom::bytes::complete::tag; //! # pub use nom::IResult; //! # use std::error::Error; -//! +//! //! fn parse_input(input: &str) -> IResult<&str, &str> { //! // note that this is really creating a function, the parser for abc -//! // vvvvv +//! // vvvvv //! // which is then called here, returning an IResult<&str, &str> //! // vvvvv //! tag("abc")(input) //! } -//! +//! //! fn main() -> Result<(), Box<dyn Error>> { //! let (leftover_input, output) = parse_input("abcWorld")?; //! assert_eq!(leftover_input, "World"); //! assert_eq!(output, "abc"); -//! +//! //! assert!(parse_input("defWorld").is_err()); //! # Ok(()) //! } //! ``` -//! +//! //! If you'd like to, you can also check tags without case-sensitivity //! with the [`tag_no_case`](https://docs.rs/nom/latest/nom/bytes/complete/fn.tag_no_case.html) function. -//! +//! //! ## Character Classes -//! +//! //! Tags are incredibly useful, but they are also incredibly restrictive. //!
The other end of Nom's functionality is pre-written parsers that allow us to accept any of a group of characters, //! rather than just accepting characters in a defined sequence. -//! +//! //! Here is a selection of them: -//! +//! //! - [`alpha0`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha0.html): Recognizes zero or more lowercase and uppercase alphabetic characters: `/[a-zA-Z]/`. [`alpha1`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha1.html) does the same but returns at least one character //! - [`alphanumeric0`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric0.html): Recognizes zero or more numerical and alphabetic characters: `/[0-9a-zA-Z]/`. [`alphanumeric1`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric1.html) does the same but returns at least one character //! - [`digit0`](https://docs.rs/nom/latest/nom/character/complete/fn.digit0.html): Recognizes zero or more numerical characters: `/[0-9]/`. [`digit1`](https://docs.rs/nom/latest/nom/character/complete/fn.digit1.html) does the same but returns at least one character @@ -83,8 +83,8 @@ //! - [`line_ending`](https://docs.rs/nom/latest/nom/character/complete/fn.line_ending.html): Recognizes an end of line (both `\n` and `\r\n`) //! - [`newline`](https://docs.rs/nom/latest/nom/character/complete/fn.newline.html): Matches a newline character `\n` //! - [`tab`](https://docs.rs/nom/latest/nom/character/complete/fn.tab.html): Matches a tab character `\t` -//! -//! +//! +//! //! We can use these in //! ```rust //! # extern crate nom; @@ -94,7 +94,7 @@ //! fn parser(input: &str) -> IResult<&str, &str> { //! alpha0(input) //! } -//! +//! //! fn main() -> Result<(), Box> { //! let (remaining, letters) = parser("abc123")?; //! assert_eq!(remaining, "123"); @@ -103,9 +103,11 @@ //! # Ok(()) //! } //! ``` -//! +//! //! One important note is that, due to the type signature of these functions, //! 
it is generally best to use them within a function that returns an `IResult`. -//! +//! //! If you don't, some of the information around the type of the `tag` function must be //! manually specified, which can lead to verbose code or confusing errors. +//! +//! [*prev*][super::chapter_1] [*next*][super::chapter_3] diff --git a/src/_tutorial/chapter_3.rs b/src/_tutorial/chapter_3.rs index bd8880a6..ab63f922 100644 --- a/src/_tutorial/chapter_3.rs +++ b/src/_tutorial/chapter_3.rs @@ -1,65 +1,65 @@ //! # Chapter 3: Alternatives and Composition -//! +//! //! In the last chapter, we saw how to create simple parsers using the `tag` function; //! and some of Nom's prebuilt parsers. -//! +//! //! In this chapter, we explore two other widely used features of Nom: //! alternatives and composition. -//! +//! //! ## Alternatives -//! +//! //! Sometimes, we might want to choose between two parsers; and we're happy with //! either being used. -//! +//! //! Nom gives us a similar ability through the `alt()` combinator. -//! +//! //! ```rust //! # extern crate nom; //! use nom::branch::alt; //! ``` -//! +//! //! The `alt()` combinator will execute each parser in a tuple until it finds one -//! that does not error. If all error, then by default you are given the error from +//! that does not error. If all error, then by default you are given the error from //! the last error. -//! +//! //! We can see a basic example of `alt()` below. -//! +//! //! ```rust //! # extern crate nom; //! use nom::branch::alt; //! use nom::bytes::complete::tag; //! use nom::IResult; //! # use std::error::Error; -//! +//! //! fn parse_abc_or_def(input: &str) -> IResult<&str, &str> { //! alt(( //! tag("abc"), //! tag("def") //! ))(input) //! } -//! +//! //! fn main() -> Result<(), Box> { //! let (leftover_input, output) = parse_abc_or_def("abcWorld")?; //! assert_eq!(leftover_input, "World"); //! assert_eq!(output, "abc"); -//! +//! //! assert!(parse_abc_or_def("ghiWorld").is_err()); //! # Ok(()) //! } //! 
``` -//! +//! //! ## Composition -//! +//! //! Now that we can create more interesting parsers, we can compose them together. //! The simplest way to do this is just to evaluate them in sequence: -//! +//! //! ```rust //! # extern crate nom; //! use nom::branch::alt; //! use nom::bytes::complete::tag; //! use nom::IResult; //! # use std::error::Error; -//! +//! //! fn parse_abc(input: &str) -> IResult<&str, &str> { //! tag("abc")(input) //! } @@ -69,7 +69,7 @@ //! tag("ghi") //! ))(input) //! } -//! +//! //! fn main() -> Result<(), Box<dyn Error>> { //! let input = "abcghi"; //! let (remainder, abc) = parse_abc(input)?; @@ -79,18 +79,18 @@ //! # Ok(()) //! } //! ``` -//! +//! //! Composing tags is such a common requirement that, in fact, Nom has a few built in //! combinators to do it. The simplest of these is `tuple()`. The `tuple()` combinator takes a tuple of parsers, -//! and either returns `Ok` with a tuple of all of their successful parses, or it +//! and either returns `Ok` with a tuple of all of their successful parses, or it //! returns the `Err` of the first failed parser. -//! +//! //! ```rust //! # extern crate nom; //! use nom::sequence::tuple; //! ``` -//! -//! +//! +//! //! ```rust //! # extern crate nom; //! use nom::branch::alt; @@ -99,7 +99,7 @@ //! use nom::character::complete::{digit1}; //! use nom::IResult; //! # use std::error::Error; -//! +//! //! fn parse_base(input: &str) -> IResult<&str, &str> { //! alt(( //! tag_no_case("a"), @@ -108,7 +108,7 @@ //! tag_no_case("g") //! ))(input) //! } -//! +//! //! fn parse_pair(input: &str) -> IResult<&str, (&str, &str)> { //! // the many_m_n combinator might also be appropriate here. //! tuple(( @@ -116,23 +116,23 @@ //! parse_base, //! ))(input) //! } -//! +//! //! fn main() -> Result<(), Box<dyn Error>> { //! let (remaining, parsed) = parse_pair("aTcG")?; //! assert_eq!(parsed, ("a", "T")); //! assert_eq!(remaining, "cG"); //! //! assert!(parse_pair("Dct").is_err()); -//! +//! //! # Ok(()) //! } //! ``` -//! -//!
+//! //! ## Extra Nom Tools -//! +//! //! After using `alt()` and `tuple()`, you might also be interested in a few other parsers that do similar things: -//! +//! //! | combinator | usage | input | output | comment | //! |---|---|---|---|---| //! | [delimited](https://docs.rs/nom/latest/nom/sequence/fn.delimited.html) | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `Ok(("cd", "ab"))` || @@ -140,3 +140,5 @@ //! | [terminated](https://docs.rs/nom/latest/nom/sequence/fn.terminated.html) | `terminated(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "ab"))` || //! | [pair](https://docs.rs/nom/latest/nom/sequence/fn.pair.html) | `pair(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", ("ab", "XY")))` || //! | [separated_pair](https://docs.rs/nom/latest/nom/sequence/fn.separated_pair.html) | `separated_pair(tag("hello"), char(','), tag("world"))` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` || +//! +//! [*prev*][super::chapter_2] [*next*][super::chapter_4] diff --git a/src/_tutorial/chapter_4.rs b/src/_tutorial/chapter_4.rs index d223dd2d..edb7d9bd 100644 --- a/src/_tutorial/chapter_4.rs +++ b/src/_tutorial/chapter_4.rs @@ -1,31 +1,31 @@ -//! # Chapter 4: Parsers With Custom Return Types -//! +//! # Chapter 4: Parsers With Custom Return Types +//! //! So far, we have seen mostly functions that take an `&str`, and return a //! `IResult<&str, &str>`. Splitting strings into smaller strings is certainly useful, //! but it's not the only thing Nom is capable of! -//! +//! //! A useful operation when parsing is to convert between types; for example //! parsing from `&str` to another primitive, like `bool`. -//! +//! //! All we need to do for our parser to return a different type is to change //! the second type parameter of `IResult` to the desired return type. //! For example, to return a bool, return a `IResult<&str, bool>`. -//! +//! //! Recall that the first type parameter of the `IResult` is the input //! 
type, so even if you're returning something different, if your input //! is a `&str`, the first type argument of `IResult` should be also. -//! +//! //! Until you have read the chapter on Errors, we strongly suggest avoiding //! the use of parsers built into Rust (like `str.parse`); as they require //! special handling to work well with Nom. -//! +//! //! That said, one Nom-native way of doing a type conversion is to use the //! [`value`](https://docs.rs/nom/latest/nom/combinator/fn.value.html) combinator //! to convert from a successful parse to a particular value. -//! +//! //! The following code converts from a string containing `"true"` or `"false"`, //! to the corresponding `bool`. -//! +//! //! ```rust //! # extern crate nom; //! # use std::error::Error; @@ -33,7 +33,7 @@ //! use nom::bytes::complete::tag; //! use nom::combinator::value; //! use nom::branch::alt; -//! +//! //! fn parse_bool(input: &str) -> IResult<&str, bool> { //! // either, parse `"true"` -> `true`; `"false"` -> `false`, or error. //! alt(( @@ -41,7 +41,7 @@ //! value(false, tag("false")), //! ))(input) //! } -//! +//! //! fn main() -> Result<(), Box> { //! // Parses the `"true"` out. //! let (remaining, parsed) = parse_bool("true|false")?; @@ -58,25 +58,25 @@ //! assert_eq!(remaining, ""); //! //! -//! +//! //! # Ok(()) //! } //! ``` -//! +//! //! ## Nom's in-built parser functions -//! +//! //! Nom has a wide array of parsers built in. Here is a list of //! [parsers which recognize specific characters](https://docs.rs/nom/latest/nom/character/complete/index.html). -//! +//! //! Some of them we have seen before in Chapter 2, but now we also can try out the parsers that return different //! types, like `i32`. An example of this parser is shown in the next section. -//! +//! //! ## Building a More Complex Example -//! +//! //! A more complex example of parsing custom types might be parsing a 2D coordinate. -//! +//! //! Let us try to figure out how to design this. -//! +//! //! 
- We know that we want to take a string, like `"(3, -2)"`, and convert it into //! a `Coordinate` struct. //! - We can split this into three parts: @@ -86,31 +86,31 @@ //! vvvv , vvvv # The comma, separating values. //! 3 -2 # The actual integers. //! ``` -//! +//! //! - So, we will need three parsers, to deal with this: //! 1. A parser for integers, which will deal with the raw numbers. //! 2. A parser for the comma-separated pair, which will split it up into integers. //! 3. A parser for the outer brackets. //! //! - We can see below how we achieve this: -//! +//! //! ```rust //! # extern crate nom; //! # use std::error::Error; //! use nom::IResult; //! use nom::bytes::complete::tag; //! use nom::sequence::{separated_pair, delimited}; -//! +//! //! // This is the type we will parse into. //! #[derive(Debug,PartialEq)] //! pub struct Coordinate { //! pub x: i32, //! pub y: i32, //! } -//! +//! //! // 1. Nom has an in-built i32 parser. //! use nom::character::complete::i32; -//! +//! //! // 2. Use the `separated_pair` parser to combine two parsers (in this case, //! // both `i32`), ignoring something in-between. //! fn parse_integer_pair(input: &str) -> IResult<&str, (i32, i32)> { @@ -120,7 +120,7 @@ //! i32 //! )(input) //! } -//! +//! //! // 3. Use the `delimited` parser to apply a parser, ignoring the results //! // of two surrounding parsers. //! fn parse_coordinate(input: &str) -> IResult<&str, Coordinate> { @@ -135,7 +135,7 @@ //! Ok((remaining, Coordinate {x, y})) //! //! } -//! +//! //! fn main() -> Result<(), Box<dyn Error>> { //! let (_, parsed) = parse_coordinate("(3, 5)")?; //! assert_eq!(parsed, Coordinate {x: 3, y: 5}); @@ -152,10 +152,12 @@ //! let parsing_error = parse_coordinate("Ferris"); //! assert!(parsing_error.is_err()); //! -//! +//! //! # Ok(()) //! } //! ``` -//! -//! As an exercise, you might want to explore how to make this parser deal gracefully with -//! whitespace in the input. +//! +//!
As an exercise, you might want to explore how to make this parser deal gracefully with +//! whitespace in the input. +//! +//! [*prev*][super::chapter_3] [*next*][super::chapter_5] diff --git a/src/_tutorial/chapter_5.rs b/src/_tutorial/chapter_5.rs index ae9bf7b8..037db415 100644 --- a/src/_tutorial/chapter_5.rs +++ b/src/_tutorial/chapter_5.rs @@ -1,23 +1,23 @@ //! # Chapter 5: Repeating with Predicates -//! +//! //! Just as, when programming, the humble while loop unlocks many useful //! features; in Nom, repeating a parser multiple times can be incredibly useful -//! +//! //! There are, however, two ways of including repeating functionality into Nom -- //! parsers which are governed by a predicate; and combinators which repeat //! a parser. -//! +//! //! ## Parsers which use a predicate -//! +//! //! A `predicate` is a function which returns a boolean value (i.e. given some input, //! it returns `true` or `false`). These are incredibly common when parsing -- for instance, //! a predicate `is_vowel` might decide whether a character is an english vowel (a, e, i, o or u). -//! +//! //! These can be used to make parsers that Nom hasn't built in. For instance, the below //! parser will take as many vowels as possible. -//! +//! //! There are a few different categories of predicate parsers that are worth mentioning: -//! +//! //! - For bytes, there are three different categories of parser: `take_till`, `take_until`, and `take_while`. //! `take_till` will continue consuming input until its input meets the predicate. //! `take_while` will continue consuming input until its input *does not* meet the predicate. @@ -30,7 +30,7 @@ //! - As a special case, `take_while_m_n` is like `take_while`, but guarantees that it will consume //! at least `m` bytes, and no more than `n` bytes. //! -//! +//! //! ```rust //! # extern crate nom; //! # use std::error::Error; @@ -38,11 +38,11 @@ //! use nom::bytes::complete::{tag, take_until, take_while}; //! 
use nom::character::{is_space}; //! use nom::sequence::{terminated}; -//! +//! //! fn parse_sentence(input: &str) -> IResult<&str, &str> { //! terminated(take_until("."), take_while(|c| c == '.' || c == ' '))(input) //! } -//! +//! //! fn main() -> Result<(), Box> { //! let (remaining, parsed) = parse_sentence("I am Tom. I write Rust.")?; //! assert_eq!(parsed, "I am Tom"); @@ -51,14 +51,16 @@ //! let parsing_error = parse_sentence("Not a sentence (no period at the end)"); //! assert!(parsing_error.is_err()); //! -//! +//! //! # Ok(()) //! } //! ``` //! For detailed examples, see their documentation, shown below: -//! +//! //! | combinator | usage | input | output | comment | //! |---|---|---|---|---| //! | [take_while](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_while.html) | `take_while(is_alphabetic)` | `"abc123"` | `Ok(("123", "abc"))` |Returns the longest list of bytes for which the provided function returns true. `take_while1` does the same, but must return at least one character. `take_while_m_n` does the same, but must return between `m` and `n` characters.| //! | [take_till](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_till.html) | `take_till(is_alphabetic)` | `"123abc"` | `Ok(("abc", "123"))` |Returns the longest list of bytes or characters until the provided function returns true. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(\|c\| !f(c))`| //! | [take_until](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_until.html) | `take_until("world")` | `"Hello world"` | `Ok(("world", "Hello "))` |Returns the longest list of bytes or characters until the provided tag is found. `take_until1` does the same, but must return at least one character| +//! +//! 
[*prev*][super::chapter_4] [*next*][super::chapter_6] diff --git a/src/_tutorial/chapter_6.rs b/src/_tutorial/chapter_6.rs index a67f5933..0a1d827f 100644 --- a/src/_tutorial/chapter_6.rs +++ b/src/_tutorial/chapter_6.rs @@ -1,21 +1,21 @@ //! # Chapter 6: Repeating Parsers -//! +//! //! A single parser which repeats a predicate is useful, but more useful still is a combinator that //! repeats a parser. Nom has multiple combinators which operate on this principle; the most obvious of //! which is `many0`, which applies a parser as many times as possible; and returns a vector of //! the results of those parses. Here is an example: -//! +//! //! ```rust //! # extern crate nom; //! # use std::error::Error; //! use nom::IResult; //! use nom::multi::many0; //! use nom::bytes::complete::tag; -//! +//! //! fn parser(s: &str) -> IResult<&str, Vec<&str>> { //! many0(tag("abc"))(s) //! } -//! +//! //! fn main() { //! assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); //! assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); @@ -23,9 +23,9 @@ //! assert_eq!(parser(""), Ok(("", vec![]))); //! } //! ``` -//! +//! //! There are many different parsers to choose from: -//! +//! //! | combinator | usage | input | output | comment | //! |---|---|---|---|---| //! | [count](https://docs.rs/nom/latest/nom/multi/fn.count.html) | `count(take(2), 3)` | `"abcdefgh"` | `Ok(("gh", vec!["ab", "cd", "ef"]))` |Applies the child parser a specified number of times| @@ -36,4 +36,5 @@ //! | [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time| //! 
| [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value| //! | [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times| -//! +//! +//! [*prev*][super::chapter_5] [*next*][super::chapter_7] diff --git a/src/_tutorial/chapter_7.rs b/src/_tutorial/chapter_7.rs index 409e61a2..9bf6fa6e 100644 --- a/src/_tutorial/chapter_7.rs +++ b/src/_tutorial/chapter_7.rs @@ -1,10 +1,11 @@ //! # Chapter 7: Using Errors from Outside Nom -//! +//! //! [Nom has other documentation about errors, so in place of this chapter, read this page.](https://github.com/Geal/nom/blob/main/doc/error_management.md) -//! +//! //! ## Particular Notes -//! +//! //! - It's particularly useful to use the `map_res` function. It allows you to //! convert an external error to a Nom error. For an example, //! see [the Nom example on the front page](https://github.com/Geal/nom#example). -//! +//! +//! [*prev*][super::chapter_5] diff --git a/src/_tutorial/mod.rs b/src/_tutorial/mod.rs index 2ca4c1a8..c35f3f26 100644 --- a/src/_tutorial/mod.rs +++ b/src/_tutorial/mod.rs @@ -1,20 +1,20 @@ -//! # The Nominomicon -//! +//! # The Nominomiconsa +//! //! Welcome to Nominomicon; a guide to using the Nom parser for great good. -//! This guide will give you an introduction to the theory and practice of +//! This guide will give you an introduction to the theory and practice of //! using Nom. -//! +//! //! This guide assumes only that you are: //! - Wanting to learn Nom, //! - Already familiar with Rust. -//! +//! //! Nom is a parser-combinator library. In other words, it gives you tools to define: //! 
- "parsers" (a function that takes an input, and gives back an output), and //! - "combinators" (functions that take parsers, and _combine_ them together!). -//! +//! //! By combining parsers with combinators, you can build complex parsers up from //! simpler ones. These complex parsers are enough to understand HTML, mkv or Python! -//! +//! //! Before we set off, it's important to list some caveats: //! - This guide is for Nom7. Nom has undergone significant changes, so if //! you are searching for documentation or StackOverflow answers, you may @@ -24,3 +24,13 @@ //! - Use of `CompleteStr` or `CompleteByteArray`. //! - Nom can parse (almost) anything; but this guide will focus almost entirely on parsing //! complete `&str` into things. +//! +//! [*next*][chapter_1] + +pub mod chapter_1; +pub mod chapter_2; +pub mod chapter_3; +pub mod chapter_4; +pub mod chapter_5; +pub mod chapter_6; +pub mod chapter_7; From 37456b0fd3f09e954aa32c58e4989e05d1a34e05 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 20 Feb 2023 12:39:04 -0600 Subject: [PATCH 08/11] docs(tutorial): Massage tutorial --- src/_tutorial/chapter_1.rs | 57 ++++--- src/_tutorial/chapter_2.rs | 197 ++++++++++++++--------- src/_tutorial/chapter_3.rs | 317 ++++++++++++++++++++++++------------- src/_tutorial/chapter_4.rs | 219 ++++++++++--------------- src/_tutorial/chapter_5.rs | 2 +- src/_tutorial/chapter_6.rs | 229 +++++++++++++++++++++++---- src/_tutorial/chapter_7.rs | 161 ++++++++++++++++++- src/_tutorial/chapter_8.rs | 115 ++++++++++++++ src/_tutorial/mod.rs | 47 +++--- 9 files changed, 939 insertions(+), 405 deletions(-) create mode 100644 src/_tutorial/chapter_8.rs diff --git a/src/_tutorial/chapter_1.rs b/src/_tutorial/chapter_1.rs index dcb5d7d1..dad0f733 100644 --- a/src/_tutorial/chapter_1.rs +++ b/src/_tutorial/chapter_1.rs @@ -1,17 +1,17 @@ -//! # Chapter 1: The Nom Way +//! # Chapter 1: The Winnow Way //! -//! First of all, we need to understand the way that nom thinks about parsing. -//! 
As discussed in the introduction, nom lets us build simple parsers, and +//! First of all, we need to understand the way that winnow thinks about parsing. +//! As discussed in the introduction, winnow lets us build simple parsers, and //! then combine them (using "combinators"). //! //! Let's discuss what a "parser" actually does. A parser takes an input and returns //! a result, where: //! - `Ok` indicates the parser successfully found what it was looking for; or //! - `Err` indicates the parser could not find what it was looking for. -//! +//! //! Parsers do more than just return a binary "success"/"failure" code. If -//! the parser was successful, then it will return a tuple. The first field of the -//! tuple will contain everything the parser did not process. The second will contain +//! the parser was successful, then it will return a tuple where the first field +//! will contain everything the parser did not process. The second will contain //! everything the parser processed. The idea is that a parser can happily parse the first //! *part* of an input, without being able to parse the whole thing. //! @@ -21,7 +21,7 @@ //! ```text //! ┌─► Ok( //! │ what the parser didn't touch, -//! │ what matched the regex +//! │ what matched the parser //! │ ) //! ┌─────────┐ │ //! my input───►│my parser├──►either──┤ @@ -29,15 +29,14 @@ //! ``` //! //! -//! To represent this model of the world, nom uses the `IResult` type. -//! The `Ok` variant has a tuple of `(remaining_input: I, output: O)`; +//! To represent this model of the world, winnow uses the [`IResult`] type. +//! The `Ok` variant has a tuple of `(remainder: I, output: O)`; //! whereas the `Err` variant stores an error. //! //! You can import that from: //! //! ```rust -//! # extern crate nom; -//! use nom::IResult; +//! use winnow::IResult; //! ``` //! //! You'll note that `I` and `O` are parameterized -- while most of the examples in this book @@ -45,33 +44,45 @@ //! 
have to be the same type (consider the simple example where `I = &str`, and `O = u64` -- this //! parses a string into an unsigned integer.) //! -//! Let's write our first parser! +//! To combine parsers, we need a common way to refer to them which is where the [`Parser`] +//! trait comes in with [`Parser::parse_next`] being the primary way to drive +//! parsing forward. +//! +//! # Let's write our first parser! +//! //! The simplest parser we can write is one which successfully does nothing. //! -//! This parser should take in an `&str`: +//! To make it easier to implement a [`Parser`], the trait is implemented for +//! functions of the form `Fn(I) -> IResult`. +//! +//! This parser function should take in a `&str`: //! //! - Since it is supposed to succeed, we know it will return the Ok Variant. //! - Since it does nothing to our input, the remaining input is the same as the input. //! - Since it doesn't parse anything, it also should just return an empty string. //! -//! //! ```rust -//! # extern crate nom; -//! # use nom::IResult; -//! # use std::error::Error; +//! use winnow::IResult; +//! use winnow::Parser; //! //! pub fn do_nothing_parser(input: &str) -> IResult<&str, &str> { //! Ok((input, "")) //! } //! -//! fn main() -> Result<(), Box> { -//! let (remaining_input, output) = do_nothing_parser("my_input")?; -//! assert_eq!(remaining_input, "my_input"); +//! fn main() { +//! let input = "0x1a2b Hello"; +//! +//! let (remainder, output) = do_nothing_parser.parse_next(input).unwrap(); +//! // Same as: +//! // let (remainder, output) = do_nothing_parser(input).unwrap(); +//! +//! assert_eq!(remainder, "0x1a2b Hello"); //! assert_eq!(output, ""); -//! # Ok(()) //! } //! ``` //! -//! It's that easy! -//! //! 
[*prev*][super] [*next*][super::chapter_2] + +#![allow(unused_imports)] +use crate::IResult; +use crate::Parser; diff --git a/src/_tutorial/chapter_2.rs b/src/_tutorial/chapter_2.rs index b7e419f6..2a92f051 100644 --- a/src/_tutorial/chapter_2.rs +++ b/src/_tutorial/chapter_2.rs @@ -1,113 +1,154 @@ -//! # Chapter 2: Tags and Character Classes +//! # Chapter 2: Tokens and Tags //! -//! The simplest _useful_ parser you can write is one which -//! has no special characters, it just matches a string. +//! The simplest *useful* parser you can write is one which matches tokens. //! -//! In `nom`, we call a simple collection of bytes a tag. Because -//! these are so common, there already exists a function called `tag()`. -//! This function returns a parser for a given string. +//! ## Tokens //! -//! **Warning**: `nom` has multiple different definitions of `tag`, make sure you use this one for the -//! moment! +//! Matching a single token literal is so common, `Parser` is implemented for +//! `char`. //! -//! ```rust,ignore -//! # extern crate nom; -//! pub use nom::bytes::complete::tag; -//! ``` -//! -//! For example, code to parse the string `"abc"` could be represented as `tag("abc")`. +//! ```rust +//! # use winnow::Parser; +//! # use winnow::IResult; +//! # +//! fn parse_prefix(input: &str) -> IResult<&str, char> { +//! '0'.parse_next(input) +//! } //! -//! If you have not programmed in a language where functions are values, the type signature of them -//! tag function might be a surprise: +//! fn main() { +//! let input = "0x1a2b Hello"; //! -//! ```rust,ignore -//! pub fn tag>( -//! tag: T -//! ) -> impl Fn(Input) -> IResult where -//! Input: InputTake + Compare, -//! T: InputLength + Clone, -//! ``` +//! let (remainder, output) = parse_prefix.parse_next(input).unwrap(); //! -//! Or, for the case where `Input` and `T` are both `&str`, and simplifying slightly: +//! assert_eq!(remainder, "x1a2b Hello"); +//! assert_eq!(output, '0'); //! -//! ```rust,ignore -//! 
fn tag(tag: &str) -> (impl Fn(&str) -> IResult<&str, Error>) +//! assert!(parse_prefix("d").is_err()); +//! } //! ``` //! -//! In other words, this function `tag` *returns a function*. The function it returns is a -//! parser, taking a `&str` and returning an `IResult`. Functions creating parsers and -//! returning them is a common pattern in Nom, so it is useful to call out. +//! ## Tags //! -//! Below, we have implemented a function that uses `tag`. +//! One of the most frequent ways of matching tokens is when they are combined into a string. +//! Again, this is common enough that `Parser` is implemented for `&str`: //! //! ```rust -//! # extern crate nom; -//! # pub use nom::bytes::complete::tag; -//! # pub use nom::IResult; -//! # use std::error::Error; -//! -//! fn parse_input(input: &str) -> IResult<&str, &str> { -//! // note that this is really creating a function, the parser for abc -//! // vvvvv -//! // which is then called here, returning an IResult<&str, &str> -//! // vvvvv -//! tag("abc")(input) +//! # use winnow::Parser; +//! # use winnow::IResult; +//! # +//! fn parse_prefix(input: &str) -> IResult<&str, &str> { +//! "0x".parse_next(input) //! } //! -//! fn main() -> Result<(), Box> { -//! let (leftover_input, output) = parse_input("abcWorld")?; -//! assert_eq!(leftover_input, "World"); -//! assert_eq!(output, "abc"); +//! fn main() { +//! let input = "0x1a2b Hello"; //! -//! assert!(parse_input("defWorld").is_err()); -//! # Ok(()) +//! let (remainder, output) = parse_prefix.parse_next(input).unwrap(); +//! assert_eq!(remainder, "1a2b Hello"); +//! assert_eq!(output, "0x"); +//! +//! assert!(parse_prefix("0o123").is_err()); //! } //! ``` //! -//! If you'd like to, you can also check tags without case-sensitivity -//! with the [`tag_no_case`](https://docs.rs/nom/latest/nom/bytes/complete/fn.tag_no_case.html) function. +//! In `winnow`, we call this type of parser a [`tag`]. //! //! ## Character Classes //! -//! 
Tags are incredibly useful, but they are also incredibly restrictive. -//! The other end of Nom's functionality is pre-written parsers that allow us to accept any of a group of characters, -//! rather than just accepting characters in a defined sequence. +//! Selecting a single `char` or a `tag` is fairly limited. Sometimes, you will want to select one of several +//! `chars` of a specific class, like digits. For this, we use the [`one_of`] parser: +//! +//! ```rust +//! # use winnow::Parser; +//! # use winnow::IResult; +//! use winnow::bytes::one_of; //! -//! Here is a selection of them: +//! fn parse_digits(input: &str) -> IResult<&str, char> { +//! one_of("0123456789abcdefgABCDEFG").parse_next(input) +//! } //! -//! - [`alpha0`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha0.html): Recognizes zero or more lowercase and uppercase alphabetic characters: `/[a-zA-Z]/`. [`alpha1`](https://docs.rs/nom/latest/nom/character/complete/fn.alpha1.html) does the same but returns at least one character -//! - [`alphanumeric0`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric0.html): Recognizes zero or more numerical and alphabetic characters: `/[0-9a-zA-Z]/`. [`alphanumeric1`](https://docs.rs/nom/latest/nom/character/complete/fn.alphanumeric1.html) does the same but returns at least one character -//! - [`digit0`](https://docs.rs/nom/latest/nom/character/complete/fn.digit0.html): Recognizes zero or more numerical characters: `/[0-9]/`. [`digit1`](https://docs.rs/nom/latest/nom/character/complete/fn.digit1.html) does the same but returns at least one character -//! - [`multispace0`](https://docs.rs/nom/latest/nom/character/complete/fn.multispace0.html): Recognizes zero or more spaces, tabs, carriage returns and line feeds. [`multispace1`](https://docs.rs/nom/latest/nom/character/complete/fn.multispace1.html) does the same but returns at least one character -//! 
- [`space0`](https://docs.rs/nom/latest/nom/character/complete/fn.space0.html): Recognizes zero or more spaces and tabs. [`space1`](https://docs.rs/nom/latest/nom/character/complete/fn.space1.html) does the same but returns at least one character -//! - [`line_ending`](https://docs.rs/nom/latest/nom/character/complete/fn.line_ending.html): Recognizes an end of line (both `\n` and `\r\n`) -//! - [`newline`](https://docs.rs/nom/latest/nom/character/complete/fn.newline.html): Matches a newline character `\n` -//! - [`tab`](https://docs.rs/nom/latest/nom/character/complete/fn.tab.html): Matches a tab character `\t` +//! fn main() { +//! let input = "1a2b Hello"; //! +//! let (remainder, output) = parse_digits.parse_next(input).unwrap(); +//! assert_eq!(remainder, "a2b Hello"); +//! assert_eq!(output, '1'); //! -//! We can use these in +//! assert!(parse_digits("Z").is_err()); +//! } +//! ``` +//! +//! > **Aside:** `one_of` might look straightforward, a function returning a value that implements `Parser`. +//! > Let's look at it more closely as it's used above (resolving all generic parameters): +//! > ```rust +//! > # use winnow::IResult; +//! > pub fn one_of<'i>( +//! > list: &'static str +//! > ) -> impl FnMut(&'i str) -> IResult<&'i str, char> { +//! > // ... +//! > # winnow::bytes::one_of(list) +//! > } +//! > ``` +//! > If you have not programmed in a language where functions are values, the type signature of the +//! > `one_of` function might be a surprise. +//! > The function `one_of` *returns a function*. The function it returns is a +//! > `Parser`, taking a `&str` and returning an `IResult`. This is a common pattern in winnow for +//! > configurable or stateful parsers. +//! +//! Some character classes are common enough that a named parser is provided, like: +//! - [`line_ending`][crate::character::line_ending]: Recognizes an end of line (both `\n` and `\r\n`) +//! - [`newline`][crate::character::newline]: Matches a newline character `\n` +//! 
- [`tab`][crate::character::tab]: Matches a tab character `\t` +//! +//! You can then capture sequences of these characters with parsers like [`take_while1`]. //! ```rust -//! # extern crate nom; -//! # pub use nom::IResult; -//! # use std::error::Error; -//! pub use nom::character::complete::alpha0; -//! fn parser(input: &str) -> IResult<&str, &str> { -//! alpha0(input) +//! # use winnow::Parser; +//! # use winnow::IResult; +//! use winnow::bytes::take_while1; +//! +//! fn parse_digits(input: &str) -> IResult<&str, &str> { +//! take_while1("0123456789abcdefgABCDEFG").parse_next(input) //! } //! -//! fn main() -> Result<(), Box> { -//! let (remaining, letters) = parser("abc123")?; -//! assert_eq!(remaining, "123"); -//! assert_eq!(letters, "abc"); -//! -//! # Ok(()) +//! fn main() { +//! let input = "1a2b Hello"; +//! +//! let (remainder, output) = parse_digits.parse_next(input).unwrap(); +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(output, "1a2b"); +//! +//! assert!(parse_digits("Z").is_err()); //! } //! ``` //! -//! One important note is that, due to the type signature of these functions, -//! it is generally best to use them within a function that returns an `IResult`. +//! We could simplify this further by using one of the built-in character classes, [`hex_digit1`]: +//! ```rust +//! # use winnow::Parser; +//! # use winnow::IResult; +//! use winnow::character::hex_digit1; //! -//! If you don't, some of the information around the type of the `tag` function must be -//! manually specified, which can lead to verbose code or confusing errors. +//! fn parse_digits(input: &str) -> IResult<&str, &str> { +//! hex_digit1.parse_next(input) +//! } +//! +//! fn main() { +//! let input = "1a2b Hello"; +//! +//! let (remainder, output) = parse_digits.parse_next(input).unwrap(); +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(output, "1a2b"); +//! +//! assert!(parse_digits("Z").is_err()); +//! } +//! ``` //! //! 
[*prev*][super::chapter_1] [*next*][super::chapter_3] + +#![allow(unused_imports)] +use crate::bytes::one_of; +use crate::bytes::tag; +use crate::bytes::take_while1; +use crate::character::hex_digit1; +use crate::stream::ContainsToken; +use crate::Parser; +use std::ops::RangeInclusive; diff --git a/src/_tutorial/chapter_3.rs b/src/_tutorial/chapter_3.rs index ab63f922..de9b3be2 100644 --- a/src/_tutorial/chapter_3.rs +++ b/src/_tutorial/chapter_3.rs @@ -1,144 +1,241 @@ -//! # Chapter 3: Alternatives and Composition +//! # Chapter 3: Sequencing and Alternatives //! -//! In the last chapter, we saw how to create simple parsers using the `tag` function; -//! and some of Nom's prebuilt parsers. +//! In the last chapter, we saw how to create simple parsers using prebuilt parsers. //! -//! In this chapter, we explore two other widely used features of Nom: +//! In this chapter, we explore two other widely used features: //! alternatives and composition. //! -//! ## Alternatives +//! ## Sequencing //! -//! Sometimes, we might want to choose between two parsers; and we're happy with -//! either being used. -//! -//! Nom gives us a similar ability through the `alt()` combinator. +//! Now that we can create more interesting parsers, we can sequence them together, like: //! //! ```rust -//! # extern crate nom; -//! use nom::branch::alt; -//! ``` -//! -//! The `alt()` combinator will execute each parser in a tuple until it finds one -//! that does not error. If all error, then by default you are given the error from -//! the last error. -//! -//! We can see a basic example of `alt()` below. -//! -//! ```rust -//! # extern crate nom; -//! use nom::branch::alt; -//! use nom::bytes::complete::tag; -//! use nom::IResult; -//! # use std::error::Error; +//! # use winnow::bytes::take_while1; +//! # use winnow::Parser; +//! # use winnow::IResult; +//! # +//! fn parse_prefix(input: &str) -> IResult<&str, &str> { +//! "0x".parse_next(input) +//! } //! -//! 
fn parse_abc_or_def(input: &str) -> IResult<&str, &str> { -//! alt(( -//! tag("abc"), -//! tag("def") -//! ))(input) +//! fn parse_digits(input: &str) -> IResult<&str, &str> { +//! take_while1(( +//! ('0'..='9'), +//! ('A'..='F'), +//! ('a'..='f'), +//! )).parse_next(input) //! } //! -//! fn main() -> Result<(), Box> { -//! let (leftover_input, output) = parse_abc_or_def("abcWorld")?; -//! assert_eq!(leftover_input, "World"); -//! assert_eq!(output, "abc"); +//! fn main() { +//! let input = "0x1a2b Hello"; +//! +//! let (remainder, prefix) = parse_prefix.parse_next(input).unwrap(); +//! let (remainder, digits) = parse_digits.parse_next(remainder).unwrap(); //! -//! assert!(parse_abc_or_def("ghiWorld").is_err()); -//! # Ok(()) +//! assert_eq!(prefix, "0x"); +//! assert_eq!(digits, "1a2b"); +//! assert_eq!(remainder, " Hello"); //! } //! ``` //! -//! ## Composition -//! -//! Now that we can create more interesting regexes, we can compose them together. -//! The simplest way to do this is just to evaluate them in sequence: -//! +//! To sequence these together, you can just put them in a tuple: //! ```rust -//! # extern crate nom; -//! use nom::branch::alt; -//! use nom::bytes::complete::tag; -//! use nom::IResult; -//! # use std::error::Error; -//! -//! fn parse_abc(input: &str) -> IResult<&str, &str> { -//! tag("abc")(input) -//! } -//! fn parse_def_or_ghi(input: &str) -> IResult<&str, &str> { -//! alt(( -//! tag("def"), -//! tag("ghi") -//! ))(input) +//! # use winnow::bytes::take_while1; +//! # use winnow::Parser; +//! # use winnow::IResult; +//! # +//! # fn parse_prefix(input: &str) -> IResult<&str, &str> { +//! # "0x".parse_next(input) +//! # } +//! # +//! # fn parse_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! # +//! //... +//! +//! fn main() { +//! let input = "0x1a2b Hello"; +//! +//! let (remainder, (prefix, digits)) = ( +//! 
parse_prefix, +//! parse_digits +//! ).parse_next(input).unwrap(); +//! +//! assert_eq!(prefix, "0x"); +//! assert_eq!(digits, "1a2b"); +//! assert_eq!(remainder, " Hello"); //! } +//! ``` //! -//! fn main() -> Result<(), Box> { -//! let input = "abcghi"; -//! let (remainder, abc) = parse_abc(input)?; -//! let (remainder, def_or_ghi) = parse_def_or_ghi(remainder)?; -//! println!("first parsed: {abc}; then parsed: {def_or_ghi};"); -//! -//! # Ok(()) +//! Frequently, you won't care about the tag and you can instead use one of the provided combinators, +//! like [`preceded`]: +//! ```rust +//! # use winnow::bytes::take_while1; +//! # use winnow::Parser; +//! # use winnow::IResult; +//! use winnow::sequence::preceded; +//! +//! # fn parse_prefix(input: &str) -> IResult<&str, &str> { +//! # "0x".parse_next(input) +//! # } +//! # +//! # fn parse_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! # +//! //... +//! +//! fn main() { +//! let input = "0x1a2b Hello"; +//! +//! let (remainder, digits) = preceded( +//! parse_prefix, +//! parse_digits +//! ).parse_next(input).unwrap(); +//! +//! assert_eq!(digits, "1a2b"); +//! assert_eq!(remainder, " Hello"); //! } //! ``` //! -//! Composing tags is such a common requirement that, in fact, Nom has a few built in -//! combinators to do it. The simplest of these is `tuple()`. The `tuple()` combinator takes a tuple of parsers, -//! and either returns `Ok` with a tuple of all of their successful parses, or it -//! returns the `Err` of the first failed parser. +//! ## Alternatives //! -//! ```rust -//! # extern crate nom; -//! use nom::sequence::tuple; -//! ``` +//! Sometimes, we might want to choose between two parsers; and we're happy with +//! either being used. //! +//! The de facto way to do this in winnow is with the [`alt()`] combinator which will execute each +//! 
parser in a tuple until it finds one that does not error. If all error, then by default you are +//! given the error from the last parser. //! +//! We can see a basic example of `alt()` below. //! ```rust -//! # extern crate nom; -//! use nom::branch::alt; -//! use nom::sequence::tuple; -//! use nom::bytes::complete::tag_no_case; -//! use nom::character::complete::{digit1}; -//! use nom::IResult; -//! # use std::error::Error; -//! -//! fn parse_base(input: &str) -> IResult<&str, &str> { -//! alt(( -//! tag_no_case("a"), -//! tag_no_case("t"), -//! tag_no_case("c"), -//! tag_no_case("g") -//! ))(input) -//! } +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! use winnow::branch::alt; //! -//! fn parse_pair(input: &str) -> IResult<&str, (&str, &str)> { -//! // the many_m_n combinator might also be appropriate here. -//! tuple(( -//! parse_base, -//! parse_base, -//! ))(input) +//! fn parse_digits(input: &str) -> IResult<&str, (&str, &str)> { +//! alt(( +//! ("0b", parse_bin_digits), +//! ("0o", parse_oct_digits), +//! ("0d", parse_dec_digits), +//! ("0x", parse_hex_digits), +//! )).parse_next(input) //! } //! -//! fn main() -> Result<(), Box> { -//! let (remaining, parsed) = parse_pair("aTcG")?; -//! assert_eq!(parsed, ("a", "T")); -//! assert_eq!(remaining, "cG"); -//! -//! assert!(parse_pair("Dct").is_err()); -//! -//! # Ok(()) +//! // ... +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! 
# ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b Hello"; +//! +//! let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(prefix, "0x"); +//! assert_eq!(digits, "1a2b"); +//! +//! assert!(parse_digits("ghiWorld").is_err()); //! } //! ``` //! +//! Sometimes a giant if/else-if ladder can be slow and you'd rather have a `match` statement for +//! branches of your parser that have unique prefixes. In this case, you can use the +//! [`dispatch`][crate::branch::dispatch] macro: //! -//! ## Extra Nom Tools -//! -//! After using `alt()` and `tuple()`, you might also be interested in a few other parsers that do similar things: +//! ```rust +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! use winnow::branch::dispatch; +//! use winnow::bytes::take; +//! use winnow::combinator::fail; +//! +//! fn parse_digits(input: &str) -> IResult<&str, &str> { +//! dispatch!(take(2usize); +//! "0b" => parse_bin_digits, +//! "0o" => parse_oct_digits, +//! "0d" => parse_dec_digits, +//! "0x" => parse_hex_digits, +//! _ => fail, +//! ).parse_next(input) +//! } //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [delimited](https://docs.rs/nom/latest/nom/sequence/fn.delimited.html) | `delimited(char('('), take(2), char(')'))` | `"(ab)cd"` | `Ok(("cd", "ab"))` || -//! | [preceded](https://docs.rs/nom/latest/nom/sequence/fn.preceded.html) | `preceded(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "XY"))` || -//! | [terminated](https://docs.rs/nom/latest/nom/sequence/fn.terminated.html) | `terminated(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", "ab"))` || -//! | [pair](https://docs.rs/nom/latest/nom/sequence/fn.pair.html) | `pair(tag("ab"), tag("XY"))` | `"abXYZ"` | `Ok(("Z", ("ab", "XY")))` || -//! 
| [separated_pair](https://docs.rs/nom/latest/nom/sequence/fn.separated_pair.html) | `separated_pair(tag("hello"), char(','), tag("world"))` | `"hello,world!"` | `Ok(("!", ("hello", "world")))` || +//! // ... +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b Hello"; +//! +//! let (remainder, digits) = parse_digits.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(digits, "1a2b"); +//! +//! assert!(parse_digits("ghiWorld").is_err()); +//! } +//! ``` //! //! [*prev*][super::chapter_2] [*next*][super::chapter_4] + +#![allow(unused_imports)] +use crate::branch::alt; +use crate::branch::dispatch; +use crate::sequence::preceded; diff --git a/src/_tutorial/chapter_4.rs b/src/_tutorial/chapter_4.rs index edb7d9bd..00615a9c 100644 --- a/src/_tutorial/chapter_4.rs +++ b/src/_tutorial/chapter_4.rs @@ -1,163 +1,108 @@ //! # Chapter 4: Parsers With Custom Return Types //! //! So far, we have seen mostly functions that take an `&str`, and return a -//! `IResult<&str, &str>`. Splitting strings into smaller strings is certainly useful, -//! but it's not the only thing Nom is capable of! +//! `IResult<&str, &str>`. Splitting strings into smaller strings and characters is certainly +//! useful, but it's not the only thing winnow is capable of! //! //! A useful operation when parsing is to convert between types; for example -//! 
parsing from `&str` to another primitive, like `bool`. +//! parsing from `&str` to another primitive, like [`usize`]. //! //! All we need to do for our parser to return a different type is to change -//! the second type parameter of `IResult` to the desired return type. -//! For example, to return a bool, return a `IResult<&str, bool>`. -//! +//! the second type parameter of [`IResult`] to the desired return type. +//! For example, to return a `usize`, return a `IResult<&str, usize>`. //! Recall that the first type parameter of the `IResult` is the input //! type, so even if you're returning something different, if your input //! is a `&str`, the first type argument of `IResult` should be also. //! -//! Until you have read the chapter on Errors, we strongly suggest avoiding -//! the use of parsers built into Rust (like `str.parse`); as they require -//! special handling to work well with Nom. -//! -//! That said, one Nom-native way of doing a type conversion is to use the -//! [`value`](https://docs.rs/nom/latest/nom/combinator/fn.value.html) combinator -//! to convert from a successful parse to a particular value. -//! -//! The following code converts from a string containing `"true"` or `"false"`, -//! to the corresponding `bool`. +//! One winnow-native way of doing a type conversion is to use the +//! [`Parser::parse_to`] combinator +//! to convert from a successful parse to a particular type using [`FromStr`]. //! +//! The following code converts from a string containing a number to `usize`: //! ```rust -//! # extern crate nom; -//! # use std::error::Error; -//! use nom::IResult; -//! use nom::bytes::complete::tag; -//! use nom::combinator::value; -//! use nom::branch::alt; -//! -//! fn parse_bool(input: &str) -> IResult<&str, bool> { -//! // either, parse `"true"` -> `true`; `"false"` -> `false`, or error. -//! alt(( -//! value(true, tag("true")), -//! value(false, tag("false")), -//! ))(input) -//! } -//! -//! fn main() -> Result<(), Box> { -//! 
// Parses the `"true"` out. -//! let (remaining, parsed) = parse_bool("true|false")?; -//! assert_eq!(parsed, true); -//! assert_eq!(remaining, "|false"); -//! -//! // If we forget about the "|", we get an error. -//! let parsing_error = parse_bool(remaining); -//! assert!(parsing_error.is_err()); -//! -//! // Skipping the first byte gives us `false`! -//! let (remaining, parsed) = parse_bool(&remaining[1..])?; -//! assert_eq!(parsed, false); -//! assert_eq!(remaining, ""); -//! -//! -//! -//! # Ok(()) +//! # use winnow::Parser; +//! # use winnow::IResult; +//! # use winnow::character::digit1; +//! # +//! fn parse_digits(input: &str) -> IResult<&str, usize> { +//! digit1 +//! .parse_to() +//! .parse_next(input) //! } -//! ``` -//! -//! ## Nom's in-built parser functions -//! -//! Nom has a wide array of parsers built in. Here is a list of -//! [parsers which recognize specific characters](https://docs.rs/nom/latest/nom/character/complete/index.html). -//! -//! Some of them we have seen before in Chapter 2, but now we also can try out the parsers that return different -//! types, like `i32`. An example of this parser is shown in the next section. //! -//! ## Building a More Complex Example +//! fn main() { +//! let input = "1024 Hello"; //! -//! A more complex example of parsing custom types might be parsing a 2D coordinate. +//! let (remainder, output) = parse_digits.parse_next(input).unwrap(); +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(output, 1024); //! -//! Let us try to figure out how to design this. -//! -//! - We know that we want to take a string, like `"(3, -2)"`, and convert into -//! a `Coordinate` struct. -//! - We can split this into three parts: -//! -//! ```ignore -//! (vvvvvvvvvvvvv) # The outer brackets. -//! vvvv , vvvv # The comma, separating values. -//! 3 -2 # The actual integers. +//! assert!(parse_digits("Z").is_err()); +//! } //! ``` //! -//! - So, we will need three parsers, to deal with this: -//! 1. 
A parser for integers, which will deal with the raw numbers. -//! 2. A parser for comma seperated pair, which will split it up into integers. -//! 3. A parser for the outer brackets. -//! -//! - We can see below how we achieve this: -//! +//! `Parser::parse_to` is just a convenient form of [`Parser::map_res`] which we can use to handle +//! all radices of numbers: //! ```rust -//! # extern crate nom; -//! # use std::error::Error; -//! use nom::IResult; -//! use nom::bytes::complete::tag; -//! use nom::sequence::{separated_pair, delimited}; -//! -//! // This is the type we will parse into. -//! #[derive(Debug,PartialEq)] -//! pub struct Coordinate { -//! pub x: i32, -//! pub y: i32, -//! } -//! -//! // 1. Nom has an in-built i32 parser. -//! use nom::character::complete::i32; -//! -//! // 2. Use the `separated_pair` parser to combine two parsers (in this case, -//! // both `i32`), ignoring something in-between. -//! fn parse_integer_pair(input: &str) -> IResult<&str, (i32, i32)> { -//! separated_pair( -//! i32, -//! tag(", "), -//! i32 -//! )(input) -//! } -//! -//! // 3. Use the `delimited` parser to apply a parser, ignoring the results -//! // of two surrounding parsers. -//! fn parse_coordinate(input: &str) -> IResult<&str, Coordinate> { -//! let (remaining, (x, y)) = delimited( -//! tag("("), -//! parse_integer_pair, -//! tag(")") -//! )(input)?; -//! -//! // Note: we could construct this by implementing `From` on `Coordinate`, -//! // We don't, just so it's obvious what's happening. -//! Ok((remaining, Coordinate {x, y})) -//! +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! use winnow::branch::dispatch; +//! use winnow::bytes::take; +//! use winnow::combinator::fail; +//! +//! fn parse_digits(input: &str) -> IResult<&str, usize> { +//! dispatch!(take(2usize); +//! "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), +//! "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), +//! 
"0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), +//! "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! _ => fail, +//! ).parse_next(input) //! } //! -//! fn main() -> Result<(), Box> { -//! let (_, parsed) = parse_coordinate("(3, 5)")?; -//! assert_eq!(parsed, Coordinate {x: 3, y: 5}); -//! -//! let (_, parsed) = parse_coordinate("(2, -4)")?; -//! assert_eq!(parsed, Coordinate {x: 2, y: -4}); -//! -//! let parsing_error = parse_coordinate("(3,)"); -//! assert!(parsing_error.is_err()); -//! -//! let parsing_error = parse_coordinate("(,3)"); -//! assert!(parsing_error.is_err()); -//! -//! let parsing_error = parse_coordinate("Ferris"); -//! assert!(parsing_error.is_err()); -//! -//! -//! # Ok(()) +//! // ... +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b Hello"; +//! +//! let (remainder, digits) = parse_digits.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(digits, 0x1a2b); +//! +//! assert!(parse_digits("ghiWorld").is_err()); //! } //! ``` //! -//! As an exercise, you might want to explore how to make this parser deal gracefully with -//! whitespace in the input. -//! //! 
[*prev*][super::chapter_3] [*next*][super::chapter_5] + +#![allow(unused_imports)] +use crate::IResult; +use crate::Parser; +use std::str::FromStr; diff --git a/src/_tutorial/chapter_5.rs b/src/_tutorial/chapter_5.rs index 037db415..7a45f9d1 100644 --- a/src/_tutorial/chapter_5.rs +++ b/src/_tutorial/chapter_5.rs @@ -31,7 +31,7 @@ //! at least `m` bytes, and no more than `n` bytes. //! //! -//! ```rust +//! ```rust,ignore //! # extern crate nom; //! # use std::error::Error; //! use nom::IResult; diff --git a/src/_tutorial/chapter_6.rs b/src/_tutorial/chapter_6.rs index 0a1d827f..6e5f614b 100644 --- a/src/_tutorial/chapter_6.rs +++ b/src/_tutorial/chapter_6.rs @@ -1,40 +1,213 @@ -//! # Chapter 6: Repeating Parsers +//! # Chapter 6: Repetition //! -//! A single parser which repeats a predicate is useful, but more useful still is a combinator that -//! repeats a parser. Nom has multiple combinators which operate on this principle; the most obvious of -//! which is `many0`, which applies a parser as many times as possible; and returns a vector of -//! the results of those parses. Here is an example: +//! In [`chapter_3`], we covered how to sequence different parsers into a tuple but sometimes you need to run a +//! single parser many times into a [`Vec`]. //! +//! Let's take our `parse_digits` and collect a list of them with [`many0`]: //! ```rust -//! # extern crate nom; -//! # use std::error::Error; -//! use nom::IResult; -//! use nom::multi::many0; -//! use nom::bytes::complete::tag; -//! -//! fn parser(s: &str) -> IResult<&str, Vec<&str>> { -//! many0(tag("abc"))(s) +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! # use winnow::branch::dispatch; +//! # use winnow::bytes::take; +//! # use winnow::combinator::fail; +//! use winnow::combinator::opt; +//! use winnow::multi::many0; +//! use winnow::sequence::terminated; +//! +//! fn parse_list(input: &str) -> IResult<&str, Vec> { +//! 
many0(terminated(parse_digits, opt(','))).parse_next(input) +//! } +//! +//! // ... +//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # dispatch!(take(2usize); +//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) +//! # } +//! # +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! +//! let (remainder, digits) = parse_list.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(digits, vec![0x1a2b, 0x3c4d, 0x5e6f]); +//! +//! assert!(parse_digits("ghiWorld").is_err()); //! } +//! ``` +//! +//! You'll notice that the above allows trailing `,` when we intended to not support that. We can +//! easily fix this by using [`separated0`]: +//! ```rust +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! # use winnow::branch::dispatch; +//! # use winnow::bytes::take; +//! # use winnow::combinator::fail; +//! use winnow::multi::separated0; +//! +//! fn parse_list(input: &str) -> IResult<&str, Vec> { +//! separated0(parse_digits, ",").parse_next(input) +//! 
} +//! +//! // ... +//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # dispatch!(take(2usize); +//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) +//! # } +//! # +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } //! //! fn main() { -//! assert_eq!(parser("abcabc"), Ok(("", vec!["abc", "abc"]))); -//! assert_eq!(parser("abc123"), Ok(("123", vec!["abc"]))); -//! assert_eq!(parser("123123"), Ok(("123123", vec![]))); -//! assert_eq!(parser(""), Ok(("", vec![]))); +//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! +//! let (remainder, digits) = parse_list.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(digits, vec![0x1a2b, 0x3c4d, 0x5e6f]); +//! +//! assert!(parse_digits("ghiWorld").is_err()); //! } //! ``` //! -//! There are many different parsers to choose from: +//! If you look closely at [`many0`], it isn't collecting directly into a [`Vec`] but +//! [`Accumulate`] to gather the results. This lets us make more complex parsers than we did in +//!
[`chapter_2`] by accumulating the results into a `()` and [`recognize`][Parser::recognize]-ing the captured input: +//! ```rust +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! # use winnow::branch::dispatch; +//! # use winnow::bytes::take; +//! # use winnow::combinator::fail; +//! # use winnow::multi::separated0; +//! # +//! fn recognize_list(input: &str) -> IResult<&str, &str> { +//! parse_list.recognize().parse_next(input) +//! } +//! +//! fn parse_list(input: &str) -> IResult<&str, ()> { +//! separated0(parse_digits, ",").parse_next(input) +//! } +//! +//! // ... +//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # dispatch!(take(2usize); +//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) +//! # } +//! # +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! +//! let (remainder, digits) = recognize_list.parse_next(input).unwrap(); //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! 
| [count](https://docs.rs/nom/latest/nom/multi/fn.count.html) | `count(take(2), 3)` | `"abcdefgh"` | `Ok(("gh", vec!["ab", "cd", "ef"]))` |Applies the child parser a specified number of times| -//! | [many0](https://docs.rs/nom/latest/nom/multi/fn.many0.html) | `many0(tag("ab"))` | `"abababc"` | `Ok(("c", vec!["ab", "ab", "ab"]))` |Applies the parser 0 or more times and returns the list of results in a Vec. `many1` does the same operation but must return at least one element| -//! | [many_m_n](https://docs.rs/nom/latest/nom/multi/fn.many_m_n.html) | `many_m_n(1, 3, tag("ab"))` | `"ababc"` | `Ok(("c", vec!["ab", "ab"]))` |Applies the parser between m and n times (n included) and returns the list of results in a Vec| -//! | [many_till](https://docs.rs/nom/latest/nom/multi/fn.many_till.html) | `many_till(tag( "ab" ), tag( "ef" ))` | `"ababefg"` | `Ok(("g", (vec!["ab", "ab"], "ef")))` |Applies the first parser until the second applies. Returns a tuple containing the list of results from the first in a Vec and the result of the second| -//! | [separated_list0](https://docs.rs/nom/latest/nom/multi/fn.separated_list0.html) | `separated_list0(tag(","), tag("ab"))` | `"ab,ab,ab."` | `Ok((".", vec!["ab", "ab", "ab"]))` |`separated_list1` works like `separated_list0` but must returns at least one element| -//! | [fold_many0](https://docs.rs/nom/latest/nom/multi/fn.fold_many0.html) | `fold_many0(be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([], 6))` |Applies the parser 0 or more times and folds the list of return values. The `fold_many1` version must apply the child parser at least one time| -//! | [fold_many_m_n](https://docs.rs/nom/latest/nom/multi/fn.fold_many_m_n.html) | `fold_many_m_n(1, 2, be_u8, \|\| 0, \|acc, item\| acc + item)` | `[1, 2, 3]` | `Ok(([3], 3))` |Applies the parser between m and n times (n included) and folds the list of return value| -//! 
| [length_count](https://docs.rs/nom/latest/nom/multi/fn.length_count.html) | `length_count(number, tag("ab"))` | `"2ababab"` | `Ok(("ab", vec!["ab", "ab"]))` |Gets a number from the first parser, then applies the second parser that many times| +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(digits, "0x1a2b,0x3c4d,0x5e6f"); +//! +//! assert!(parse_digits("ghiWorld").is_err()); +//! } +//! ``` //! //! [*prev*][super::chapter_5] [*next*][super::chapter_7] + +#![allow(unused_imports)] +use super::chapter_2; +use super::chapter_3; +use crate::multi::many0; +use crate::multi::separated0; +use crate::stream::Accumulate; +use crate::Parser; +use std::vec::Vec; diff --git a/src/_tutorial/chapter_7.rs b/src/_tutorial/chapter_7.rs index 9bf6fa6e..752bbd72 100644 --- a/src/_tutorial/chapter_7.rs +++ b/src/_tutorial/chapter_7.rs @@ -1,11 +1,158 @@ -//! # Chapter 7: Using Errors from Outside Nom +//! # Chapter 7: Error Reporting //! -//! [Nom has other documentation about errors, so in place of this chapter, read this page.](https://github.com/Geal/nom/blob/main/doc/error_management.md) +//! ## `Error` //! -//! ## Particular Notes +//! Back in [`chapter_1`], we glossed over the `Err` side of [`IResult`]. `IResult<I, O>` is +//! actually short for `IResult<I, O, Error<I>>` where [`Error`] is a cheap, universal error type +//! for getting started. When humans are producing the file, like with `toml`, you might want to +//! sacrifice some performance for providing more details on how to resolve the problem. //! -//! - It's particularly useful to use the `map_res` function. It allows you to -//! convert an external error to a Nom error. For an example, -//! see [the Nom example on the front page](https://github.com/Geal/nom#example). +//! winnow includes [`VerboseError`] for this but you can [customize the error as you +//! wish][_topic::error]. You can use [`Parser::context`] to annotate the error with custom types +//! while unwinding to further improve the error quality. //! -//!
[*prev*][super::chapter_5] +//! ```rust +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! # use winnow::branch::alt; +//! use winnow::error::VerboseError; +//! +//! fn parse_digits(input: &str) -> IResult<&str, (&str, &str), VerboseError<&str>> { +//! alt(( +//! ("0b", parse_bin_digits).context("binary"), +//! ("0o", parse_oct_digits).context("octal"), +//! ("0d", parse_dec_digits).context("decimal"), +//! ("0x", parse_hex_digits).context("hexadecimal"), +//! )).parse_next(input) +//! } +//! +//! // ... +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # take_while1(( +//! # ('0'..='1'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b Hello"; +//! +//! let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(prefix, "0x"); +//! assert_eq!(digits, "1a2b"); +//! } +//! ``` +//! +//! At first glance, this looks correct but what `context` will be reported when parsing `"0b5"`? +//! If you remember back to [`chapter_3`], [`alt`] will only report the last error by default which +//! means when parsing `"0b5"`, the `context` will be `"hexadecimal"`. +//! +//! ## `ErrMode` +//! +//! Let's break down `IResult<I, O, E>` one step further: +//! ```rust +//! # use winnow::error::Error; +//! # use winnow::error::ErrMode; +//!
pub type IResult<I, O, E = Error<I>> = Result<(I, O), ErrMode<E>>; +//! ``` +//! `IResult` is just a fancy wrapper around `Result` that wraps our error in an [`ErrMode`] +//! type. +//! +//! `ErrMode` is an enum with `Backtrack` and `Cut` variants (ignore `Incomplete` as it's only +//! relevant for [streaming][_topic::stream]). By default, errors are `Backtrack`, meaning that +//! other parsing branches will be attempted on failure, like the next case of an `alt`. `Cut` +//! short-circuits all other branches, immediately reporting the error. +//! +//! So we can get the correct `context` by modifying the above example with [`cut_err`]: +//! ```rust +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! # use winnow::branch::alt; +//! # use winnow::error::VerboseError; +//! use winnow::combinator::cut_err; +//! +//! fn parse_digits(input: &str) -> IResult<&str, (&str, &str), VerboseError<&str>> { +//! alt(( +//! ("0b", cut_err(parse_bin_digits)).context("binary"), +//! ("0o", cut_err(parse_oct_digits)).context("octal"), +//! ("0d", cut_err(parse_dec_digits)).context("decimal"), +//! ("0x", cut_err(parse_hex_digits)).context("hexadecimal"), +//! )).parse_next(input) +//! } +//! +//! // ... +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # take_while1(( +//! # ('0'..='1'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b Hello"; +//!
+//! let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(prefix, "0x"); +//! assert_eq!(digits, "1a2b"); +//! } +//! ``` +//! Now, when parsing `"0b5"`, the `context` will be `"binary"`. +//! +//! [*prev*][super::chapter_6] [*next*][super::chapter_8] + +#![allow(unused_imports)] +use super::chapter_1; +use super::chapter_3; +use crate::branch::alt; +use crate::combinator::cut_err; +use crate::error::ErrMode; +use crate::error::Error; +use crate::error::VerboseError; +use crate::FinishIResult; +use crate::IResult; +use crate::Parser; +use crate::_topic; diff --git a/src/_tutorial/chapter_8.rs b/src/_tutorial/chapter_8.rs new file mode 100644 index 00000000..cdfae35c --- /dev/null +++ b/src/_tutorial/chapter_8.rs @@ -0,0 +1,115 @@ +//! # Chapter 8: Integrating the Parser +//! +//! So far, we've highlighted how to incrementally parse, but how do we bring this all together +//! into our application? +//! +//! The type we've been working with looks like: +//! ```rust +//! # use winnow::error::VerboseError; +//! # use winnow::error::ErrMode; +//! type IResult<'i, O> = Result< +//! (&'i str, O), +//! ErrMode< +//! VerboseError<&'i str> +//! > +//! >; +//! ``` +//! 1. We have to decide what to do about the `remainder` of the input. +//! 2. The error type is not compatible with the rest of the Rust ecosystem +//! +//! Normally, Rust applications want errors that are `std::error::Error + Send + Sync + 'static` +//! meaning: +//! - They implement the [`std::error::Error`] trait +//! - They can be sent across threads +//! - They are safe to be referenced across threads +//! - They do not borrow +//! +//! winnow provides some helpers for this like [`FinishIResult`]: +//! ```rust +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! # use winnow::branch::dispatch; +//! # use winnow::bytes::take; +//! # use winnow::combinator::fail; +//! 
use winnow::FinishIResult; +//! use winnow::error::Error; +//! +//! #[derive(Debug, PartialEq, Eq)] +//! pub struct Hex(usize); +//! +//! impl std::str::FromStr for Hex { +//! type Err = Error<String>; +//! +//! fn from_str(input: &str) -> Result<Self, Self::Err> { +//! parse_digits +//! .map(Hex) +//! .parse_next(input) +//! .finish() +//! .map_err(|e| e.into_owned()) +//! } +//! } +//! +//! // ... +//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # dispatch!(take(2usize); +//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) +//! # } +//! # +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b"; +//! assert_eq!(input.parse::<Hex>().unwrap(), Hex(0x1a2b)); +//! +//! let input = "0x1a2b Hello"; +//! assert!(input.parse::<Hex>().is_err()); +//! let input = "ghiHello"; +//! assert!(input.parse::<Hex>().is_err()); +//! } +//! ``` +//! [`FinishIResult::finish`]: +//! - Ensures we hit [`eof`] +//! - Removes the [`ErrMode`] wrapper +//! +//! [`Error::into_owned`]: +//! - Converts the `&str` in `Error<&str>` to `String` which enables support for [`std::error::Error`] +//! +//!
[*prev*][super::chapter_7] + +#![allow(unused_imports)] +use super::chapter_1; +use crate::combinator::eof; +use crate::error::ErrMode; +use crate::error::Error; +use crate::FinishIResult; +use crate::IResult; diff --git a/src/_tutorial/mod.rs b/src/_tutorial/mod.rs index c35f3f26..f8674156 100644 --- a/src/_tutorial/mod.rs +++ b/src/_tutorial/mod.rs @@ -1,32 +1,36 @@ -//! # The Nominomiconsa +//! # Tutorial //! -//! Welcome to Nominomicon; a guide to using the Nom parser for great good. -//! This guide will give you an introduction to the theory and practice of -//! using Nom. +//! This tutorial assumes that you are: +//! - Already familiar with Rust +//! - Using `winnow` for the first time //! -//! This guide assumes only that you are: -//! - Wanting to learn Nom, -//! - Already familiar with Rust. +//! The focus will be on parsing in-memory strings (`&str`). Once done, you might want to check the +//! [Special Topics][_topic] for more specialized topics or examples. //! -//! Nom is a parser-combinator library. In other words, it gives you tools to define: -//! - "parsers" (a function that takes an input, and gives back an output), and -//! - "combinators" (functions that take parsers, and _combine_ them together!). +//! ## About //! -//! By combining parsers with combinators, you can build complex parsers up from -//! simpler ones. These complex parsers are enough to understand HTML, mkv or Python! +//! `winnow` is a parser-combinator library. In other words, it gives you tools to define: +//! - "parsers", or functions that take an input and give back an output +//! - "combinators", or functions that take parsers and _combine_ them together! //! -//! Before we set off, it's important to list some caveats: -//! - This guide is for Nom7. Nom has undergone significant changes, so if -//! you are searching for documentation or StackOverflow answers, you may -//! find older documentation. Some common indicators that it is an old version are: -//!
- Documentation older than 21st August, 2021 -//! - Use of the `named!` macro -//! - Use of `CompleteStr` or `CompleteByteArray`. -//! - Nom can parse (almost) anything; but this guide will focus almost entirely on parsing -//! complete `&str` into things. +//! While "combinator" might be an unfamiliar word, you are likely using them in your rust code +//! today, like with the [`Iterator`] trait: +//! ```rust +//! let data = vec![1, 2, 3, 4, 5]; +//! let even_count = data.iter() +//! .copied() // combinator +//! .filter(|d| d % 2 == 0) // combinator +//! .count(); // combinator +//! ``` +//! +//! Parser combinators allow building parsers for complex formats from simple, reusable parsers. //! //! [*next*][chapter_1] +#![allow(unused_imports)] +use crate::_topic; +use std::iter::Iterator; + pub mod chapter_1; pub mod chapter_2; pub mod chapter_3; @@ -34,3 +38,4 @@ pub mod chapter_4; pub mod chapter_5; pub mod chapter_6; pub mod chapter_7; +pub mod chapter_8; From 6bf4c6d97222ea31c10a8bbf96d2428515bd65c9 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 20 Feb 2023 15:38:47 -0600 Subject: [PATCH 09/11] docs(tutorial): Remove old chapter 5 --- src/_tutorial/chapter_5.rs | 263 +++++++++++++++++++++++++++++-------- src/_tutorial/chapter_6.rs | 205 +++++++++++------------------ src/_tutorial/chapter_7.rs | 177 ++++++++++--------------- src/_tutorial/chapter_8.rs | 115 ---------------- src/_tutorial/mod.rs | 1 - 5 files changed, 347 insertions(+), 414 deletions(-) delete mode 100644 src/_tutorial/chapter_8.rs diff --git a/src/_tutorial/chapter_5.rs b/src/_tutorial/chapter_5.rs index 7a45f9d1..ac619450 100644 --- a/src/_tutorial/chapter_5.rs +++ b/src/_tutorial/chapter_5.rs @@ -1,66 +1,213 @@ -//! # Chapter 5: Repeating with Predicates -//! -//! Just as, when programming, the humble while loop unlocks many useful -//! features; in Nom, repeating a parser multiple times can be incredibly useful -//! -//! 
There are, however, two ways of including repeating functionality into Nom -- -//! parsers which are governed by a predicate; and combinators which repeat -//! a parser. -//! -//! ## Parsers which use a predicate -//! -//! A `predicate` is a function which returns a boolean value (i.e. given some input, -//! it returns `true` or `false`). These are incredibly common when parsing -- for instance, -//! a predicate `is_vowel` might decide whether a character is an english vowel (a, e, i, o or u). -//! -//! These can be used to make parsers that Nom hasn't built in. For instance, the below -//! parser will take as many vowels as possible. -//! -//! There are a few different categories of predicate parsers that are worth mentioning: -//! -//! - For bytes, there are three different categories of parser: `take_till`, `take_until`, and `take_while`. -//! `take_till` will continue consuming input until its input meets the predicate. -//! `take_while` will continue consuming input until its input *does not* meet the predicate. -//! `take_until` looks a lot like a predicate parser, but simply consumes until the first -//! occurence of the pattern of bytes. -//! - Some parsers have a "twin" with a `1` at the end of their name -- for example, `take_while` -//! has `take_while1`. The difference between them is that `take_while` could return an empty -//! slice if the first byte does not satisfy a predicate. `take_while1` returns an error if -//! the predicate is not met. -//! - As a special case, `take_while_m_n` is like `take_while`, but guarantees that it will consume -//! at least `m` bytes, and no more than `n` bytes. -//! -//! -//! ```rust,ignore -//! # extern crate nom; -//! # use std::error::Error; -//! use nom::IResult; -//! use nom::bytes::complete::{tag, take_until, take_while}; -//! use nom::character::{is_space}; -//! use nom::sequence::{terminated}; -//! -//! fn parse_sentence(input: &str) -> IResult<&str, &str> { -//! 
terminated(take_until("."), take_while(|c| c == '.' || c == ' '))(input) +//! # Chapter 6: Repetition +//! +//! In [`chapter_3`], we covered how to sequence different parsers into a tuple but sometimes you need to run a +//! single parser many times into a [`Vec`]. +//! +//! Let's take our `parse_digits` and collect a list of them with [`many0`]: +//! ```rust +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! # use winnow::branch::dispatch; +//! # use winnow::bytes::take; +//! # use winnow::combinator::fail; +//! use winnow::combinator::opt; +//! use winnow::multi::many0; +//! use winnow::sequence::terminated; +//! +//! fn parse_list(input: &str) -> IResult<&str, Vec> { +//! many0(terminated(parse_digits, opt(','))).parse_next(input) +//! } +//! +//! // ... +//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # dispatch!(take(2usize); +//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) +//! # } +//! # +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! +//! 
let (remainder, digits) = parse_list.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(digits, vec![0x1a2b, 0x3c4d, 0x5e6f]); +//! +//! assert!(parse_digits("ghiWorld").is_err()); +//! } +//! ``` +//! +//! You'll notice that the above allows trailing `,` when we intended to not support that. We can +//! easily fix this by using [`separated0`]: +//! ```rust +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! # use winnow::branch::dispatch; +//! # use winnow::bytes::take; +//! # use winnow::combinator::fail; +//! use winnow::multi::separated0; +//! +//! fn parse_list(input: &str) -> IResult<&str, Vec> { +//! separated0(parse_digits, ",").parse_next(input) //! } //! -//! fn main() -> Result<(), Box> { -//! let (remaining, parsed) = parse_sentence("I am Tom. I write Rust.")?; -//! assert_eq!(parsed, "I am Tom"); -//! assert_eq!(remaining, "I write Rust."); -//! -//! let parsing_error = parse_sentence("Not a sentence (no period at the end)"); -//! assert!(parsing_error.is_err()); -//! +//! // ... +//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # dispatch!(take(2usize); +//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) +//! # } +//! # +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! 
# fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } //! -//! # Ok(()) +//! fn main() { +//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! +//! let (remainder, digits) = parse_list.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(digits, vec![0x1a2b, 0x3c4d, 0x5e6f]); +//! +//! assert!(parse_digits("ghiWorld").is_err()); //! } //! ``` -//! For detailed examples, see their documentation, shown below: //! -//! | combinator | usage | input | output | comment | -//! |---|---|---|---|---| -//! | [take_while](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_while.html) | `take_while(is_alphabetic)` | `"abc123"` | `Ok(("123", "abc"))` |Returns the longest list of bytes for which the provided function returns true. `take_while1` does the same, but must return at least one character. `take_while_m_n` does the same, but must return between `m` and `n` characters.| -//! | [take_till](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_till.html) | `take_till(is_alphabetic)` | `"123abc"` | `Ok(("abc", "123"))` |Returns the longest list of bytes or characters until the provided function returns true. `take_till1` does the same, but must return at least one character. This is the reverse behaviour from `take_while`: `take_till(f)` is equivalent to `take_while(\|c\| !f(c))`| -//! | [take_until](https://docs.rs/nom/latest/nom/bytes/complete/fn.take_until.html) | `take_until("world")` | `"Hello world"` | `Ok(("world", "Hello "))` |Returns the longest list of bytes or characters until the provided tag is found. `take_until1` does the same, but must return at least one character| +//! If you look closely at [`many0`], it isn't collecting directly into a [`Vec`] but uses +//! [`Accumulate`] to gather the results. This lets us make more complex parsers than we did in +//!
[`chapter_2`] by accumulating the results into a `()` and [`recognize`][Parser::recognize]-ing the captured input: +//! ```rust +//! # use winnow::IResult; +//! # use winnow::Parser; +//! # use winnow::bytes::take_while1; +//! # use winnow::branch::dispatch; +//! # use winnow::bytes::take; +//! # use winnow::combinator::fail; +//! # use winnow::multi::separated0; +//! # +//! fn recognize_list(input: &str) -> IResult<&str, &str> { +//! parse_list.recognize().parse_next(input) +//! } +//! +//! fn parse_list(input: &str) -> IResult<&str, ()> { +//! separated0(parse_digits, ",").parse_next(input) +//! } +//! +//! // ... +//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # dispatch!(take(2usize); +//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) +//! # } +//! # +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='7'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # )).parse_next(input) +//! # } +//! # +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # take_while1(( +//! # ('0'..='9'), +//! # ('A'..='F'), +//! # ('a'..='f'), +//! # )).parse_next(input) +//! # } +//! +//! fn main() { +//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! +//! let (remainder, digits) = recognize_list.parse_next(input).unwrap(); +//! +//! assert_eq!(remainder, " Hello"); +//! assert_eq!(digits, "0x1a2b,0x3c4d,0x5e6f"); +//! +//! 
assert!(parse_digits("ghiWorld").is_err()); +//! } +//! ``` //! //! [*prev*][super::chapter_4] [*next*][super::chapter_6] + +#![allow(unused_imports)] +use super::chapter_2; +use super::chapter_3; +use crate::multi::many0; +use crate::multi::separated0; +use crate::stream::Accumulate; +use crate::Parser; +use std::vec::Vec; diff --git a/src/_tutorial/chapter_6.rs b/src/_tutorial/chapter_6.rs index 6e5f614b..59686299 100644 --- a/src/_tutorial/chapter_6.rs +++ b/src/_tutorial/chapter_6.rs @@ -1,54 +1,52 @@ -//! # Chapter 6: Repetition +//! # Chapter 7: Error Reporting //! -//! In [`chapter_3`], we covered how to sequence different parsers into a tuple but sometimes you need to run a -//! single parser many times into a [`Vec`]. +//! ## `Error` +//! +//! Back in [`chapter_1`], we glossed over the `Err` side of [`IResult`]. `IResult` is +//! actually short for `IResult` where [`Error`] is a cheap, universal error type +//! for getting started. When humans are producing the file, like with `toml`, you might want to +//! sacrifice some performance for providing more details on how to resolve the problem +//! +//! winnow includes [`VerboseError`] for this but you can [customize the error as you +//! wish][_topic::error]. You can use [`Parser::context`] to annotate the error with custom types +//! while unwinding to further improve the error quality. //! -//! Let's take our `parse_digits` and collect a list of them with [`many0`]: //! ```rust //! # use winnow::IResult; //! # use winnow::Parser; //! # use winnow::bytes::take_while1; -//! # use winnow::branch::dispatch; -//! # use winnow::bytes::take; -//! # use winnow::combinator::fail; -//! use winnow::combinator::opt; -//! use winnow::multi::many0; -//! use winnow::sequence::terminated; -//! -//! fn parse_list(input: &str) -> IResult<&str, Vec> { -//! many0(terminated(parse_digits, opt(','))).parse_next(input) +//! # use winnow::branch::alt; +//! use winnow::error::VerboseError; +//! +//! 
fn parse_digits(input: &str) -> IResult<&str, (&str, &str), VerboseError<&str>> { +//! alt(( +//! ("0b", parse_bin_digits).context("binary"), +//! ("0o", parse_oct_digits).context("octal"), +//! ("0d", parse_dec_digits).context("decimal"), +//! ("0x", parse_hex_digits).context("hexadecimal"), +//! )).parse_next(input) //! } //! //! // ... -//! # fn parse_digits(input: &str) -> IResult<&str, usize> { -//! # dispatch!(take(2usize); -//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), -//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), -//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), -//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), -//! # _ => fail, -//! # ).parse_next(input) -//! # } -//! # -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { //! # take_while1(( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { //! # take_while1(( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { //! # take_while1(( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { //! # take_while1(( //! # ('0'..='9'), //! # ('A'..='F'), @@ -57,131 +55,74 @@ //! # } //! //! fn main() { -//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! let input = "0x1a2b Hello"; //! -//! let (remainder, digits) = parse_list.parse_next(input).unwrap(); +//! let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap(); //! //! 
assert_eq!(remainder, " Hello"); -//! assert_eq!(digits, vec![0x1a2b, 0x3c4d, 0x5e6f]); -//! -//! assert!(parse_digits("ghiWorld").is_err()); +//! assert_eq!(prefix, "0x"); +//! assert_eq!(digits, "1a2b"); //! } //! ``` //! -//! You'll notice that the above allows trailing `,` when we intended to not support that. We can -//! easily fix this by using [`separated0`]: -//! ```rust -//! # use winnow::IResult; -//! # use winnow::Parser; -//! # use winnow::bytes::take_while1; -//! # use winnow::branch::dispatch; -//! # use winnow::bytes::take; -//! # use winnow::combinator::fail; -//! use winnow::multi::separated0; +//! At first glance, this looks correct but what `context` will be reported when parsing `"0b5"`? +//! If you remember back to [`chapter_3`], [`alt`] will only report the last error by default which +//! means when parsing `"0b5"`, the `context` will be `"hexadecimal"`. //! -//! fn parse_list(input: &str) -> IResult<&str, Vec> { -//! separated0(parse_digits, ",").parse_next(input) -//! } +//! ## `ErrMode` //! -//! // ... -//! # fn parse_digits(input: &str) -> IResult<&str, usize> { -//! # dispatch!(take(2usize); -//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), -//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), -//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), -//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), -//! # _ => fail, -//! # ).parse_next(input) -//! # } -//! # -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( -//! # ('0'..='7'), -//! # )).parse_next(input) -//! # } -//! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( -//! # ('0'..='7'), -//! # )).parse_next(input) -//! # } -//! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( -//! # ('0'..='9'), -//! # )).parse_next(input) -//! # } -//! # -//! 
# fn parse_hex_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( -//! # ('0'..='9'), -//! # ('A'..='F'), -//! # ('a'..='f'), -//! # )).parse_next(input) -//! # } -//! -//! fn main() { -//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; -//! -//! let (remainder, digits) = parse_list.parse_next(input).unwrap(); -//! -//! assert_eq!(remainder, " Hello"); -//! assert_eq!(digits, vec![0x1a2b, 0x3c4d, 0x5e6f]); -//! -//! assert!(parse_digits("ghiWorld").is_err()); -//! } +//! Let's break down `IResult` one step further: +//! ```rust +//! # use winnow::error::Error; +//! # use winnow::error::ErrMode; +//! pub type IResult<I, O, E = Error<I>> = Result<(I, O), ErrMode<E>>; //! ``` +//! `IResult` is just a fancy wrapper around `Result` that wraps our error in an [`ErrMode`] +//! type. +//! +//! `ErrMode` is an enum with `Backtrack` and `Cut` variants (ignore `Incomplete` as it's only +//! relevant for [streaming][_topic::stream]). By default, errors are `Backtrack`, meaning that +//! other parsing branches will be attempted on failure, like the next case of an `alt`. `Cut` +//! short-circuits all other branches, immediately reporting the error. //! -//! If you look closely at [`many0`], it isn't collecting directly into a [`Vec`] but -//! [`Accumulate`] to gather the results. This let's us make more complex parsers than we did in -//! [`chapter_2`] by accumulating the results into a `()` and [`recognize`][Parser::recognize]-ing the captured input: +//! So we can get the correct `context` by modifying the above example with [`cut_err`]: //! ```rust //! # use winnow::IResult; //! # use winnow::Parser; //! # use winnow::bytes::take_while1; -//! # use winnow::branch::dispatch; -//! # use winnow::bytes::take; -//! # use winnow::combinator::fail; -//! # use winnow::multi::separated0; -//! # -//! fn recognize_list(input: &str) -> IResult<&str, &str> { -//! parse_list.recognize().parse_next(input) -//! } -//! -//! fn parse_list(input: &str) -> IResult<&str, ()> { -//!
separated0(parse_digits, ",").parse_next(input) +//! # use winnow::branch::alt; +//! # use winnow::error::VerboseError; +//! use winnow::combinator::cut_err; +//! +//! fn parse_digits(input: &str) -> IResult<&str, (&str, &str), VerboseError<&str>> { +//! alt(( +//! ("0b", cut_err(parse_bin_digits)).context("binary"), +//! ("0o", cut_err(parse_oct_digits)).context("octal"), +//! ("0d", cut_err(parse_dec_digits)).context("decimal"), +//! ("0x", cut_err(parse_hex_digits)).context("hexadecimal"), +//! )).parse_next(input) //! } //! //! // ... -//! # fn parse_digits(input: &str) -> IResult<&str, usize> { -//! # dispatch!(take(2usize); -//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), -//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), -//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), -//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), -//! # _ => fail, -//! # ).parse_next(input) -//! # } -//! # -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { //! # take_while1(( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { //! # take_while1(( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { //! # take_while1(( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { //! # take_while1(( //! # ('0'..='9'), //! # ('A'..='F'), @@ -190,24 +131,28 @@ //! # } //! //! fn main() { -//! let input = "0x1a2b,0x3c4d,0x5e6f Hello"; +//! 
let input = "0x1a2b Hello"; //! -//! let (remainder, digits) = recognize_list.parse_next(input).unwrap(); +//! let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap(); //! //! assert_eq!(remainder, " Hello"); -//! assert_eq!(digits, "0x1a2b,0x3c4d,0x5e6f"); -//! -//! assert!(parse_digits("ghiWorld").is_err()); +//! assert_eq!(prefix, "0x"); +//! assert_eq!(digits, "1a2b"); //! } //! ``` +//! Now, when parsing `"0b5"`, the `context` will be `"binary"`. //! //! [*prev*][super::chapter_5] [*next*][super::chapter_7] #![allow(unused_imports)] -use super::chapter_2; +use super::chapter_1; use super::chapter_3; -use crate::multi::many0; -use crate::multi::separated0; -use crate::stream::Accumulate; +use crate::branch::alt; +use crate::combinator::cut_err; +use crate::error::ErrMode; +use crate::error::Error; +use crate::error::VerboseError; +use crate::FinishIResult; +use crate::IResult; use crate::Parser; -use std::vec::Vec; +use crate::_topic; diff --git a/src/_tutorial/chapter_7.rs b/src/_tutorial/chapter_7.rs index 752bbd72..e37d9661 100644 --- a/src/_tutorial/chapter_7.rs +++ b/src/_tutorial/chapter_7.rs @@ -1,128 +1,85 @@ -//! # Chapter 7: Error Reporting +//! # Chapter 8: Integrating the Parser //! -//! ## `Error` +//! So far, we've highlighted how to incrementally parse, but how do we bring this all together +//! into our application? //! -//! Back in [`chapter_1`], we glossed over the `Err` side of [`IResult`]. `IResult` is -//! actually short for `IResult` where [`Error`] is a cheap, universal error type -//! for getting started. When humans are producing the file, like with `toml`, you might want to -//! sacrifice some performance for providing more details on how to resolve the problem -//! -//! winnow includes [`VerboseError`] for this but you can [customize the error as you -//! wish][_topic::error]. You can use [`Parser::context`] to annotate the error with custom types -//! while unwinding to further improve the error quality. -//! -//! 
```rust -//! # use winnow::IResult; -//! # use winnow::Parser; -//! # use winnow::bytes::take_while1; -//! # use winnow::branch::alt; -//! use winnow::error::VerboseError; -//! -//! fn parse_digits(input: &str) -> IResult<&str, (&str, &str), VerboseError<&str>> { -//! alt(( -//! ("0b", parse_bin_digits).context("binary"), -//! ("0o", parse_oct_digits).context("octal"), -//! ("0d", parse_dec_digits).context("decimal"), -//! ("0x", parse_hex_digits).context("hexadecimal"), -//! )).parse_next(input) -//! } -//! -//! // ... -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( -//! # ('0'..='7'), -//! # )).parse_next(input) -//! # } -//! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( -//! # ('0'..='7'), -//! # )).parse_next(input) -//! # } -//! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( -//! # ('0'..='9'), -//! # )).parse_next(input) -//! # } -//! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { -//! # take_while1(( -//! # ('0'..='9'), -//! # ('A'..='F'), -//! # ('a'..='f'), -//! # )).parse_next(input) -//! # } -//! -//! fn main() { -//! let input = "0x1a2b Hello"; -//! -//! let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap(); -//! -//! assert_eq!(remainder, " Hello"); -//! assert_eq!(prefix, "0x"); -//! assert_eq!(digits, "1a2b"); -//! } -//! ``` -//! -//! At first glance, this looks correct but what `context` will be reported when parsing `"0b5"`? -//! If you remember back to [`chapter_3`], [`alt`] will only report the last error by default which -//! means when parsing `"0b5"`, the `context` will be `"hexadecimal"`. -//! -//! ## `ErrMode` -//! -//! Let's break down `IResult` one step further: +//! The type we've been working with looks like: //! ```rust -//! # use winnow::error::Error; +//! # use winnow::error::VerboseError; //! 
# use winnow::error::ErrMode; -//! pub type IResult> = Result<(I, O), ErrMode>; +//! type IResult<'i, O> = Result< +//! (&'i str, O), +//! ErrMode< +//! VerboseError<&'i str> +//! > +//! >; //! ``` -//! `IResult` is just a fancy wrapper around `Result` that wraps our error in an [`ErrMode`] -//! type. +//! 1. We have to decide what to do about the `remainder` of the input. +//! 2. The error type is not compatible with the rest of the Rust ecosystem //! -//! `ErrMode` is an enum with `Backtrack` and `Cut` variants (ignore `Incomplete` as its only -//! relevant for [streaming][_topic::stream]. By default, errors are `Backtrack`, meaning that -//! other parsing branches will be attempted on failure, like the next case of an `alt`. `Cut` -//! shortcircuits all other branches, immediately reporting the error. +//! Normally, Rust applications want errors that are `std::error::Error + Send + Sync + 'static` +//! meaning: +//! - They implement the [`std::error::Error`] trait +//! - They can be sent across threads +//! - They are safe to be referenced across threads +//! - They do not borrow //! -//! So we can get the correct `context` by modifying the above example with [`cut_err`]: +//! winnow provides some helpers for this like [`FinishIResult`]: //! ```rust //! # use winnow::IResult; //! # use winnow::Parser; //! # use winnow::bytes::take_while1; -//! # use winnow::branch::alt; -//! # use winnow::error::VerboseError; -//! use winnow::combinator::cut_err; -//! -//! fn parse_digits(input: &str) -> IResult<&str, (&str, &str), VerboseError<&str>> { -//! alt(( -//! ("0b", cut_err(parse_bin_digits)).context("binary"), -//! ("0o", cut_err(parse_oct_digits)).context("octal"), -//! ("0d", cut_err(parse_dec_digits)).context("decimal"), -//! ("0x", cut_err(parse_hex_digits)).context("hexadecimal"), -//! )).parse_next(input) +//! # use winnow::branch::dispatch; +//! # use winnow::bytes::take; +//! # use winnow::combinator::fail; +//! use winnow::FinishIResult; +//! 
use winnow::error::Error; +//! +//! #[derive(Debug, PartialEq, Eq)] +//! pub struct Hex(usize); +//! +//! impl std::str::FromStr for Hex { +//! type Err = Error; +//! +//! fn from_str(input: &str) -> Result { +//! parse_digits +//! .map(Hex) +//! .parse_next(input) +//! .finish() +//! .map_err(|e| e.into_owned()) +//! } //! } //! //! // ... -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # fn parse_digits(input: &str) -> IResult<&str, usize> { +//! # dispatch!(take(2usize); +//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), +//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), +//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), +//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), +//! # _ => fail, +//! # ).parse_next(input) +//! # } +//! # +//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { //! # take_while1(( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { //! # take_while1(( //! # ('0'..='7'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { //! # take_while1(( //! # ('0'..='9'), //! # )).parse_next(input) //! # } //! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str, VerboseError<&str>> { +//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { //! # take_while1(( //! # ('0'..='9'), //! # ('A'..='F'), @@ -131,28 +88,28 @@ //! # } //! //! fn main() { -//! let input = "0x1a2b Hello"; +//! let input = "0x1a2b"; +//! assert_eq!(input.parse::().unwrap(), Hex(0x1a2b)); //! -//! let (remainder, (prefix, digits)) = parse_digits.parse_next(input).unwrap(); -//! -//! assert_eq!(remainder, " Hello"); -//! 
assert_eq!(prefix, "0x"); -//! assert_eq!(digits, "1a2b"); +//! let input = "0x1a2b Hello"; +//! assert!(input.parse::().is_err()); +//! let input = "ghiHello"; +//! assert!(input.parse::().is_err()); //! } //! ``` -//! Now, when parsing `"0b5"`, the `context` will be `"binary"`. +//! [`FinishIResult::finish`]: +//! - Ensures we hit [`eof`] +//! - Removes the [`ErrMode`] wrapper +//! +//! [`Error::into_owned`]: +//! - Converts the `&str` in `Error` to `String` which enables support for [`std::error::Error`] //! -//! [*prev*][super::chapter_6] [*next*][super::chapter_8] +//! [*prev*][super::chapter_6] #![allow(unused_imports)] use super::chapter_1; -use super::chapter_3; -use crate::branch::alt; -use crate::combinator::cut_err; +use crate::combinator::eof; use crate::error::ErrMode; use crate::error::Error; -use crate::error::VerboseError; use crate::FinishIResult; use crate::IResult; -use crate::Parser; -use crate::_topic; diff --git a/src/_tutorial/chapter_8.rs b/src/_tutorial/chapter_8.rs deleted file mode 100644 index cdfae35c..00000000 --- a/src/_tutorial/chapter_8.rs +++ /dev/null @@ -1,115 +0,0 @@ -//! # Chapter 8: Integrating the Parser -//! -//! So far, we've highlighted how to incrementally parse, but how do we bring this all together -//! into our application? -//! -//! The type we've been working with looks like: -//! ```rust -//! # use winnow::error::VerboseError; -//! # use winnow::error::ErrMode; -//! type IResult<'i, O> = Result< -//! (&'i str, O), -//! ErrMode< -//! VerboseError<&'i str> -//! > -//! >; -//! ``` -//! 1. We have to decide what to do about the `remainder` of the input. -//! 2. The error type is not compatible with the rest of the Rust ecosystem -//! -//! Normally, Rust applications want errors that are `std::error::Error + Send + Sync + 'static` -//! meaning: -//! - They implement the [`std::error::Error`] trait -//! - They can be sent across threads -//! - They are safe to be referenced across threads -//! - They do not borrow -//! 
-//! winnow provides some helpers for this like [`FinishIResult`]: -//! ```rust -//! # use winnow::IResult; -//! # use winnow::Parser; -//! # use winnow::bytes::take_while1; -//! # use winnow::branch::dispatch; -//! # use winnow::bytes::take; -//! # use winnow::combinator::fail; -//! use winnow::FinishIResult; -//! use winnow::error::Error; -//! -//! #[derive(Debug, PartialEq, Eq)] -//! pub struct Hex(usize); -//! -//! impl std::str::FromStr for Hex { -//! type Err = Error; -//! -//! fn from_str(input: &str) -> Result { -//! parse_digits -//! .map(Hex) -//! .parse_next(input) -//! .finish() -//! .map_err(|e| e.into_owned()) -//! } -//! } -//! -//! // ... -//! # fn parse_digits(input: &str) -> IResult<&str, usize> { -//! # dispatch!(take(2usize); -//! # "0b" => parse_bin_digits.map_res(|s| usize::from_str_radix(s, 2)), -//! # "0o" => parse_oct_digits.map_res(|s| usize::from_str_radix(s, 8)), -//! # "0d" => parse_dec_digits.map_res(|s| usize::from_str_radix(s, 10)), -//! # "0x" => parse_hex_digits.map_res(|s| usize::from_str_radix(s, 16)), -//! # _ => fail, -//! # ).parse_next(input) -//! # } -//! # -//! # fn parse_bin_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( -//! # ('0'..='7'), -//! # )).parse_next(input) -//! # } -//! # -//! # fn parse_oct_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( -//! # ('0'..='7'), -//! # )).parse_next(input) -//! # } -//! # -//! # fn parse_dec_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( -//! # ('0'..='9'), -//! # )).parse_next(input) -//! # } -//! # -//! # fn parse_hex_digits(input: &str) -> IResult<&str, &str> { -//! # take_while1(( -//! # ('0'..='9'), -//! # ('A'..='F'), -//! # ('a'..='f'), -//! # )).parse_next(input) -//! # } -//! -//! fn main() { -//! let input = "0x1a2b"; -//! assert_eq!(input.parse::().unwrap(), Hex(0x1a2b)); -//! -//! let input = "0x1a2b Hello"; -//! assert!(input.parse::().is_err()); -//! let input = "ghiHello"; -//! 
assert!(input.parse::().is_err()); -//! } -//! ``` -//! [`FinishIResult::finish`]: -//! - Ensures we hit [`eof`] -//! - Removes the [`ErrMode`] wrapper -//! -//! [`Error::into_owned`]: -//! - Converts the `&str` in `Error` to `String` which enables support for [`std::error::Error`] -//! -//! [*prev*][super::chapter_7] - -#![allow(unused_imports)] -use super::chapter_1; -use crate::combinator::eof; -use crate::error::ErrMode; -use crate::error::Error; -use crate::FinishIResult; -use crate::IResult; diff --git a/src/_tutorial/mod.rs b/src/_tutorial/mod.rs index f8674156..5709a78c 100644 --- a/src/_tutorial/mod.rs +++ b/src/_tutorial/mod.rs @@ -38,4 +38,3 @@ pub mod chapter_4; pub mod chapter_5; pub mod chapter_6; pub mod chapter_7; -pub mod chapter_8; From 8b7d1bc7e6a91da3f678c0512c62ef2bc9a8de62 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 20 Feb 2023 15:43:18 -0600 Subject: [PATCH 10/11] docs(tutorial): Change how chapters are linked This makes the links more obvious --- src/_tutorial/chapter_0.rs | 32 ++++++++++++++++++++++++++++++++ src/_tutorial/chapter_1.rs | 5 +++-- src/_tutorial/chapter_2.rs | 5 +++-- src/_tutorial/chapter_3.rs | 5 +++-- src/_tutorial/chapter_4.rs | 5 +++-- src/_tutorial/chapter_5.rs | 5 +++-- src/_tutorial/chapter_6.rs | 5 +++-- src/_tutorial/chapter_7.rs | 4 ++-- src/_tutorial/mod.rs | 32 ++------------------------------ src/lib.rs | 4 ++-- 10 files changed, 56 insertions(+), 46 deletions(-) create mode 100644 src/_tutorial/chapter_0.rs diff --git a/src/_tutorial/chapter_0.rs b/src/_tutorial/chapter_0.rs new file mode 100644 index 00000000..67655378 --- /dev/null +++ b/src/_tutorial/chapter_0.rs @@ -0,0 +1,32 @@ +//! # Chapter 0: Introduction +//! +//! This tutorial assumes that you are: +//! - Already familiar with Rust +//! - Using `winnow` for the first time +//! +//! The focus will be on parsing in-memory strings (`&str`). Once done, you might want to check the +//! [Special Topics][_topic] for more specialized topics or examples. 
+//! +//! ## About +//! +//! `winnow` is a parser-combinator library. In other words, it gives you tools to define: +//! - "parsers", or functions that take an input and give back an output +//! - "combinators", or functions that take parsers and _combine_ them together! +//! +//! While "combinator" might be an unfamiliar word, you are likely using them in your Rust code +//! today, like with the [`Iterator`] trait: +//! ```rust +//! let data = vec![1, 2, 3, 4, 5]; +//! let even_count = data.iter() +//! .copied() // combinator +//! .filter(|d| d % 2 == 0) // combinator +//! .count(); // combinator +//! ``` +//! +//! Parser combinators allow building parsers for complex formats from simple, reusable parsers. + +#![allow(unused_imports)] +use crate::_topic; +use std::iter::Iterator; + +pub use super::chapter_1 as next; diff --git a/src/_tutorial/chapter_1.rs b/src/_tutorial/chapter_1.rs index dad0f733..d6a45c8b 100644 --- a/src/_tutorial/chapter_1.rs +++ b/src/_tutorial/chapter_1.rs @@ -80,9 +80,10 @@ //! assert_eq!(output, ""); //! } //! ``` -//! -//! [*prev*][super] [*next*][super::chapter_2] #![allow(unused_imports)] use crate::IResult; use crate::Parser; + +pub use super::chapter_0 as previous; +pub use super::chapter_2 as next; diff --git a/src/_tutorial/chapter_2.rs b/src/_tutorial/chapter_2.rs index 2a92f051..4a1c8ce3 100644 --- a/src/_tutorial/chapter_2.rs +++ b/src/_tutorial/chapter_2.rs @@ -141,8 +141,6 @@ //! assert!(parse_digits("Z").is_err()); //! } //! ``` -//! -//!
[*prev*][super::chapter_1] [*next*][super::chapter_3] #![allow(unused_imports)] use crate::bytes::one_of; @@ -152,3 +150,6 @@ use crate::character::hex_digit1; use crate::stream::ContainsToken; use crate::Parser; use std::ops::RangeInclusive; + +pub use super::chapter_1 as previous; +pub use super::chapter_3 as next; diff --git a/src/_tutorial/chapter_3.rs b/src/_tutorial/chapter_3.rs index de9b3be2..29c5db45 100644 --- a/src/_tutorial/chapter_3.rs +++ b/src/_tutorial/chapter_3.rs @@ -232,10 +232,11 @@ //! assert!(parse_digits("ghiWorld").is_err()); //! } //! ``` -//! -//! [*prev*][super::chapter_2] [*next*][super::chapter_4] #![allow(unused_imports)] use crate::branch::alt; use crate::branch::dispatch; use crate::sequence::preceded; + +pub use super::chapter_2 as previous; +pub use super::chapter_4 as next; diff --git a/src/_tutorial/chapter_4.rs b/src/_tutorial/chapter_4.rs index 00615a9c..5d19a05b 100644 --- a/src/_tutorial/chapter_4.rs +++ b/src/_tutorial/chapter_4.rs @@ -99,10 +99,11 @@ //! assert!(parse_digits("ghiWorld").is_err()); //! } //! ``` -//! -//! [*prev*][super::chapter_3] [*next*][super::chapter_5] #![allow(unused_imports)] use crate::IResult; use crate::Parser; use std::str::FromStr; + +pub use super::chapter_3 as previous; +pub use super::chapter_5 as next; diff --git a/src/_tutorial/chapter_5.rs b/src/_tutorial/chapter_5.rs index ac619450..4a57fae0 100644 --- a/src/_tutorial/chapter_5.rs +++ b/src/_tutorial/chapter_5.rs @@ -200,8 +200,6 @@ //! assert!(parse_digits("ghiWorld").is_err()); //! } //! ``` -//! -//! 
[*prev*][super::chapter_4] [*next*][super::chapter_6] #![allow(unused_imports)] use super::chapter_2; @@ -211,3 +209,6 @@ use crate::multi::separated0; use crate::stream::Accumulate; use crate::Parser; use std::vec::Vec; + +pub use super::chapter_4 as previous; +pub use super::chapter_6 as next; diff --git a/src/_tutorial/chapter_6.rs b/src/_tutorial/chapter_6.rs index 59686299..57797066 100644 --- a/src/_tutorial/chapter_6.rs +++ b/src/_tutorial/chapter_6.rs @@ -141,8 +141,6 @@ //! } //! ``` //! Now, when parsing `"0b5"`, the `context` will be `"binary"`. -//! -//! [*prev*][super::chapter_5] [*next*][super::chapter_7] #![allow(unused_imports)] use super::chapter_1; @@ -156,3 +154,6 @@ use crate::FinishIResult; use crate::IResult; use crate::Parser; use crate::_topic; + +pub use super::chapter_5 as previous; +pub use super::chapter_7 as next; diff --git a/src/_tutorial/chapter_7.rs b/src/_tutorial/chapter_7.rs index e37d9661..2ca956b4 100644 --- a/src/_tutorial/chapter_7.rs +++ b/src/_tutorial/chapter_7.rs @@ -103,8 +103,6 @@ //! //! [`Error::into_owned`]: //! - Converts the `&str` in `Error` to `String` which enables support for [`std::error::Error`] -//! -//! [*prev*][super::chapter_6] #![allow(unused_imports)] use super::chapter_1; @@ -113,3 +111,5 @@ use crate::error::ErrMode; use crate::error::Error; use crate::FinishIResult; use crate::IResult; + +pub use super::chapter_6 as previous; diff --git a/src/_tutorial/mod.rs b/src/_tutorial/mod.rs index 5709a78c..e4b8392f 100644 --- a/src/_tutorial/mod.rs +++ b/src/_tutorial/mod.rs @@ -1,36 +1,8 @@ //! # Tutorial //! -//! This tutorial assumes that you are: -//! - Already familiar with Rust -//! - Using `winnow` for the first time -//! -//! The focus will be on parsing in-memory strings (`&str`). Once done, you might want to check the -//! [Special Topics][_topic] for more specialized topics or examples. -//! -//! ## About -//! -//! `winnow` is a parser-combinator library. 
In other words, it gives you tools to define: -//! - "parsers", or functions that takes an input and gives back an output -//! - "combinators", or functions that take parsers and _combine_ them together! -//! -//! While "combinator" might be an unfamiliar word, you are likely using them in your rust code -//! today, like with the [`Iterator`] trait: -//! ```rust -//! let data = vec![1, 2, 3, 4, 5]; -//! let even_count = data.iter() -//! .copied() // combinator -//! .filter(|d| d % 2 == 0) // combinator -//! .count(); // combinator -//! ``` -//! -//! Parser combinators allow building parsers for complex formats from simple, reusable parsers. -//! -//! [*next*][chapter_1] - -#![allow(unused_imports)] -use crate::_topic; -use std::iter::Iterator; +//! Table of Contents +pub mod chapter_0; pub mod chapter_1; pub mod chapter_2; pub mod chapter_3; diff --git a/src/lib.rs b/src/lib.rs index b31e3cc7..c4d10286 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,7 @@ //! //! Quick links: //! - [List of combinators][crate::combinator] -//! - [Tutorial][_tutorial] +//! - [Tutorial][_tutorial::chapter_0] //! - [Special Topics][_topic] //! - [Discussions](https://github.com/winnow-rs/winnow/discussions) //! @@ -40,7 +40,7 @@ #![doc = include_str!("../examples/css/parser.rs")] //! ``` //! -//! See also the [Tutorial][_tutorial] and [Special Topics][_topic] +//! See also the [Tutorial][_tutorial::chapter_0] and [Special Topics][_topic] #![cfg_attr(docsrs, feature(doc_auto_cfg))] #![cfg_attr(docsrs, feature(doc_cfg))] From 414d49cf5a0bdd4a7d64e9614edda5f0c07a1651 Mon Sep 17 00:00:00 2001 From: Ed Page Date: Mon, 20 Feb 2023 15:55:04 -0600 Subject: [PATCH 11/11] docs(tutorial): Expand on combinator benefits This is a part of the "old tutorial" that is worth carrying forward.
--- src/_tutorial/chapter_0.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/_tutorial/chapter_0.rs b/src/_tutorial/chapter_0.rs index 67655378..4c768d8f 100644 --- a/src/_tutorial/chapter_0.rs +++ b/src/_tutorial/chapter_0.rs @@ -23,7 +23,13 @@ //! .count(); // combinator //! ``` //! -//! Parser combinators allow building parsers for complex formats from simple, reusable parsers. +//! Parser combinators are great because: +//! +//! - The parsers are small and easy to write +//! - The parser components are easy to reuse (if they're general enough, please add them to winnow!) +//! - The parser components are easy to test separately (unit tests and property-based tests) +//! - The parser combination code looks close to the grammar you would have written +//! - You can build partial parsers, specific to the data you need at the moment, and ignore the rest #![allow(unused_imports)] use crate::_topic;