Skip to content

Commit

Permalink
Ran cargo fmt, some nitpicks. (#60)
Browse files Browse the repository at this point in the history
* ran cargo fmt

* use Self

* refactoring

* use matches

* use Self

* use matches + use Self

* some refactoring in parser.

* refactoring in parser.rs
  • Loading branch information
adamnemecek authored Jun 19, 2023
1 parent 7efec98 commit 257597f
Show file tree
Hide file tree
Showing 6 changed files with 725 additions and 758 deletions.
138 changes: 70 additions & 68 deletions examples/utf8.rs
Original file line number Diff line number Diff line change
@@ -1,68 +1,70 @@
// Example shows basic UTF-8 combinators

use pom::utf8::*;

// Parses the embedded movie listing line by line and prints a record each time a
// title, a year and a director have all been seen.
fn main() {
    // Informal, Spanish-language movie database format
    let input = "\
Título: Abre los ojos
Año: 1997
Director: Alejandro Amenábar
Título: Amores Perros
Director: Alejandro González Iñárritu
Año: 2000
Título: La montaña sagrada
Año: 1973
Director: Alejandro Jodorowsky
";

    // One parsed input line, tagged by the field it carries.
    enum DataLine<'a> {
        Title(&'a str),
        Director(&'a str),
        Year(i32),
    }

    // Matches one or more characters from "0123456789" and converts the matched
    // text with `str::parse::<i32>`.
    fn positive<'a>() -> Parser<'a, i32> {
        // let integer = (one_of("123456789") - one_of("0123456789").repeat(0..)) | sym(b'0'); // TODO
        let digit = one_of("0123456789");
        let integer = digit.discard().repeat(1..);
        integer.collect().convert(|x| x.parse::<i32>())
    }

    // Matches one or more characters and yields them as a string slice.
    fn rest_str<'a>() -> Parser<'a, &'a str> {
        any().repeat(1..).collect()
    }

    // Matches the ": " that sits between a field name and its value.
    fn separator<'a>() -> Parser<'a, ()> {
        seq(": ").discard()
    }

    // Each alternative is "<field name>: <value>". The enum constructors are handed
    // to `map` directly rather than being wrapped in closures.
    let parser = (seq("Título") * separator() * rest_str().map(DataLine::Title))
        | (seq("Director") * separator() * rest_str().map(DataLine::Director))
        | (seq("Año") * separator() * positive().map(DataLine::Year));

    let mut title_opt: Option<&str> = None;
    let mut year_opt: Option<i32> = None;
    let mut director_opt: Option<&str> = None;

    for line in input.lines() {
        // Skip blank lines without parsing
        if line.is_empty() {
            continue;
        }

        // Parse line
        match parser.parse_str(line).unwrap() {
            DataLine::Title(title) => title_opt = Some(title),
            DataLine::Director(director) => director_opt = Some(director),
            DataLine::Year(year) => year_opt = Some(year),
        }

        // When all three line types have been collected, print them
        if let (Some(title), Some(year), Some(director)) = (title_opt, year_opt, director_opt) {
            println!("Title: {}\nDirector: {}\nYear: {}\n", title, director, year);
            (title_opt, year_opt, director_opt) = (None, None, None);
        }
    }
}
// Example shows basic UTF-8 combinators

use pom::utf8::*;

// Parses the embedded movie listing line by line and prints a record each time a
// title, a year and a director have all been seen.
fn main() {
    // Informal, Spanish-language movie database format
    let input = "\
Título: Abre los ojos
Año: 1997
Director: Alejandro Amenábar
Título: Amores Perros
Director: Alejandro González Iñárritu
Año: 2000
Título: La montaña sagrada
Año: 1973
Director: Alejandro Jodorowsky
";

    // One parsed input line, tagged by the field it carries.
    enum DataLine<'a> {
        Title(&'a str),
        Director(&'a str),
        Year(i32),
    }

    // Matches one or more characters from "0123456789" and converts the matched
    // text with `str::parse::<i32>`.
    fn positive<'a>() -> Parser<'a, i32> {
        // let integer = (one_of("123456789") - one_of("0123456789").repeat(0..)) | sym(b'0'); // TODO
        let digit = one_of("0123456789");
        let integer = digit.discard().repeat(1..);
        integer.collect().convert(|x| x.parse::<i32>())
    }

    // Matches one or more characters and yields them as a string slice.
    fn rest_str<'a>() -> Parser<'a, &'a str> {
        any().repeat(1..).collect()
    }

    // Matches the ": " that sits between a field name and its value.
    fn separator<'a>() -> Parser<'a, ()> {
        seq(": ").discard()
    }

    // Each alternative is "<field name>: <value>". The enum constructors are handed
    // to `map` directly rather than being wrapped in closures.
    let parser = (seq("Título") * separator() * rest_str().map(DataLine::Title))
        | (seq("Director") * separator() * rest_str().map(DataLine::Director))
        | (seq("Año") * separator() * positive().map(DataLine::Year));

    let mut title_opt: Option<&str> = None;
    let mut year_opt: Option<i32> = None;
    let mut director_opt: Option<&str> = None;

    for line in input.lines() {
        // Skip blank lines without parsing
        if line.is_empty() {
            continue;
        }

        // Parse line
        match parser.parse_str(line).unwrap() {
            DataLine::Title(title) => title_opt = Some(title),
            DataLine::Director(director) => director_opt = Some(director),
            DataLine::Year(year) => year_opt = Some(year),
        }

        // When all three line types have been collected, print them
        if let (Some(title), Some(year), Some(director)) = (title_opt, year_opt, director_opt) {
            println!("Title: {}\nDirector: {}\nYear: {}\n", title, director, year);
            (title_opt, year_opt, director_opt) = (None, None, None);
        }
    }
}
71 changes: 36 additions & 35 deletions examples/utf8_mixed.rs
Original file line number Diff line number Diff line change
@@ -1,35 +1,36 @@
// Example shows UTF-8 combinators intermixed with binary combinators

use pom::parser::*;
use pom::utf8;

// Runs a string-only MsgPack demo parser over a fixed set of byte sequences and
// prints each parse result.
fn main() {
    // A parser for MsgPack (but only messages encoding a string)
    let testcases: [Vec<u8>; 6] = [
        vec![0b10100100, 0b11110000, 0b10011111, 0b10100100, 0b10010100], // 🤔, max-size 31 format
        vec![0xd9, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // 😮, max-size 255 format
        vec![0xda, 0, 4, 0b11110000, 0b10011111, 0b10100100, 0b10101111], // 🤯, max-size 2^16-1 format
        vec![0xdb, 0, 0, 0, 4, 0b11110000, 0b10011111, 0b10010010, 0b10100101], // 💥, max-size 2^32-1 format
        vec![0xc4, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // Valid MsgPack, but not a string (binary)
        vec![0b10100100, 0b10010100, 0b10100100, 0b10011111, 0b11110000], // A MsgPack string, but invalid UTF-8
    ];

    const MASK: u8 = 0b11100000; // size 31 format is denoted by 3 high bits == 101
    const SIZE_31: u8 = 0b10100000;

    // Matches zero or more UTF-8 characters and yields them as a string slice.
    fn rest_as_str<'a>() -> utf8::Parser<'a, &'a str> {
        utf8::any().repeat(0..).collect()
    }

    // Demo parser does not verify that the claimed length matches the actual length (but checking so is simple with >>)
    //
    // `-` binds tighter than `|`, so the alternatives are grouped explicitly. Without
    // the outer parentheses `- end()` would apply to the 31 format branch only and the
    // other formats would accept input with unparsed trailing bytes.
    let parser = ((sym(0xdb) * any().repeat(4) * rest_as_str()) // 2^32-1 format
        | (sym(0xda) * any().repeat(2) * rest_as_str()) // 2^16-1 format
        | (sym(0xd9) * any() * rest_as_str()) // 255 format
        | (is_a(|x| x & MASK == SIZE_31) * rest_as_str())) // 31 format
        - end();

    for testcase in testcases.iter() {
        println!("{:?}", parser.parse(testcase));
    }
}
// Example shows UTF-8 combinators intermixed with binary combinators

use pom::parser::*;
use pom::utf8;

// Runs a string-only MsgPack demo parser over a fixed set of byte sequences and
// prints each parse result.
fn main() {
    // A parser for MsgPack (but only messages encoding a string)
    let testcases: [Vec<u8>; 6] = [
        vec![0b10100100, 0b11110000, 0b10011111, 0b10100100, 0b10010100], // 🤔, max-size 31 format
        vec![0xd9, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // 😮, max-size 255 format
        vec![0xda, 0, 4, 0b11110000, 0b10011111, 0b10100100, 0b10101111], // 🤯, max-size 2^16-1 format
        vec![
            0xdb, 0, 0, 0, 4, 0b11110000, 0b10011111, 0b10010010, 0b10100101,
        ], // 💥, max-size 2^32-1 format
        vec![0xc4, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // Valid MsgPack, but not a string (binary)
        vec![0b10100100, 0b10010100, 0b10100100, 0b10011111, 0b11110000], // A MsgPack string, but invalid UTF-8
    ];

    const MASK: u8 = 0b11100000; // size 31 format is denoted by 3 high bits == 101
    const SIZE_31: u8 = 0b10100000;

    // Matches zero or more UTF-8 characters and yields them as a string slice.
    fn rest_as_str<'a>() -> utf8::Parser<'a, &'a str> {
        utf8::any().repeat(0..).collect()
    }

    // Demo parser does not verify that the claimed length matches the actual length (but checking so is simple with >>)
    //
    // `-` binds tighter than `|`, so the alternatives are grouped explicitly. Without
    // the outer parentheses `- end()` would apply to the 31 format branch only and the
    // other formats would accept input with unparsed trailing bytes.
    let parser = ((sym(0xdb) * any().repeat(4) * rest_as_str()) // 2^32-1 format
        | (sym(0xda) * any().repeat(2) * rest_as_str()) // 2^16-1 format
        | (sym(0xd9) * any() * rest_as_str()) // 255 format
        | (is_a(|x| x & MASK == SIZE_31) * rest_as_str())) // 31 format
        - end();

    for testcase in testcases.iter() {
        println!("{:?}", parser.parse(testcase));
    }
}
6 changes: 3 additions & 3 deletions src/char_class.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ pub fn alphanum(term: u8) -> bool {
/// Recognises a hexadecimal digit, `0-9a-fA-F`.
///
/// The byte ranges tested are the ASCII codes for `0`-`9` (`0x30..=0x39`),
/// `A`-`F` (`0x41..=0x46`) and `a`-`f` (`0x61..=0x66`).
#[inline]
pub fn hex_digit(term: u8) -> bool {
(0x30..=0x39).contains(&term) || (0x41..=0x46).contains(&term) || (0x61..=0x66).contains(&term)
matches!(term, 0x30..=0x39 | 0x41..=0x46 | 0x61..=0x66)
}

/// Recognises an octal digit, `0-7`.
Expand All @@ -31,11 +31,11 @@ pub fn oct_digit(term: u8) -> bool {
/// Recognises a space or tab.
///
/// Returns `true` only when `term` is the byte `b' '` or `b'\t'`.
#[inline]
pub fn space(term: u8) -> bool {
term == b' ' || term == b'\t'
matches!(term, b' ' | b'\t')
}

/// Recognises a space, tab, line feed, or carriage return.
///
/// Defers to [`space`] for the space and tab cases, then checks for `b'\n'` and `b'\r'`.
#[inline]
pub fn multispace(term: u8) -> bool {
space(term) || term == b'\n' || term == b'\r'
space(term) || matches!(term, b'\n' | b'\r')
}
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pub mod parser;
pub mod char_class;

/// Variants of parser functions specialized for matching UTF-8 strings and returning chars.
/// Method and constructor names/functionality are generally the same as in base parser module.
/// Method and constructor names/functionality are generally the same as in base parser module.
#[cfg(feature = "utf8")]
pub mod utf8;

Expand Down
Loading

0 comments on commit 257597f

Please sign in to comment.