-
Notifications
You must be signed in to change notification settings - Fork 33
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* ran cargo fmt * use Self * refactoring * use matches * use Self * use matches + use Self * some refactoring in parser. * refactoring in parser.rs
- Loading branch information
1 parent
7efec98
commit 257597f
Showing
6 changed files
with
725 additions
and
758 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,68 +1,70 @@ | ||
// Example shows basic UTF-8 combinators | ||
|
||
use pom::utf8::*; | ||
|
||
// Entry point: parses each non-empty line of an embedded movie list and prints a | ||
// record once a title, a year and a director have all been collected. | ||
fn main() { | ||
// Informal, Spanish-language movie database format | ||
let input = "\ | ||
Título: Abre los ojos | ||
Año: 1997 | ||
Director: Alejandro Amenábar | ||
Título: Amores Perros | ||
Director: Alejandro González Iñárritu | ||
Año: 2000 | ||
Título: La montaña sagrada | ||
Año: 1973 | ||
Director: Alejandro Jodorowsky | ||
"; | ||
|
||
// One parsed input line: which field it carries, together with that field's value. | ||
enum DataLine<'a> { | ||
Title(&'a str), | ||
Director(&'a str), | ||
Year(i32), | ||
} | ||
|
||
// Parser for a run of one or more ASCII digits, converted to i32 with str::parse. | ||
// NOTE(review): despite the name, "0" and leading zeros are accepted; the | ||
// commented-out line below sketches a stricter grammar that was left as a TODO. | ||
fn positive<'a>() -> Parser<'a, i32> { | ||
// let integer = (one_of("123456789") - one_of("0123456789").repeat(0..)) | sym(b'0'); // TODO | ||
let digit = one_of("0123456789"); | ||
let integer = digit.discard().repeat(1..); | ||
// collect() presumably yields the matched text, which is then parsed as i32 - confirm against pom docs | ||
integer.collect().convert(|x|x.parse::<i32>()) | ||
} | ||
|
||
// Parser that takes any characters and returns them as a &str; | ||
// at least one character is required (repeat(1..)). | ||
fn rest_str<'a>() -> Parser<'a, &'a str> { | ||
any().repeat(1..).collect() | ||
} | ||
|
||
// Parser for the literal ": " between a field name and its value; the match is discarded. | ||
fn separator<'a>() ->Parser<'a, ()> { | ||
seq(": ").discard() | ||
} | ||
|
||
// A line is a field name ("Título", "Director" or "Año"), the separator, then the value. | ||
// NOTE(review): in pom `a * b` presumably keeps only b's result, so just the value reaches map - confirm. | ||
let parser = | ||
(seq("Título") * separator() * rest_str().map(|s| DataLine::Title(s))) | ||
| (seq("Director") * separator() * rest_str().map(|s| DataLine::Director(s))) | ||
| (seq("Año") * separator() * positive().map(|i| DataLine::Year(i))); | ||
|
||
{ | ||
// Most recently seen value of each field; all three are cleared after a record is printed. | ||
// Fields may arrive in any order within a record (see the second entry in `input`). | ||
let mut title_opt:Option<&str> = None; | ||
let mut year_opt:Option<i32> = None; | ||
let mut director_opt:Option<&str> = None; | ||
|
||
for line in input.lines() { | ||
if !line.is_empty() { // Skip blank lines without parsing | ||
// Parse line; unwrap() panics if the line fails to parse | ||
match parser.parse_str(line).unwrap() { | ||
DataLine::Title(s) => title_opt = Some(s), | ||
DataLine::Director(s) => director_opt = Some(s), | ||
DataLine::Year(s) => year_opt = Some(s), | ||
} | ||
// When all three line types have been collected, print them | ||
if let (Some(title), Some(year), Some(director)) = (title_opt,year_opt,director_opt) { | ||
println!("Title: {}\nDirector: {}\nYear: {}\n", title, director, year); | ||
// Reset all three so the next record starts empty | ||
(title_opt, year_opt, director_opt) = (None,None,None); | ||
} | ||
} | ||
} | ||
} | ||
} | ||
// Example shows basic UTF-8 combinators | ||
|
||
use pom::utf8::*; | ||
|
||
// Entry point: parses each non-empty line of an embedded movie list and prints a | ||
// record once a title, a year and a director have all been collected. | ||
fn main() { | ||
// Informal, Spanish-language movie database format | ||
let input = "\ | ||
Título: Abre los ojos | ||
Año: 1997 | ||
Director: Alejandro Amenábar | ||
Título: Amores Perros | ||
Director: Alejandro González Iñárritu | ||
Año: 2000 | ||
Título: La montaña sagrada | ||
Año: 1973 | ||
Director: Alejandro Jodorowsky | ||
"; | ||
|
||
// One parsed input line: which field it carries, together with that field's value. | ||
enum DataLine<'a> { | ||
Title(&'a str), | ||
Director(&'a str), | ||
Year(i32), | ||
} | ||
|
||
// Parser for a run of one or more ASCII digits, converted to i32 with str::parse. | ||
// NOTE(review): despite the name, "0" and leading zeros are accepted; the | ||
// commented-out line below sketches a stricter grammar that was left as a TODO. | ||
fn positive<'a>() -> Parser<'a, i32> { | ||
// let integer = (one_of("123456789") - one_of("0123456789").repeat(0..)) | sym(b'0'); // TODO | ||
let digit = one_of("0123456789"); | ||
let integer = digit.discard().repeat(1..); | ||
// collect() presumably yields the matched text, which is then parsed as i32 - confirm against pom docs | ||
integer.collect().convert(|x| x.parse::<i32>()) | ||
} | ||
|
||
// Parser that takes any characters and returns them as a &str; | ||
// at least one character is required (repeat(1..)). | ||
fn rest_str<'a>() -> Parser<'a, &'a str> { | ||
any().repeat(1..).collect() | ||
} | ||
|
||
// Parser for the literal ": " between a field name and its value; the match is discarded. | ||
fn separator<'a>() -> Parser<'a, ()> { | ||
seq(": ").discard() | ||
} | ||
|
||
// A line is a field name ("Título", "Director" or "Año"), the separator, then the value. | ||
// NOTE(review): in pom `a * b` presumably keeps only b's result, so just the value reaches map - confirm. | ||
let parser = (seq("Título") * separator() * rest_str().map(|s| DataLine::Title(s))) | ||
| (seq("Director") * separator() * rest_str().map(|s| DataLine::Director(s))) | ||
| (seq("Año") * separator() * positive().map(|i| DataLine::Year(i))); | ||
|
||
{ | ||
// Most recently seen value of each field; all three are cleared after a record is printed. | ||
// Fields may arrive in any order within a record (see the second entry in `input`). | ||
let mut title_opt: Option<&str> = None; | ||
let mut year_opt: Option<i32> = None; | ||
let mut director_opt: Option<&str> = None; | ||
|
||
for line in input.lines() { | ||
// Skip blank lines without parsing | ||
if !line.is_empty() { | ||
// Parse line; unwrap() panics if the line fails to parse | ||
match parser.parse_str(line).unwrap() { | ||
DataLine::Title(s) => title_opt = Some(s), | ||
DataLine::Director(s) => director_opt = Some(s), | ||
DataLine::Year(s) => year_opt = Some(s), | ||
} | ||
// When all three line types have been collected, print them | ||
if let (Some(title), Some(year), Some(director)) = | ||
(title_opt, year_opt, director_opt) | ||
{ | ||
println!("Title: {}\nDirector: {}\nYear: {}\n", title, director, year); | ||
// Reset all three so the next record starts empty | ||
(title_opt, year_opt, director_opt) = (None, None, None); | ||
} | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,35 +1,36 @@ | ||
// Example shows UTF-8 combinators intermixed with binary combinators | ||
|
||
use pom::parser::*; | ||
use pom::utf8; | ||
|
||
// Entry point: runs a string-only MsgPack parser over six byte-vector test cases | ||
// and prints the result of each parse. | ||
fn main() { | ||
// A parser for MsgPack (but only messages encoding a string) | ||
let testcases: [Vec<u8>; 6] = [ | ||
vec![0b10100100, 0b11110000, 0b10011111, 0b10100100, 0b10010100], // 🤔, max-size 31 format | ||
vec![0xd9, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // 😮, max-size 255 format | ||
vec![0xda, 0, 4, 0b11110000, 0b10011111, 0b10100100, 0b10101111], // 🤯, max-size 2^16-1 format | ||
vec![0xdb, 0, 0, 0, 4, 0b11110000, 0b10011111, 0b10010010, 0b10100101], // 💥, max-size 2^32-1 format | ||
vec![0xc4, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // Valid MsgPack, but not a string (binary) | ||
vec![0b10100100, 0b10010100, 0b10100100, 0b10011111, 0b11110000], // A MsgPack string, but invalid UTF-8 | ||
]; | ||
|
||
const MASK:u8 = 0b11100000; // size 31 format is denoted by 3 high bits == 101 | ||
// Value the masked header byte must equal: bits 101 followed by five zero bits | ||
const SIZE_31:u8 = 0b10100000; | ||
|
||
// UTF-8 parser that takes zero or more characters and returns them as a &str. | ||
fn rest_as_str<'a>() -> utf8::Parser<'a, &'a str> { | ||
utf8::any().repeat(0..).collect() | ||
} | ||
|
||
// Demo parser does not verify that the claimed length matches the actual length (but checking so is simple with >>) | ||
// Each alternative matches a format marker, skips that format's length bytes with any(), | ||
// then reads the remaining input as a string. | ||
// NOTE(review): Rust's `-` binds tighter than `|`, so `- end()` attaches only to the | ||
// last (31 format) alternative, not to the whole choice - confirm this is intended. | ||
let parser = | ||
(sym(0xdb) * any().repeat(4) * rest_as_str()) // 2^32-1 format | ||
| (sym(0xda) * any().repeat(2) * rest_as_str()) // 2^16-1 format | ||
| (sym(0xd9) * any() * rest_as_str()) // 255 format | ||
| (is_a(|x| x&MASK == SIZE_31) * rest_as_str()) // 31 format | ||
- end(); | ||
|
||
// Print the Debug representation of each parse result | ||
for testcase in testcases.iter() { | ||
println!("{:?}", parser.parse(testcase)); | ||
} | ||
} | ||
// Example shows UTF-8 combinators intermixed with binary combinators | ||
|
||
use pom::parser::*; | ||
use pom::utf8; | ||
|
||
// Entry point: runs a string-only MsgPack parser over six byte-vector test cases | ||
// and prints the result of each parse. | ||
fn main() { | ||
// A parser for MsgPack (but only messages encoding a string) | ||
let testcases: [Vec<u8>; 6] = [ | ||
vec![0b10100100, 0b11110000, 0b10011111, 0b10100100, 0b10010100], // 🤔, max-size 31 format | ||
vec![0xd9, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // 😮, max-size 255 format | ||
vec![0xda, 0, 4, 0b11110000, 0b10011111, 0b10100100, 0b10101111], // 🤯, max-size 2^16-1 format | ||
vec![ | ||
0xdb, 0, 0, 0, 4, 0b11110000, 0b10011111, 0b10010010, 0b10100101, | ||
], // 💥, max-size 2^32-1 format | ||
vec![0xc4, 4, 0b11110000, 0b10011111, 0b10011000, 0b10101110], // Valid MsgPack, but not a string (binary) | ||
vec![0b10100100, 0b10010100, 0b10100100, 0b10011111, 0b11110000], // A MsgPack string, but invalid UTF-8 | ||
]; | ||
|
||
const MASK: u8 = 0b11100000; // size 31 format is denoted by 3 high bits == 101 | ||
// Value the masked header byte must equal: bits 101 followed by five zero bits | ||
const SIZE_31: u8 = 0b10100000; | ||
|
||
// UTF-8 parser that takes zero or more characters and returns them as a &str. | ||
fn rest_as_str<'a>() -> utf8::Parser<'a, &'a str> { | ||
utf8::any().repeat(0..).collect() | ||
} | ||
|
||
// Demo parser does not verify that the claimed length matches the actual length (but checking so is simple with >>) | ||
// Each alternative matches a format marker, skips that format's length bytes with any(), | ||
// then reads the remaining input as a string. | ||
// NOTE(review): Rust's `-` binds tighter than `|`, so `- end()` attaches only to the | ||
// last (31 format) alternative, not to the whole choice - confirm this is intended. | ||
let parser = (sym(0xdb) * any().repeat(4) * rest_as_str()) // 2^32-1 format | ||
| (sym(0xda) * any().repeat(2) * rest_as_str()) // 2^16-1 format | ||
| (sym(0xd9) * any() * rest_as_str()) // 255 format | ||
| (is_a(|x| x&MASK == SIZE_31) * rest_as_str()) // 31 format | ||
- end(); | ||
|
||
// Print the Debug representation of each parse result | ||
for testcase in testcases.iter() { | ||
println!("{:?}", parser.parse(testcase)); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.