From d4d742d8b4717cd8e9e8392db4dd852479e3e268 Mon Sep 17 00:00:00 2001 From: PlexSheep Date: Mon, 15 Jan 2024 23:26:30 +0100 Subject: [PATCH] pest parser --- Cargo.lock | 130 +++++++++++++++++++++++++++++ Cargo.toml | 4 +- members/pest-demo/Cargo.toml | 10 +++ members/pest-demo/data/example.csv | 5 ++ members/pest-demo/src/csv.pest | 3 + members/pest-demo/src/main.rs | 42 ++++++++++ 6 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 members/pest-demo/Cargo.toml create mode 100644 members/pest-demo/data/example.csv create mode 100644 members/pest-demo/src/csv.pest create mode 100644 members/pest-demo/src/main.rs diff --git a/Cargo.lock b/Cargo.lock index b2510b3..1f3a810 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -145,6 +145,15 @@ version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "327762f6e5a765692301e5bb513e0d9fef63be86bbc14528052b1cd3e6f03e07" +[[package]] +name = "block-buffer" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" +dependencies = [ + "generic-array", +] + [[package]] name = "bstr" version = "1.9.0" @@ -296,6 +305,15 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" +[[package]] +name = "cpufeatures" +version = "0.2.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" +dependencies = [ + "libc", +] + [[package]] name = "criterion" version = "0.5.1" @@ -364,6 +382,16 @@ version = "0.8.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345" +[[package]] +name = "crypto-common" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +dependencies = [ + "generic-array", + "typenum", +] + [[package]] name = "cucumber" version = "0.20.2" @@ -445,6 +473,16 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "digest" +version = "0.10.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" +dependencies = [ + "block-buffer", + "crypto-common", +] + [[package]] name = "drain_filter_polyfill" version = "0.1.3" @@ -562,6 +600,16 @@ dependencies = [ "slab", ] +[[package]] +name = "generic-array" +version = "0.14.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" +dependencies = [ + "typenum", + "version_check", +] + [[package]] name = "getrandom" version = "0.2.12" @@ -905,6 +953,59 @@ version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9555b1514d2d99d78150d3c799d4c357a3e2c2a8062cd108e93a06d9057629c5" +[[package]] +name = "pest" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1f200d8d83c44a45b21764d1916299752ca035d15ecd46faca3e9a2a2bf6ad06" +dependencies = [ + "memchr", + "thiserror", + "ucd-trie", +] + +[[package]] +name = "pest-demo" +version = "0.1.0" +dependencies = [ + "pest", + "pest_derive", +] + +[[package]] +name = "pest_derive" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcd6ab1236bbdb3a49027e920e693192ebfe8913f6d60e294de57463a493cfde" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2a31940305ffc96863a735bef7c7994a00b325a7138fdbc5bda0f1a0476d3275" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn 2.0.48", +] + +[[package]] 
+name = "pest_meta" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7ff62f5259e53b78d1af898941cdcdccfae7385cf7d793a6e55de5d05bb4b7d" +dependencies = [ + "once_cell", + "pest", + "sha2", +] + [[package]] name = "pin-project" version = "1.1.3" @@ -1175,6 +1276,17 @@ dependencies = [ "serde", ] +[[package]] +name = "sha2" +version = "0.10.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" +dependencies = [ + "cfg-if", + "cpufeatures", + "digest", +] + [[package]] name = "slab" version = "0.4.9" @@ -1373,6 +1485,18 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + +[[package]] +name = "ucd-trie" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -1397,6 +1521,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "version_check" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + [[package]] name = "walkdir" version = "2.4.0" diff --git a/Cargo.toml b/Cargo.toml index 7caa9f0..dbb7e1c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,6 +4,7 @@ members = [ "members/criterion-demo", "members/cucumber-demo", "members/panic-calm", + "members/pest-demo", "members/revsqrt", "members/serde-json-demo", "members/slog-demo", @@ -11,8 +12,9 @@ members = [ ] default-members = [ ".", - "members/criterion-demo", "members/revsqrt", + 
"members/criterion-demo", + "members/pest-demo", "members/serde-json-demo", "members/slog-demo", "members/cucumber-demo", diff --git a/members/pest-demo/Cargo.toml b/members/pest-demo/Cargo.toml new file mode 100644 index 0000000..96acf2f --- /dev/null +++ b/members/pest-demo/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "pest-demo" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +pest = "2.6" +pest_derive = "2.6" diff --git a/members/pest-demo/data/example.csv b/members/pest-demo/data/example.csv new file mode 100644 index 0000000..20a98c9 --- /dev/null +++ b/members/pest-demo/data/example.csv @@ -0,0 +1,5 @@ +65279,1179403647,1463895090 +3.1415927,2.7182817,1.618034 +-40,-273.15 +13,42 +65537 diff --git a/members/pest-demo/src/csv.pest b/members/pest-demo/src/csv.pest new file mode 100644 index 0000000..2d134a9 --- /dev/null +++ b/members/pest-demo/src/csv.pest @@ -0,0 +1,3 @@ +file = { SOI ~ (record ~ "\n")* ~ EOI } +record = { field ~ ("," ~ field)* } +field = { (ASCII_DIGIT | "." 
| "-")+ }
diff --git a/members/pest-demo/src/main.rs b/members/pest-demo/src/main.rs
new file mode 100644
index 0000000..94efab3
--- /dev/null
+++ b/members/pest-demo/src/main.rs
@@ -0,0 +1,42 @@
+use std::fs;
+
+use pest::Parser;
+use pest_derive::Parser;
+
+/// CSV parser; `pest_derive` generates its `Parser` impl and the `Rule` enum from `csv.pest`.
+#[derive(Parser)]
+#[grammar = "csv.pest"]
+pub struct CSVParser;
+
+/// Parses the example CSV, prints each record as a row, then the field sum and record count.
+fn main() {
+    println!("cwd: {:?}", std::env::current_dir().unwrap());
+    let unparsed_file = fs::read_to_string("pest-demo/data/example.csv").expect("cannot read file");
+    let file = CSVParser::parse(Rule::file, &unparsed_file)
+        .expect("unsuccessful parse") // unwrap the parse result
+        .next()
+        .unwrap(); // get and unwrap the `file` rule; never fails
+    let mut field_sum: f64 = 0.0;
+    let mut record_count: u64 = 0;
+    println!("{:=^80}", "");
+    for record in file.into_inner() {
+        match record.as_rule() {
+            Rule::record => {
+                print!("|");
+                record_count += 1;
+                for field in record.into_inner() {
+                    // `field` matches any mix of digits, '.' and '-'; e.g. "1.2.3" panics here.
+                    field_sum += field.as_str().parse::<f64>().unwrap();
+                    print!("{:^25}|", field.as_str());
+                }
+                println!();
+            }
+            Rule::EOI => (),
+            _ => unreachable!(),
+        }
+    }
+    println!("{:=^80}", "");
+
+    println!("Sum of fields: {}", field_sum);
+    println!("Number of records: {}", record_count);
+}