rust maybe base?

Even writing a lexer in Rust seems weird and too hard.
The typical lexer-generator scheme does not seem to exist in any
crate used by a significant number of people; instead
we have `nom`, which is a "parser combinator" library, meaning
we write a lot of functions instead of a grammar from which
a parser/scanner is generated.

This just confuses me a lot to be honest, I'd just want
lex and yacc in rust, coming from a university course
with ply and antlr4.
This commit is contained in:
Christoph J. Scherr 2023-12-19 01:17:44 +01:00
parent 1b61598a56
commit fc889b57d3
7 changed files with 544 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/target

319
Cargo.lock generated Normal file
View File

@ -0,0 +1,319 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "anstream"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d664a92ecae85fd0a7392615844904654d1d5f5514837f471ddef4a057aba1b6"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87"
[[package]]
name = "anstyle-parse"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c75ac65da39e5fe5ab759307499ddad880d724eed2f6ce5b5e8a26f4f387928c"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e28923312444cdd728e4738b3f9c9cac739500909bb3d3c94b43551b16517648"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1cd54b81ec8d6180e24654d0b371ad22fc3dd083b6ff8ba325b72e00c87660a7"
dependencies = [
"anstyle",
"windows-sys",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "clap"
version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bfaff671f6b22ca62406885ece523383b9b64022e341e53e009a62ebc47a45f2"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap-num"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488557e97528174edaa2ee268b23a809e0c598213a4bbcb4f34575a46fda147e"
dependencies = [
"num-traits",
]
[[package]]
name = "clap-verbosity-flag"
version = "2.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c90e95e5bd4e8ac34fa6f37c774b0c6f8ed06ea90c79931fd448fcf941a9767"
dependencies = [
"clap",
"log",
]
[[package]]
name = "clap_builder"
version = "4.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a216b506622bb1d316cd51328dce24e07bdff4a6128a47c7e7fad11878d5adbb"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.4.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9804afaaf59a91e75b022a30fb7229a7901f60c755489cc61c9b423b836442"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.41",
]
[[package]]
name = "clap_lex"
version = "0.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "702fc72eb24e5a1e48ce58027a675bc24edd52096d5397d4aea7c6dd9eca0bd1"
[[package]]
name = "colorchoice"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acbf1af155f9b9ef647e42cdc158db4b64a1b61f743629225fde6f3e0be2a7c7"
[[package]]
name = "heck"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "log"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "memchr"
version = "2.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f665ee40bc4a3c5590afb1e9677db74a508659dfd71e126420da8274909a0167"
[[package]]
name = "minimal-lexical"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
[[package]]
name = "nom"
version = "7.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
dependencies = [
"memchr",
"minimal-lexical",
]
[[package]]
name = "num-traits"
version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c"
dependencies = [
"autocfg",
]
[[package]]
name = "oopsc"
version = "0.1.0"
dependencies = [
"clap",
"clap-num",
"clap-verbosity-flag",
"nom",
"racc",
]
[[package]]
name = "proc-macro2"
version = "1.0.70"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "39278fbbf5fb4f646ce651690877f89d1c5811a3d4acb27700c1cb3cdb78fd3b"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
dependencies = [
"proc-macro2",
]
[[package]]
name = "racc"
version = "0.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3a93af6fd1d634b74440a3f0602fcbcea7db7950960a99c9c9a9f05b7da93594"
dependencies = [
"log",
"proc-macro2",
"quote",
"ramp_table",
"syn 1.0.109",
]
[[package]]
name = "ramp_table"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1bff95c0969b90c5d05263ee9543720e6361c59a686e6dfd149b4a1ddcc28217"
[[package]]
name = "strsim"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.41"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "44c8b28c477cc3bf0e7966561e3460130e1255f7a1cf71931075f1c5e7a7e269"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "utf8parse"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a"
[[package]]
name = "windows-sys"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
dependencies = [
"windows-targets",
]
[[package]]
name = "windows-targets"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd"
dependencies = [
"windows_aarch64_gnullvm",
"windows_aarch64_msvc",
"windows_i686_gnu",
"windows_i686_msvc",
"windows_x86_64_gnu",
"windows_x86_64_gnullvm",
"windows_x86_64_msvc",
]
[[package]]
name = "windows_aarch64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea"
[[package]]
name = "windows_aarch64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef"
[[package]]
name = "windows_i686_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313"
[[package]]
name = "windows_i686_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a"
[[package]]
name = "windows_x86_64_gnu"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd"
[[package]]
name = "windows_x86_64_gnullvm"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e"
[[package]]
name = "windows_x86_64_msvc"
version = "0.52.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"

21
Cargo.toml Normal file
View File

@ -0,0 +1,21 @@
[workspace]
resolver = "2"
members = [
"oopsc",
]
default-members = [
"oopsc",
]
[workspace.package]
name = "oops"
version = "0.1.0"
edition = "2021"
authors = ["Christoph J. Scherr <softwar@cscherr.de>"]
license = "MIT"
description = "Personal multitool"
readme = "README.md"
homepage = "https://git.cscherr.de/PlexSheep/oops"
repository = "https://git.cscherr.de/PlexSheep/oops"
keywords = ["compiler"]
categories = ["command-line-utilities", "development-tools"]
publish = false

23
oopsc/Cargo.toml Normal file
View File

@ -0,0 +1,23 @@
[package]
name = "oopsc"
version = "0.1.0"
publish.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
description.workspace = true
readme.workspace = true
homepage.workspace = true
repository.workspace = true
keywords.workspace = true
categories.workspace = true
[features]
default = []
[dependencies]
clap = "4.4.11"
clap-num = "1.0.2"
clap-verbosity-flag = "2.1.1"
nom = "7.1.3"
racc = "0.0.4"

126
oopsc/src/cli/mod.rs Normal file
View File

@ -0,0 +1,126 @@
//! # cli module
//!
//! This module defines the command-line interface of `oopsc`. It uses [`clap`].
//// ATTRIBUTES ////////////////////////////////////////////////////////////////////////////////////
// we want docs
#![warn(missing_docs)]
#![warn(rustdoc::missing_crate_level_docs)]
////////////////////////////////////////////////////////////////////////////////////////////////////
// we want Debug everywhere.
#![warn(missing_debug_implementations)]
////////////////////////////////////////////////////////////////////////////////////////////////////
// enable clippy's extra lints, the pedantic version
#![warn(clippy::pedantic)]
//// IMPORTS ///////////////////////////////////////////////////////////////////////////////////////
use clap::{Parser, Subcommand};
use clap_num::number_range;
use clap_verbosity_flag::{Verbosity, InfoLevel};
//// CONSTANTS /////////////////////////////////////////////////////////////////////////////////////
/// Short about section displayed in help.
///
/// The `'static` lifetime is implied for constants, so it is not spelled out.
const ABOUT_ROOT: &str = r##"
OOPS compiler
A compiler for the oops programming language
"##;
/// Longer about section displayed in help; it is appended to
/// [the short help](ABOUT_ROOT) to form the long description.
///
/// Declared as `const` like [`ABOUT_ROOT`]: it is an immutable string that never needs a
/// unique address, so there is no reason for it to be a `static`.
const LONG_ABOUT_ROOT: &str = r##"
Object oriented pseudo syntax (OOPS) is a obscure programming language. This program is the
compiler for that language.
"##;
//// STATICS ///////////////////////////////////////////////////////////////////////////////////////
/// ## Main struct for parsing CLI arguments
///
/// This struct describes the complete commandline options/arguments that [oopsc](crate) can
/// take. It combines global flags with a subcommand ([`Commands`]).
///
/// The short and long descriptions shown in the help output are set explicitly through
/// `about` / `long_about` below rather than taken from this doc comment.
#[derive(Debug, Clone, Parser)]
#[command(
    author,
    version,
    about = ABOUT_ROOT,
    long_about = format!("{}{}", ABOUT_ROOT, LONG_ABOUT_ROOT),
    help_template =
r#"oopsc: {version}{about-section}Author:
{author-with-newline}
{usage-heading} {usage}{all-args}{tab}"#
)]
pub struct Cli {
    // This must be a `///` doc comment: clap's derive takes the help text for an argument
    // from its doc comment, and a plain `//` comment leaves the flag undocumented.
    /// enable more verbose output
    #[arg(short, long, global = true)]
    pub verbose: bool,
    /// read from stdin instead of files
    #[arg(short, long, global = true)]
    pub stdin: bool,
    /// choose a subcommand
    #[command(subcommand)]
    pub command: Commands,
    /// a list of file (paths)
    #[arg(short, long, global = true)]
    pub files: Vec<std::path::PathBuf>,
}
//// ENUMS /////////////////////////////////////////////////////////////////////////////////////////
/// ## defines the top level commands
// Selected through `Cli::command`. The `///` text on each variant is what clap's derive
// shows as that subcommand's description in the help output, so treat it as user-facing.
#[derive(Debug, Clone, Subcommand)]
#[non_exhaustive]
pub enum Commands {
/// Scan a file for tokens
// Unit variant: the subcommand takes no arguments of its own; input selection comes from
// the global `stdin` / `files` options on `Cli`.
Scan,
}
////////////////////////////////////////////////////////////////////////////////////////////////////
/// ## defines the networking commands
// NOTE(review): nothing visible in this file references `NetCommands` — `Cli::command` is a
// `Commands`, whose only variant is `Scan` — so these subcommands are not reachable from the
// command line as written. Presumably a leftover from another tool; confirm before relying
// on it or removing it.
#[derive(Debug, Clone, Subcommand)]
#[non_exhaustive]
pub enum NetCommands {
/// monitor your network
Monitor {
/// repeat every N seconds, 0 means no repeat
#[clap(short, long, default_value_t = 0, name = "N")]
repeat: u64,
/// At what percentage should the try be considered successful
// Values are validated by `max100`, which only accepts 0 to 100.
#[clap(short, long, default_value_t = 100, value_parser=max100)]
success_ratio: u8,
/// extra URLs to check with
// No `short`/`long` attribute here, so this is a positional argument list.
extra_urls: Vec<String>,
/// Don't check for default URLs
#[clap(short, long)]
no_default: bool,
/// set a timeout (in ms)
#[clap(short, long, default_value_t = 100)]
timeout: u64
},
/// discover hosts in your network
// Currently declares no options of its own.
Discover {
}
}
////////////////////////////////////////////////////////////////////////////////////////////////////
//// STRUCTS ///////////////////////////////////////////////////////////////////////////////////////
//// IMPLEMENTATION ////////////////////////////////////////////////////////////////////////////////
//// PUBLIC FUNCTIONS //////////////////////////////////////////////////////////////////////////////
//// PRIVATE FUNCTIONS /////////////////////////////////////////////////////////////////////////////
/// Value parser for clap arguments that must lie between 0 and 100.
///
/// Hands `s` to [`number_range`] together with the two bounds and returns its result
/// unchanged: the parsed `u8` on success, or the error message as a `String`.
fn max100(s: &str) -> Result<u8, String> {
    const LOWEST: u8 = 0;
    const HIGHEST: u8 = 100;
    number_range(s, LOWEST, HIGHEST)
}

40
oopsc/src/main.rs Normal file
View File

@ -0,0 +1,40 @@
use std::io::BufReader;
use clap::Parser;
mod cli;
use clap_verbosity_flag::{LogLevel, InfoLevel};
use cli::*;
mod scanner;
use scanner::scan;
/// Entry point of the `oopsc` binary.
///
/// Parses the command line into a [`Cli`] and runs the selected subcommand. For
/// [`Commands::Scan`] the source text is read from stdin when the `stdin` flag is set,
/// otherwise from the given files, concatenated in the order they were passed. The combined
/// text is handed to [`scan`], and every returned token is printed in double quotes followed
/// by an empty line. With the `verbose` flag the combined source is echoed first.
///
/// # Errors
///
/// Returns the underlying [`std::io::Error`] if stdin or any of the files cannot be read.
fn main() -> std::io::Result<()> {
    let cli = Cli::parse();
    match cli.command {
        Commands::Scan => {
            let mut buf = String::new();
            if cli.stdin {
                for line in std::io::stdin().lines() {
                    // Append in place instead of rebuilding the whole buffer with `format!`
                    // on every line (quadratic), and terminate each line rather than
                    // prefixing it, so the source does not start with a stray newline.
                    // A read error is propagated like the file case below instead of
                    // scanning a silently truncated source.
                    buf.push_str(&line?);
                    buf.push('\n');
                }
            } else {
                // Iterating an empty list is a no-op, so no length check is needed.
                for file in cli.files {
                    buf.push_str(&std::fs::read_to_string(file)?);
                }
            }
            if cli.verbose {
                println!("{text:=^80}\n{buf}\n{text:=^80}", text = "raw combined source");
            }
            for token in scan(&buf) {
                println!("\"{token}\"\n");
            }
        }
    }
    Ok(())
}

14
oopsc/src/scanner/mod.rs Normal file
View File

@ -0,0 +1,14 @@
use nom::{
IResult,
bytes::complete::{tag},
};
/// Recognises the comment opener `/*` at the very start of `input`.
///
/// On success the result holds the remaining input and the matched `/*`; if `input` does not
/// begin with `/*`, the error produced by nom's `tag` parser is returned.
fn comment(input: &str) -> IResult<&str, &str> {
    let opener = tag("/*");
    opener(input)
}
/// Scans `input` for tokens.
///
/// Work in progress: at the moment this only probes the start of `input` with [`comment`],
/// prints the matched text to stdout when that succeeds, and returns a fixed placeholder
/// list instead of real tokens.
///
/// Input that does not start with a comment opener (including empty input) is not an error
/// here; the probe is simply skipped.
pub fn scan(input: &str) -> Vec<&str> {
    // `comment` fails for every input that does not begin with `/*`; unwrapping that result
    // would abort the whole program on ordinary source text, so only act on a match.
    if let Ok((_rest, opener)) = comment(input) {
        println!("{opener}");
    }
    vec!["s"]
}