From 30369cfdd396794f078115fcd133a5a1f6fef352 Mon Sep 17 00:00:00 2001 From: Kerdonov Date: Thu, 11 Dec 2025 01:46:57 +0200 Subject: [PATCH] initial parser working --- Cargo.lock | 20 ++- Cargo.toml | 1 + bacon.toml | 132 ++++++++++++++++++++ rust-toolchain | 1 - rust-toolchain.toml | 10 ++ src/ast.rs | 69 +++++++++++ src/main.rs | 5 +- src/parser/block.rs | 243 ++++++++++++++++++++++++++++++++++++ src/parser/inline.rs | 287 +++++++++++++++++++++++++++++++++++++++++++ src/parser/mod.rs | 3 + 10 files changed, 767 insertions(+), 4 deletions(-) create mode 100644 bacon.toml delete mode 100644 rust-toolchain create mode 100644 rust-toolchain.toml create mode 100644 src/ast.rs create mode 100644 src/parser/block.rs create mode 100644 src/parser/inline.rs create mode 100644 src/parser/mod.rs diff --git a/Cargo.lock b/Cargo.lock index 29c8ac6..7b45833 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,4 +4,22 @@ version = 4 [[package]] name = "marginal" -version = "0.1.0" +version = "0.0.1" +dependencies = [ + "nom", +] + +[[package]] +name = "memchr" +version = "2.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" + +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] diff --git a/Cargo.toml b/Cargo.toml index 8988115..abff013 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,3 +4,4 @@ version = "0.0.1" edition = "2024" [dependencies] +nom = "8.0.0" diff --git a/bacon.toml b/bacon.toml new file mode 100644 index 0000000..ec3d796 --- /dev/null +++ b/bacon.toml @@ -0,0 +1,132 @@ +# This is a configuration file for the bacon tool +# +# Complete help on configuration: https://dystroy.org/bacon/config/ +# +# You may check the current default at +# https://github.com/Canop/bacon/blob/main/defaults/default-bacon.toml + 
+default_job = "check" +env.CARGO_TERM_COLOR = "always" + +[jobs.check] +command = ["cargo", "check"] +need_stdout = false + +[jobs.check-all] +command = ["cargo", "check", "--all-targets"] +need_stdout = false + +# Run clippy on the default target +[jobs.clippy] +command = ["cargo", "clippy"] +need_stdout = false + +# Run clippy on all targets +# To disable some lints, you may change the job this way: +# [jobs.clippy-all] +# command = [ +# "cargo", "clippy", +# "--all-targets", +# "--", +# "-A", "clippy::bool_to_int_with_if", +# "-A", "clippy::collapsible_if", +# "-A", "clippy::derive_partial_eq_without_eq", +# ] +# need_stdout = false +[jobs.clippy-all] +command = ["cargo", "clippy", "--all-targets"] +need_stdout = false + +# Run clippy in pedantic mode +# The 'dismiss' feature may come handy +[jobs.pedantic] +command = [ + "cargo", "clippy", + "--", + "-W", "clippy::pedantic", +] +need_stdout = false + +# This job lets you run +# - all tests: bacon test +# - a specific test: bacon test -- config::test_default_files +# - the tests of a package: bacon test -- -- -p config +[jobs.test] +command = [ + "cargo", "nextest", "run", + "--hide-progress-bar", + "--failure-output", "final", + "--no-fail-fast" +] +need_stdout = true +analyzer = "nextest" + +[jobs.nextest] +command = [ + "cargo", "nextest", "run", + "--hide-progress-bar", + "--failure-output", "final", +] +need_stdout = true +analyzer = "nextest" + +[jobs.doc] +command = ["cargo", "doc", "--no-deps"] +need_stdout = false + +# If the doc compiles, then it opens in your browser and bacon switches +# to the previous job +[jobs.doc-open] +command = ["cargo", "doc", "--no-deps", "--open"] +need_stdout = false +on_success = "back" # so that we don't open the browser at each change + +# You can run your application and have the result displayed in bacon, +# if it makes sense for this crate. 
+[jobs.run] +command = [ + "cargo", "run", + # put launch parameters for your program behind a `--` separator +] +need_stdout = true +allow_warnings = true +background = true + +# Run your long-running application (eg server) and have the result displayed in bacon. +# For programs that never stop (eg a server), `background` is set to false +# to have the cargo run output immediately displayed instead of waiting for +# program's end. +# 'on_change_strategy' is set to `kill_then_restart` to have your program restart +# on every change (an alternative would be to use the 'F5' key manually in bacon). +# If you often use this job, it makes sense to override the 'r' key by adding +# a binding `r = job:run-long` at the end of this file . +# A custom kill command such as the one suggested below is frequently needed to kill +# long running programs (uncomment it if you need it) +[jobs.run-long] +command = [ + "cargo", "run", + # put launch parameters for your program behind a `--` separator +] +need_stdout = true +allow_warnings = true +background = false +on_change_strategy = "kill_then_restart" +# kill = ["pkill", "-TERM", "-P"] + +# This parameterized job runs the example of your choice, as soon +# as the code compiles. +# Call it as +# bacon ex -- my-example +[jobs.ex] +command = ["cargo", "run", "--example"] +need_stdout = true +allow_warnings = true + +# You may define here keybindings that would be specific to +# a project, for example a shortcut to launch a specific job. +# Shortcuts to internal functions (scrolling, toggling, etc.) +# should go in your personal global prefs.toml file instead. 
+[keybindings] +# alt-m = "job:my-job" +c = "job:clippy-all" # comment this to have 'c' run clippy on only the default target +p = "job:pedantic" diff --git a/rust-toolchain b/rust-toolchain deleted file mode 100644 index 2bf5ad0..0000000 --- a/rust-toolchain +++ /dev/null @@ -1 +0,0 @@ -stable diff --git a/rust-toolchain.toml b/rust-toolchain.toml new file mode 100644 index 0000000..89eb9bb --- /dev/null +++ b/rust-toolchain.toml @@ -0,0 +1,10 @@ +[toolchain] +channel = "stable" +targets = [ + "x86_64-unknown-linux-gnu" +] +components = [ + "clippy", + "rustfmt", + "rust-analyzer" +]
diff --git a/src/ast.rs b/src/ast.rs
new file mode 100644
index 0000000..e6a8ab0
--- /dev/null
+++ b/src/ast.rs
@@ -0,0 +1,80 @@
+// Grammar rules:
+//
+// Markdown ::= Block Markdown | Block
+//
+// Block ::= (Heading | CodeBlock | Quote | Paragraph) "\n\n"
+// Heading ::= "#{1,6}\s" Inline
+// CodeBlock ::= "```.*\n" "(.*?\n)*" "```"
+// Quote ::= ">" Block
+// Paragraph ::= Inline
+//
+// Inline ::= InlineElem Inline | InlineElem
+// InlineElem ::= Bold | Italic | Code | Link | Text
+// Bold ::= "\*" Inline "\*"
+// Italic ::= "_" Inline "_"
+// Code ::= "`" "[^`]*" "`"
+// Link ::= "\[" Inline "\]\(" Href "\)"
+// Href ::= "[^)]*"
+// Text ::= "[^`*_\[]*"
+
+/// An inline (span-level) Markdown element.
+///
+/// Container variants keep their children in `inner`, so elements can nest;
+/// leaf variants keep their text in `content`.
+#[derive(Debug, PartialEq)]
+pub enum Inline {
+    /// Content delimited by `*`.
+    Bold { inner: Vec<Inline> },
+    /// Content delimited by `_`.
+    Italic { inner: Vec<Inline> },
+    /// Link text plus its target.
+    Link { inner: Vec<Inline>, href: Href },
+    /// Code span delimited by backticks.
+    Code { content: String },
+    /// Plain text.
+    Text { content: String },
+}
+
+/// Target of an [`Inline::Link`].
+#[derive(Debug, PartialEq)]
+pub struct Href(pub String);
+
+impl Href {
+    /// Wraps `href` as-is; no validation is performed yet.
+    pub fn new(href: &str) -> Self {
+        // can check for link correctness
+        Self(href.to_string())
+    }
+}
+
+/*
+pub struct Markdown {
+    block: Block,
+    rest: Option<Box<Markdown>>,
+}
+
+pub enum Block {
+    Heading(HeadingBlock),
+    Code(CodeBlock),
+    Quote(QuoteBlock),
+    Paragraph(ParagraphBlock),
+}
+
+pub struct HeadingBlock {
+    level: u8,
+    content: Inline,
+}
+
+pub struct CodeBlock {
+    lang: String,
+    content: String,
+}
+
+pub struct QuoteBlock {
+    content: Box<Block>,
+}
+
+pub 
struct ParagraphBlock {
+    content: String,
+}
+*/
diff --git a/src/main.rs b/src/main.rs index 40e393c..29a7b29 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,7 @@ - mod ast; +mod parser; + fn main() { - + }
diff --git a/src/parser/block.rs b/src/parser/block.rs
new file mode 100644
index 0000000..cf520b4
--- /dev/null
+++ b/src/parser/block.rs
@@ -0,0 +1,266 @@
+//! Block-level Markdown parsers (headings, fenced code, quotes, paragraphs).
+
+// Nothing reachable from `main` uses these parsers yet, so silence the lint for now.
+#![allow(dead_code)]
+
+use crate::{ast::Inline, parser::inline::inline};
+use nom::{
+    IResult, Parser,
+    branch::alt,
+    bytes::complete::{tag, take_until},
+    multi::{many_m_n, many0, many1},
+    sequence::{delimited, terminated},
+};
+
+/// A block-level Markdown element.
+#[derive(Debug, PartialEq)]
+pub enum Block {
+    /// `#`-prefixed heading; `level` is the number of `#` characters (1 to 6).
+    Heading { inner: Vec<Inline>, level: u8 },
+    /// Fenced code block; `lang` is the text following the opening fence.
+    Code { content: String, lang: String },
+    /// `>`-prefixed quote wrapping another block.
+    Quote { inner: Box<Block> },
+    /// One line of inline content.
+    Paragraph { inner: Vec<Inline> },
+}
+
+/// Parses as many consecutive blocks as possible.
+///
+/// Unparseable input is not reported as an error: parsing stops and the rest
+/// is handed back as the remainder, so callers should check that it is empty.
+pub fn blocks(input: &str) -> IResult<&str, Vec<Block>> {
+    many0(block).parse(input)
+}
+
+/// Parses a single block.
+///
+/// Headings and paragraphs must be followed by a newline. Code blocks and
+/// quotes already consume their own trailing newline (after the closing fence
+/// and inside the quoted block respectively), so no second terminator is
+/// demanded of them; requiring one made `> quote\n` and a fenced block that is
+/// directly followed by another block unparseable.
+pub fn block(input: &str) -> IResult<&str, Block> {
+    alt((
+        code_block,
+        quote_block,
+        terminated(alt((heading_block, paragraph_block)), tag("\n")),
+    ))
+    .parse(input)
+}
+
+/// Parses one line of inline content as a paragraph.
+///
+/// Succeeds on empty input too, yielding a paragraph with no children.
+fn paragraph_block(input: &str) -> IResult<&str, Block> {
+    let (rem, inner) = inline(input)?;
+    Ok((rem, Block::Paragraph { inner }))
+}
+
+/// Parses a heading: one to six `#`, at least one space, then inline content.
+fn heading_block(input: &str) -> IResult<&str, Block> {
+    let (rem, (hashes, _, inner)) =
+        (many_m_n(1, 6, tag("#")), many1(tag(" ")), inline).parse(input)?;
+    // `many_m_n` caps the count at 6, so the cast cannot truncate.
+    let level = hashes.len() as u8;
+    Ok((rem, Block::Heading { inner, level }))
+}
+
+/// Parses a fenced code block.
+///
+/// The rest of the opening fence line becomes `lang` (possibly empty) and
+/// everything up to a closing fence that is immediately followed by a newline
+/// becomes `content`. The newline after the closing fence is consumed.
+fn code_block(input: &str) -> IResult<&str, Block> {
+    let (rem, (lang, _, code)) = delimited(
+        tag("```"),
+        (take_until("\n"), tag("\n"), take_until("```\n")),
+        tag("```\n"),
+    )
+    .parse(input)?;
+    Ok((
+        rem,
+        Block::Code {
+            content: code.to_string(),
+            lang: lang.to_string(),
+        },
+    ))
+}
+
+/// Parses a quote: `>`, optional spaces, then a nested block.
+///
+/// The nested block brings its own terminator, so quotes can nest without
+/// needing one extra newline per level.
+fn quote_block(input: &str) -> IResult<&str, Block> {
+    let (rem, (_, _, inner)) = (tag(">"), many0(tag(" ")), block).parse(input)?;
+    Ok((
+        rem,
+        Block::Quote {
+            inner: Box::new(inner),
+        },
+    ))
+}
+
+//|-------------------------------------------------------------------------------|
+//| TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS |
+//|-------------------------------------------------------------------------------|
+
+#[cfg(test)]
+mod test {
+    use super::*;
+    use crate::ast::Inline;
+
+    #[test]
+    fn single_paragraph() {
+        let md = "Hello markdown!!";
+        let (rem, block) = paragraph_block(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            block,
+            Block::Paragraph {
+                inner: vec![Inline::Text {
+                    content: "Hello markdown!!".to_string()
+                }]
+            }
+        );
+    }
+
+    #[test]
+    fn single_code_block_with_language() {
+        let md = "```rust
+fn main() {
+\tprintln!(\"Hello, World\");
+}
+```
+";
+        let (rem, block) = code_block(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            block,
+            Block::Code {
+                content: "fn main() {\n\tprintln!(\"Hello, World\");\n}\n".to_string(),
+                lang: "rust".to_string(),
+            }
+        )
+    }
+
+    #[test]
+    fn single_code_block_without_language() {
+        let md = "```
+echo \"hello world\"
+```
+";
+        let (rem, block) = code_block(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            block,
+            Block::Code {
+                content: "echo \"hello world\"\n".to_string(),
+                lang: "".to_string(),
+            }
+        );
+    }
+
+    #[test]
+    fn single_code_block_fail() {
+        let md = "```abc
+echo hello
+```errortext
+";
+        assert!(code_block(md).is_err());
+    }
+
+    #[test]
+    fn level_2_heading() {
+        let md = "## Heading2";
+        let (rem, block) = heading_block(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            block,
+            Block::Heading {
+                inner: vec![Inline::Text {
+                    content: "Heading2".to_string()
+                }],
+                level: 2,
+            }
+        );
+    }
+
+    #[test]
+    fn heading_no_space() {
+        let md = "#heading";
+        assert!(heading_block(md).is_err());
+    }
+
+    #[test]
+    fn level_6_heading() {
+        let md = "###### Heading6";
+        let (rem, block) = heading_block(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            block,
+            Block::Heading {
+                inner: vec![Inline::Text {
+                    content: "Heading6".to_string()
+                }],
+                level: 6,
+            }
+        );
+    }
+
+    #[test]
+    fn no_level_7_heading() {
+        let md = "####### Heading7";
+        assert!(heading_block(md).is_err());
+    }
+
+    #[test]
+    fn single_quote_block_with_paragraph() {
+        let md = "> sun tzu\n";
+        let (rem, block) = quote_block(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            block,
+            Block::Quote {
+                inner: Box::new(Block::Paragraph {
+                    inner: vec![
+                        Inline::Text { content: "sun tzu".to_string() }
+                    ]
+                })
+            }
+        );
+    }
+
+    #[test]
+    fn heading_and_paragraph() {
+        let md =
+"## Heading
+Hello MD
+";
+        let (rem, blocks) = blocks(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            blocks,
+            vec![
+                Block::Heading {
+                    inner: vec![
+                        Inline::Text { content: "Heading".to_string() }
+                    ],
+                    level: 2
+                },
+                Block::Paragraph {
+                    inner: vec![
+                        Inline::Text { content: "Hello MD".to_string() }
+                    ]
+                }
+            ]
+        );
+    }
+}
diff --git a/src/parser/inline.rs b/src/parser/inline.rs
new file mode 100644
index 0000000..131d327
--- /dev/null
+++ b/src/parser/inline.rs
@@ -0,0 +1,318 @@
+//! Inline (span-level) Markdown parsers.
+
+// Nothing reachable from `main` uses these parsers yet, so silence the lint for now.
+#![allow(dead_code)]
+
+use nom::{
+    IResult, Parser,
+    branch::alt,
+    bytes::complete::{is_not, tag},
+    error::context,
+    multi::many0,
+    sequence::delimited,
+};
+
+use crate::ast::{Href, Inline};
+
+/// Parses a (possibly empty) sequence of inline elements.
+///
+/// Parsing stops at the first position where no element parser succeeds, for
+/// example at a newline, a `]`, or a delimiter that does not open a complete
+/// element. Everything from there on is returned as the remainder.
+pub fn inline(input: &str) -> IResult<&str, Vec<Inline>> {
+    many0(alt((
+        text_inline,
+        bold_inline,
+        italic_inline,
+        code_inline,
+        link_inline,
+    )))
+    .parse(input)
+}
+
+/// Parses plain text: one or more characters other than `*`, `_`, a backtick,
+/// `[`, `]` and newline.
+fn text_inline(input: &str) -> IResult<&str, Inline> {
+    let (rem, text) = is_not("*_`[]\n").parse(input)?;
+    Ok((
+        rem,
+        Inline::Text {
+            content: text.to_string(),
+        },
+    ))
+}
+
+/// Parses `*`-delimited bold content, which may contain nested inline elements.
+fn bold_inline(input: &str) -> IResult<&str, Inline> {
+    let (rem, inner) = delimited(
+        context("opening bold tag", tag("*")),
+        inline,
+        context("closing bold tag", tag("*")),
+    )
+    .parse(input)?;
+    Ok((rem, Inline::Bold { inner }))
+}
+
+/// Parses `_`-delimited italic content, which may contain nested inline elements.
+fn italic_inline(input: &str) -> IResult<&str, Inline> {
+    let (rem, inner) = delimited(
+        context("opening italics tag", tag("_")),
+        inline,
+        context("closing italics tag", tag("_")),
+    )
+    .parse(input)?;
+    Ok((rem, Inline::Italic { inner }))
+}
+
+/// Parses a backtick-delimited code span.
+///
+/// The content is kept verbatim; it must be non-empty and cannot span lines.
+fn code_inline(input: &str) -> IResult<&str, Inline> {
+    let (rem, code) = delimited(
+        context("opening code tag", tag("`")),
+        context("inline code", is_not("`\n")),
+        context("closing code tag", tag("`")),
+    )
+    .parse(input)?;
+    Ok((
+        rem,
+        Inline::Code {
+            content: code.to_string(),
+        },
+    ))
+}
+
+/// Parses a link of the form `[text](href)`.
+///
+/// `text` may contain nested inline elements; `href` must be non-empty and
+/// cannot contain `)` or a newline.
+fn link_inline(input: &str) -> IResult<&str, Inline> {
+    let (rem, (inner, href)) = (
+        delimited(
+            context("opening link tag", tag("[")),
+            context("link name", inline),
+            context("closing link tag", tag("]")),
+        ),
+        delimited(
+            context("opening href tag", tag("(")),
+            context("link href", is_not(")\n")),
+            context("closing href tag", tag(")")),
+        ),
+    )
+        .parse(input)?;
+    Ok((
+        rem,
+        Inline::Link {
+            inner,
+            href: Href::new(href),
+        },
+    ))
+}
+
+//|-------------------------------------------------------------------------------|
+//| TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS |
+//|-------------------------------------------------------------------------------|
+
+#[cfg(test)]
+mod test {
+    use super::*;
+
+    #[test]
+    fn single_text() {
+        let md = "hello normal inline";
+        let (rem, parsed) = text_inline(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            parsed,
+            Inline::Text {
+                content: "hello normal inline".to_string()
+            }
+        );
+    }
+
+    #[test]
+    fn single_bold() {
+        let md = "*bold text*";
+        let (rem, parsed) = bold_inline(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            parsed,
+            Inline::Bold {
+                inner: vec![Inline::Text {
+                    content: "bold text".to_string()
+                }]
+            }
+        );
+    }
+
+    #[test]
+    fn bold_with_leftovers() {
+        let md = "*bold* leftover";
+        let (rem, parsed) = bold_inline(md).unwrap();
+
+        assert_eq!(rem, " leftover");
+        assert_eq!(
+            parsed,
+            Inline::Bold {
+                inner: vec![Inline::Text {
+                    content: "bold".to_string()
+                }]
+            }
+        )
+    }
+
+    #[test]
+    fn inline_normal_and_bold() {
+        let md = "some *bold* text";
+        let (rem, parsed) = inline(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            parsed,
+            vec![
+                Inline::Text {
+                    content: "some ".to_string()
+                },
+                Inline::Bold {
+                    inner: vec![Inline::Text {
+                        content: "bold".to_string()
+                    }]
+                },
+                Inline::Text {
+                    content: " text".to_string()
+                },
+            ]
+        )
+    }
+
+    #[test]
+    fn multiple_normal_and_bold() {
+        let md = "some *bold* text and more *bold stuff*";
+        let (rem, parsed) = inline(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            parsed,
+            vec![
+                Inline::Text {
+                    content: "some ".to_string()
+                },
+                Inline::Bold {
+                    inner: vec![Inline::Text {
+                        content: "bold".to_string()
+                    }]
+                },
+                Inline::Text {
+                    content: " text and more ".to_string()
+                },
+                Inline::Bold {
+                    inner: vec![Inline::Text {
+                        content: "bold stuff".to_string()
+                    }]
+                },
+            ]
+        );
+    }
+
+    #[test]
+    fn normal_and_nested_bold() {
+        let md = "some **extra* bold* stuff";
+        let (rem, parsed) = inline(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            parsed,
+            vec![
+                Inline::Text {
+                    content: "some ".to_string()
+                },
+                Inline::Bold { inner: vec![] },
+                Inline::Text {
+                    content: "extra".to_string()
+                },
+                Inline::Bold {
+                    inner: vec![Inline::Text {
+                        content: " bold".to_string()
+                    }]
+                },
+                Inline::Text {
+                    content: " stuff".to_string()
+                },
+            ]
+        );
+    }
+
+    #[test]
+    fn nested_bold_and_italics() {
+        let md = "some _nested *bold* + italics_, yeah";
+        let (rem, parsed) = inline(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            parsed,
+            vec![
+                Inline::Text {
+                    content: "some ".to_string()
+                },
+                Inline::Italic {
+                    inner: vec![
+                        Inline::Text {
+                            content: "nested ".to_string()
+                        },
+                        Inline::Bold {
+                            inner: vec![Inline::Text {
+                                content: "bold".to_string()
+                            }]
+                        },
+                        Inline::Text {
+                            content: " + italics".to_string()
+                        },
+                    ]
+                },
+                Inline::Text {
+                    content: ", yeah".to_string()
+                },
+            ]
+        );
+    }
+
+    #[test]
+    fn inline_code_bamboozle() {
+        let md = "take some `code and *bold* and _italics_` lmao";
+        let (rem, parsed) = inline(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            parsed,
+            vec![
+                Inline::Text { content: "take some ".to_string() },
+                Inline::Code { content: "code and *bold* and _italics_".to_string() },
+                Inline::Text { content: " lmao".to_string() }
+            ]
+        );
+    }
+
+    #[test]
+    fn bold_link_text() {
+        let md = "[this link is *important*](http://example.com)";
+        let (rem, parsed) = link_inline(md).unwrap();
+
+        assert_eq!(rem, "");
+        assert_eq!(
+            parsed,
+            Inline::Link {
+                inner: vec![
+                    Inline::Text { content: "this link is ".to_string() },
+                    Inline::Bold {
+                        inner: vec![Inline::Text {
+                            content: "important".to_string()
+                        }]
+                    },
+                ],
+                href: Href("http://example.com".to_string())
+            }
+        )
+    }
+}
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
new file mode 100644
index 0000000..29867db
--- /dev/null
+++ b/src/parser/mod.rs
@@ -0,0 +1,3 @@
+pub mod inline;
+pub mod block;
+