initial parser working
All checks were successful
Test the running changes / Test (push) Successful in 35s

This commit is contained in:
2025-12-11 01:46:57 +02:00
parent 617f124ac1
commit 30369cfdd3
10 changed files with 767 additions and 4 deletions

20
Cargo.lock generated
View File

@@ -4,4 +4,22 @@ version = 4
[[package]]
name = "marginal"
version = "0.1.0"
version = "0.0.1"
dependencies = [
"nom",
]
[[package]]
name = "memchr"
version = "2.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273"
[[package]]
name = "nom"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
dependencies = [
"memchr",
]

View File

@@ -4,3 +4,4 @@ version = "0.0.1"
edition = "2024"
[dependencies]
nom = "8.0.0"

132
bacon.toml Normal file
View File

@@ -0,0 +1,132 @@
# This is a configuration file for the bacon tool
#
# Complete help on configuration: https://dystroy.org/bacon/config/
#
# You may check the current default at
# https://github.com/Canop/bacon/blob/main/defaults/default-bacon.toml
default_job = "check"
env.CARGO_TERM_COLOR = "always"
[jobs.check]
command = ["cargo", "check"]
need_stdout = false
[jobs.check-all]
command = ["cargo", "check", "--all-targets"]
need_stdout = false
# Run clippy on the default target
[jobs.clippy]
command = ["cargo", "clippy"]
need_stdout = false
# Run clippy on all targets
# To disable some lints, you may change the job this way:
# [jobs.clippy-all]
# command = [
# "cargo", "clippy",
# "--all-targets",
# "--",
# "-A", "clippy::bool_to_int_with_if",
# "-A", "clippy::collapsible_if",
# "-A", "clippy::derive_partial_eq_without_eq",
# ]
# need_stdout = false
[jobs.clippy-all]
command = ["cargo", "clippy", "--all-targets"]
need_stdout = false
# Run clippy in pedantic mode
# The 'dismiss' feature may come in handy
[jobs.pedantic]
command = [
"cargo", "clippy",
"--",
"-W", "clippy::pedantic",
]
need_stdout = false
# This job lets you run
# - all tests: bacon test
# - a specific test: bacon test -- config::test_default_files
# - the tests of a package: bacon test -- -- -p config
[jobs.test]
command = [
"cargo", "nextest", "run",
"--hide-progress-bar",
"--failure-output", "final",
"--no-fail-fast"
]
need_stdout = true
analyzer = "nextest"
[jobs.nextest]
command = [
"cargo", "nextest", "run",
"--hide-progress-bar",
"--failure-output", "final",
]
need_stdout = true
analyzer = "nextest"
[jobs.doc]
command = ["cargo", "doc", "--no-deps"]
need_stdout = false
# If the doc compiles, then it opens in your browser and bacon switches
# to the previous job
[jobs.doc-open]
command = ["cargo", "doc", "--no-deps", "--open"]
need_stdout = false
on_success = "back" # so that we don't open the browser at each change
# You can run your application and have the result displayed in bacon,
# if it makes sense for this crate.
[jobs.run]
command = [
"cargo", "run",
# put launch parameters for your program behind a `--` separator
]
need_stdout = true
allow_warnings = true
background = true
# Run your long-running application (e.g. a server) and have the result displayed in bacon.
# For programs that never stop (e.g. a server), `background` is set to false
# to have the cargo run output immediately displayed instead of waiting for
# the program's end.
# 'on_change_strategy' is set to `kill_then_restart` to have your program restart
# on every change (an alternative would be to use the 'F5' key manually in bacon).
# If you often use this job, it makes sense to override the 'r' key by adding
# a binding `r = job:run-long` at the end of this file.
# A custom kill command such as the one suggested below is frequently needed to kill
# long running programs (uncomment it if you need it)
[jobs.run-long]
command = [
"cargo", "run",
# put launch parameters for your program behind a `--` separator
]
need_stdout = true
allow_warnings = true
background = false
on_change_strategy = "kill_then_restart"
# kill = ["pkill", "-TERM", "-P"]
# This parameterized job runs the example of your choice, as soon
# as the code compiles.
# Call it as
# bacon ex -- my-example
[jobs.ex]
command = ["cargo", "run", "--example"]
need_stdout = true
allow_warnings = true
# You may define here keybindings that would be specific to
# a project, for example a shortcut to launch a specific job.
# Shortcuts to internal functions (scrolling, toggling, etc.)
# should go in your personal global prefs.toml file instead.
[keybindings]
# alt-m = "job:my-job"
c = "job:clippy-all" # comment this to have 'c' run clippy on only the default target
p = "job:pedantic"

View File

@@ -1 +0,0 @@
stable

10
rust-toolchain.toml Normal file
View File

@@ -0,0 +1,10 @@
[toolchain]
channel = "stable"
targets = [
"x86_64-unknown-linux-gnu"
]
components = [
"clippy",
"rustfmt",
"rust-analyzer"
]

69
src/ast.rs Normal file
View File

@@ -0,0 +1,69 @@
// Grammar rules:
//
// Markdown ::= Block Markdown | Block
//
// Block ::= (Heading | CodeBlock | Quote | Paragraph) "\n\n"
// Heading ::= "#{1,6}\s" Inline
// CodeBlock ::= "```.*\n" "(.*?\n)*" "```"
// Quote ::= ">" Block
// Paragraph ::= Inline
//
// Inline ::= InlineElem Inline | InlineElem
// InlineElem ::= Bold | Italic | Code | Link | Text
// Bold ::= "\*" Inline "\*"
// Italic ::= "_" Inline "_"
// Code ::= "`" "[^`\n]+" "`"
// Link ::= "\[" Inline "\]\(" Href "\)"
// Href ::= "[^)\n]+"
// Text ::= "[^`*_\[\]\n]+"
/// Inline-level markdown node, one variant per `InlineElem` alternative of the
/// grammar comment above. `Bold`, `Italic` and `Link` carry nested inline nodes;
/// `Code` and `Text` carry their characters as an owned string.
#[derive(Debug, PartialEq)]
pub enum Inline {
/// Inline content written between two `*`.
Bold { inner: Vec<Inline> },
/// Inline content written between two `_`.
Italic { inner: Vec<Inline> },
/// `[inner](href)`: the link label as inline nodes plus its target.
Link { inner: Vec<Inline>, href: Href },
/// Text written between backticks, kept verbatim.
Code { content: String },
/// A run of plain text.
Text { content: String },
}
/// Newtype around the target string of a link.
#[derive(Debug, PartialEq)]
pub struct Href(pub String);

impl Href {
    /// Copies `href` into a new owned [`Href`].
    ///
    /// The string is stored as given; nothing is validated yet, and this
    /// constructor is the natural place for a future link-correctness check.
    pub fn new(href: &str) -> Self {
        Href(String::from(href))
    }
}
/*
pub struct Markdown {
block: Block,
rest: Option<Box<Markdown>>,
}
pub enum Block {
Heading(HeadingBlock),
Code(CodeBlock),
Quote(QuoteBlock),
Paragraph(ParagraphBlock),
}
pub struct HeadingBlock {
level: u8,
content: Inline,
}
pub struct CodeBlock {
lang: String,
content: String,
}
pub struct QuoteBlock {
content: Box<Block>,
}
pub struct ParagraphBlock {
content: String,
}
*/

View File

@@ -1,6 +1,7 @@
mod ast;
mod parser;
// Binary entry point. As shown here the body is empty, so nothing from the
// `ast` or `parser` modules is invoked yet.
fn main() {
}

243
src/parser/block.rs Normal file
View File

@@ -0,0 +1,243 @@
#![allow(dead_code)]
use crate::{ast::Inline, parser::inline::inline};
use nom::{
IResult, Parser,
bytes::complete::{tag, take_until},
multi::{many_m_n, many1, many0},
sequence::{terminated, delimited},
branch::alt,
};
/// Block-level markdown node produced by the parsers in this module.
#[derive(Debug, PartialEq)]
pub enum Block {
/// A heading: `level` is the number of leading `#` (1 to 6), `inner` the title.
Heading { inner: Vec<Inline>, level: u8 },
/// A fenced code block: `lang` is the text following the opening fence on its
/// line (possibly empty), `content` everything up to the closing fence.
Code { content: String, lang: String },
/// A `>`-prefixed block wrapping exactly one other block.
Quote { inner: Box<Block> },
/// A line of inline content.
Paragraph { inner: Vec<Inline> },
}
/// Parses as many consecutive blocks as possible from the start of `input`.
///
/// Returns the unparsed remainder together with the blocks in source order.
/// Parsing stops at the first position where [`block`] no longer matches, so
/// the list may be empty and the remainder may be non-empty.
pub fn blocks(input: &str) -> IResult<&str, Vec<Block>> {
    let mut repeated = many0(block);
    repeated.parse(input)
}
/// Parses one block together with the newline that ends it.
///
/// Alternatives are tried in order: heading, fenced code, quote, paragraph.
///
/// `heading_block` and `paragraph_block` stop in front of their line break, so
/// the terminating `"\n"` is consumed here. `code_block` (whose closing fence
/// is "```\n") and `quote_block` (which recurses into `block`) have already
/// consumed their final newline. Requiring a second one for every alternative
/// made a code block or quote that is directly followed by another block, or
/// by the end of the input, impossible to parse.
///
/// A blank line after any block is still accepted: it is read as a paragraph
/// with no inline content.
pub fn block(input: &str) -> IResult<&str, Block> {
    alt((
        terminated(heading_block, tag("\n")),
        code_block,
        quote_block,
        terminated(paragraph_block, tag("\n")),
    ))
    .parse(input)
}
fn paragraph_block(input: &str) -> IResult<&str, Block> {
(inline)
.parse(input)
.map(|(rem, inl)| (rem, Block::Paragraph { inner: inl }))
}
/// Parses a heading: one to six `#`, at least one space, then inline content.
///
/// The line break after the title is not consumed. A seventh `#` makes the
/// parser fail, because the mandatory space is expected right after the sixth.
fn heading_block(input: &str) -> IResult<&str, Block> {
    let (rest, (hashes, _spaces, inner)) =
        (many_m_n(1, 6, tag("#")), many1(tag(" ")), inline).parse(input)?;

    // `many_m_n` caps the marker count at 6, so the cast cannot truncate.
    let level = hashes.len() as u8;
    Ok((rest, Block::Heading { inner, level }))
}
/// Parses a fenced code block.
///
/// The opening fence is "```" followed by an optional language tag up to the
/// end of that line. The content runs up to the first "```" that is directly
/// followed by a newline; that closing fence and its newline are consumed.
fn code_block(input: &str) -> IResult<&str, Block> {
    let fence_open = tag("```");
    let fence_close = tag("```\n");
    let body = (take_until("\n"), tag("\n"), take_until("```\n"));

    let parsed: IResult<&str, (&str, &str, &str)> =
        delimited(fence_open, body, fence_close).parse(input);
    let (rest, (lang, _newline, code)) = parsed?;

    Ok((
        rest,
        Block::Code {
            content: String::from(code),
            lang: String::from(lang),
        },
    ))
}
/// Parses a quote: `>`, any number of spaces, then exactly one nested block.
///
/// The nested block is parsed with [`block`], so whatever that parser consumes
/// (including its line ending) is consumed here as well.
fn quote_block(input: &str) -> IResult<&str, Block> {
    let (rest, (_marker, _padding, quoted)) =
        (tag(">"), many0(tag(" ")), block).parse(input)?;
    Ok((rest, Block::Quote { inner: Box::new(quoted) }))
}
//|-------------------------------------------------------------------------------|
//| TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS |
//|-------------------------------------------------------------------------------|
/// Unit tests for the block-level parsers.
#[cfg(test)]
mod test {
    use super::*;
    use crate::ast::Inline;

    /// Builds the plain-text node most expectations below consist of.
    fn text(content: &str) -> Inline {
        Inline::Text {
            content: content.to_string(),
        }
    }

    #[test]
    fn single_paragraph() {
        let (rem, block) = paragraph_block("Hello markdown!!").unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Paragraph {
                inner: vec![text("Hello markdown!!")]
            }
        );
    }

    #[test]
    fn single_code_block_with_language() {
        let md = "```rust\nfn main() {\n\tprintln!(\"Hello, World\");\n}\n```\n";
        let (rem, block) = code_block(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Code {
                content: "fn main() {\n\tprintln!(\"Hello, World\");\n}\n".to_string(),
                lang: "rust".to_string(),
            }
        );
    }

    #[test]
    fn single_code_block_without_language() {
        let md = "```\necho \"hello world\"\n```\n";
        let (rem, block) = code_block(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Code {
                content: "echo \"hello world\"\n".to_string(),
                lang: "".to_string(),
            }
        );
    }

    // Text after the closing fence means no "```\n" terminator is ever found.
    #[test]
    fn single_code_block_fail() {
        let md = "```abc\necho hello\n```errortext\n";
        assert!(code_block(md).is_err());
    }

    // Previously named `level_1_heading`, although it parses a level-2 heading.
    #[test]
    fn level_2_heading() {
        let (rem, block) = heading_block("## Heading2").unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Heading {
                inner: vec![text("Heading2")],
                level: 2,
            }
        );
    }

    #[test]
    fn heading_no_space() {
        assert!(heading_block("#heading").is_err());
    }

    #[test]
    fn level_6_heading() {
        let (rem, block) = heading_block("###### Heading6").unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Heading {
                inner: vec![text("Heading6")],
                level: 6,
            }
        );
    }

    #[test]
    fn no_level_7_heading() {
        assert!(heading_block("####### Heading7").is_err());
    }

    #[test]
    fn single_quote_block_with_paragraph() {
        let (rem, block) = quote_block("> sun tzu\n").unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Quote {
                inner: Box::new(Block::Paragraph {
                    inner: vec![text("sun tzu")]
                })
            }
        );
    }

    #[test]
    fn heading_and_paragraph() {
        let md = "## Heading\nHello MD\n";
        let (rem, blocks) = blocks(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            blocks,
            vec![
                Block::Heading {
                    inner: vec![text("Heading")],
                    level: 2
                },
                Block::Paragraph {
                    inner: vec![text("Hello MD")]
                },
            ]
        );
    }
}

287
src/parser/inline.rs Normal file
View File

@@ -0,0 +1,287 @@
#![allow(dead_code)]
use nom::IResult;
use nom::{
Parser,
branch::alt,
bytes::complete::{is_not, tag},
error::context,
multi::many0,
sequence::delimited,
};
use crate::ast::{Inline, Href};
/// Parses a sequence of inline elements from the start of `input`.
///
/// At each position the alternatives are tried in this order: plain text,
/// bold, italic, inline code, link. Parsing stops at the first position where
/// none of them matches, so the returned list may be empty and the remainder
/// may be non-empty.
pub fn inline(input: &str) -> IResult<&str, Vec<Inline>> {
    let element = alt((text_inline, bold_inline, italic_inline, code_inline, link_inline));
    many0(element).parse(input)
}
/// Parses a non-empty run of plain text.
///
/// The run ends in front of the first `*`, `_`, backtick, `[`, `]` or newline.
fn text_inline(input: &str) -> IResult<&str, Inline> {
    let matched: IResult<&str, &str> = is_not("*_`[]\n").parse(input);
    let (rest, run) = matched?;
    Ok((
        rest,
        Inline::Text {
            content: run.to_owned(),
        },
    ))
}
/// Parses `*...*` into an [`Inline::Bold`] whose content is parsed by `inline`.
///
/// The content may be empty, so `**` yields a bold node without children.
fn bold_inline(input: &str) -> IResult<&str, Inline> {
    let open = context("opening bold tag", tag("*"));
    let close = context("closing bold tag", tag("*"));

    let (rest, inner) = delimited(open, inline, close).parse(input)?;
    Ok((rest, Inline::Bold { inner }))
}
/// Parses `_..._` into an [`Inline::Italic`] whose content is parsed by `inline`.
fn italic_inline(input: &str) -> IResult<&str, Inline> {
    let open = context("opening italics tag", tag("_"));
    let close = context("closing italics tag", tag("_"));

    let (rest, inner) = delimited(open, inline, close).parse(input)?;
    Ok((rest, Inline::Italic { inner }))
}
/// Parses a backtick-delimited span into an [`Inline::Code`].
///
/// The content is taken verbatim (no nested inline parsing), must be
/// non-empty, and may contain neither a backtick nor a newline.
fn code_inline(input: &str) -> IResult<&str, Inline> {
    let open = context("opening code tag", tag("`"));
    let body = context("inline code", is_not("`\n"));
    let close = context("closing code tag", tag("`"));

    let parsed: IResult<&str, &str> = delimited(open, body, close).parse(input);
    let (rest, code) = parsed?;
    Ok((
        rest,
        Inline::Code {
            content: code.to_owned(),
        },
    ))
}
/// Parses `[label](href)` into an [`Inline::Link`].
///
/// The label is parsed by `inline`, so it may contain nested formatting. The
/// href is a non-empty run of characters containing neither `)` nor a newline
/// and is handed to `Href::new` unchanged.
fn link_inline(input: &str) -> IResult<&str, Inline> {
    let label = delimited(
        context("opening link tag", tag("[")),
        context("link name", inline),
        context("closing link tag", tag("]")),
    );
    let target = delimited(
        context("opening href tag", tag("(")),
        context("link href", is_not(")\n")),
        context("closing href tag", tag(")")),
    );

    let (rest, (inner, raw_href)) = (label, target).parse(input)?;
    Ok((
        rest,
        Inline::Link {
            inner,
            href: Href::new(raw_href),
        },
    ))
}
//|-------------------------------------------------------------------------------|
//| TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS |
//|-------------------------------------------------------------------------------|
/// Unit tests for the inline-level parsers.
#[cfg(test)]
mod test {
    use super::*;

    /// Builds a plain-text node.
    fn text(content: &str) -> Inline {
        Inline::Text {
            content: content.to_string(),
        }
    }

    /// Builds a bold node around `inner`.
    fn bold(inner: Vec<Inline>) -> Inline {
        Inline::Bold { inner }
    }

    #[test]
    fn single_text() {
        let (rem, parsed) = text_inline("hello normal inline").unwrap();
        assert_eq!(rem, "");
        assert_eq!(parsed, text("hello normal inline"));
    }

    #[test]
    fn single_bold() {
        let (rem, parsed) = bold_inline("*bold text*").unwrap();
        assert_eq!(rem, "");
        assert_eq!(parsed, bold(vec![text("bold text")]));
    }

    #[test]
    fn bold_with_leftovers() {
        let (rem, parsed) = bold_inline("*bold* leftover").unwrap();
        assert_eq!(rem, " leftover");
        assert_eq!(parsed, bold(vec![text("bold")]));
    }

    #[test]
    fn inline_normal_and_bold() {
        let (rem, parsed) = inline("some *bold* text").unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![text("some "), bold(vec![text("bold")]), text(" text")]
        );
    }

    #[test]
    fn multiple_normal_and_bold() {
        let (rem, parsed) = inline("some *bold* text and more *bold stuff*").unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![
                text("some "),
                bold(vec![text("bold")]),
                text(" text and more "),
                bold(vec![text("bold stuff")]),
            ]
        );
    }

    // `**` closes immediately and yields an empty bold node, so the markers do
    // not nest: the expected result is two sibling bold nodes.
    #[test]
    fn normal_and_nested_bold() {
        let (rem, parsed) = inline("some **extra* bold* stuff").unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![
                text("some "),
                bold(vec![]),
                text("extra"),
                bold(vec![text(" bold")]),
                text(" stuff"),
            ]
        );
    }

    #[test]
    fn nested_bold_and_italics() {
        let (rem, parsed) = inline("some _nested *bold* + italics_, yeah").unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![
                text("some "),
                Inline::Italic {
                    inner: vec![
                        text("nested "),
                        bold(vec![text("bold")]),
                        text(" + italics"),
                    ]
                },
                text(", yeah"),
            ]
        );
    }

    // Formatting markers inside a code span must be kept verbatim.
    #[test]
    fn inline_code_bamboozle() {
        let (rem, parsed) = inline("take some `code and *bold* and _italics_` lmao").unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![
                text("take some "),
                Inline::Code {
                    content: "code and *bold* and _italics_".to_string()
                },
                text(" lmao"),
            ]
        );
    }

    #[test]
    fn bold_link_text() {
        let md = "[this link is *important*](http://example.com)";
        let (rem, parsed) = link_inline(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            Inline::Link {
                inner: vec![text("this link is "), bold(vec![text("important")])],
                href: Href("http://example.com".to_string())
            }
        );
    }
}

3
src/parser/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
pub mod inline;
pub mod block;