initial parser working
All checks were successful
Test the running changes / Test (push) Successful in 35s

This commit is contained in:
2025-12-11 01:46:57 +02:00
parent 617f124ac1
commit 30369cfdd3
10 changed files with 767 additions and 4 deletions

243
src/parser/block.rs Normal file
View File

@@ -0,0 +1,243 @@
#![allow(dead_code)]
use crate::{ast::Inline, parser::inline::inline};
use nom::{
IResult, Parser,
bytes::complete::{tag, take_until},
multi::{many_m_n, many1, many0},
sequence::{terminated, delimited},
branch::alt,
};
/// A block-level element of a parsed document.
#[derive(Debug, PartialEq)]
pub enum Block {
/// A heading line: `inner` is the inline content of the title and `level`
/// is the number of leading `#` markers (1 to 6, as enforced by `heading_block`).
Heading { inner: Vec<Inline>, level: u8 },
/// A fenced code block: `content` is the raw text between the fences and
/// `lang` is whatever followed the opening fence on its line (may be empty).
Code { content: String, lang: String },
/// A quote introduced by `>`, wrapping exactly one nested block.
Quote { inner: Box<Block> },
/// A run of inline elements that did not match any other block kind.
Paragraph { inner: Vec<Inline> },
}
/// Parses as many consecutive blocks as possible from the start of `input`.
///
/// Returns the parsed blocks together with the input that was left over once
/// [`block`] stopped matching; an input with no leading block yields an empty
/// vector and the untouched input.
pub fn blocks(input: &str) -> IResult<&str, Vec<Block>> {
    let mut repeated = many0(block);
    repeated.parse(input)
}
pub fn block(input: &str) -> IResult<&str, Block> {
terminated(
alt((heading_block, code_block, quote_block, paragraph_block)),
tag("\n"),
).parse(input)
}
fn paragraph_block(input: &str) -> IResult<&str, Block> {
(inline)
.parse(input)
.map(|(rem, inl)| (rem, Block::Paragraph { inner: inl }))
}
/// Parses a heading: one to six `#` markers, at least one space, then the
/// inline content of the title.
///
/// A seventh `#` is rejected because a space must follow the (at most six)
/// markers. The line ending is not consumed here.
fn heading_block(input: &str) -> IResult<&str, Block> {
    let (rest, (markers, _spaces, inner)) =
        (many_m_n(1, 6, tag("#")), many1(tag(" ")), inline).parse(input)?;
    // `many_m_n` yields at most six markers, so the narrowing cast is lossless.
    let level = markers.len() as u8;
    Ok((rest, Block::Heading { inner, level }))
}
/// Parses a fenced code block.
///
/// Expected shape: an opening "```", the language tag up to the end of that
/// line (may be empty), the body, and a closing "```" that is immediately
/// followed by a newline. The newline after the closing fence is consumed.
/// The body is kept verbatim, including the newline in front of the closing
/// fence.
fn code_block(input: &str) -> IResult<&str, Block> {
    let fenced: IResult<&str, (&str, &str, &str)> = delimited(
        tag("```"),
        (take_until("\n"), tag("\n"), take_until("```\n")),
        tag("```\n"),
    )
    .parse(input);

    let (rest, (lang, _newline, body)) = fenced?;
    let code = Block::Code {
        content: body.to_string(),
        lang: lang.to_string(),
    };
    Ok((rest, code))
}
/// Parses a quote: a `>` marker, any number of spaces, then one nested block.
///
/// The nested content is parsed by [`block`], so whatever line ending `block`
/// consumes for that content is consumed here as well.
fn quote_block(input: &str) -> IResult<&str, Block> {
    let (rest, (_marker, _spaces, quoted)) =
        (tag(">"), many0(tag(" ")), block).parse(input)?;
    let quote = Block::Quote {
        inner: Box::new(quoted),
    };
    Ok((rest, quote))
}
//|-------------------------------------------------------------------------------|
//| TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS |
//|-------------------------------------------------------------------------------|
#[cfg(test)]
mod test {
    use super::*;
    use crate::ast::Inline;

    // A plain line of text becomes a paragraph holding a single text inline.
    #[test]
    fn single_paragraph() {
        let md = "Hello markdown!!";
        let (rem, block) = paragraph_block(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Paragraph {
                inner: vec![Inline::Text {
                    content: "Hello markdown!!".to_string()
                }]
            }
        );
    }

    // The text after the opening fence is captured as the language.
    #[test]
    fn single_code_block_with_language() {
        let md = "```rust
fn main() {
\tprintln!(\"Hello, World\");
}
```
";
        let (rem, block) = code_block(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Code {
                content: "fn main() {\n\tprintln!(\"Hello, World\");\n}\n".to_string(),
                lang: "rust".to_string(),
            }
        );
    }

    // A bare opening fence yields an empty language.
    #[test]
    fn single_code_block_without_language() {
        let md = "```
echo \"hello world\"
```
";
        let (rem, block) = code_block(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Code {
                content: "echo \"hello world\"\n".to_string(),
                lang: "".to_string(),
            }
        );
    }

    // Trailing text on the closing fence line means the block never closes.
    #[test]
    fn single_code_block_fail() {
        let md = "```abc
echo hello
```errortext
";
        assert!(code_block(md).is_err());
    }

    // Two `#` markers produce a level-2 heading (the test name now matches
    // what is asserted; it used to be called `level_1_heading`).
    #[test]
    fn level_2_heading() {
        let md = "## Heading2";
        let (rem, block) = heading_block(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Heading {
                inner: vec![Inline::Text {
                    content: "Heading2".to_string()
                }],
                level: 2,
            }
        );
    }

    // A space between the markers and the title is mandatory.
    #[test]
    fn heading_no_space() {
        let md = "#heading";
        assert!(heading_block(md).is_err());
    }

    // Six markers is the deepest heading accepted.
    #[test]
    fn level_6_heading() {
        let md = "###### Heading6";
        let (rem, block) = heading_block(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Heading {
                inner: vec![Inline::Text {
                    content: "Heading6".to_string()
                }],
                level: 6,
            }
        );
    }

    // Seven markers is not a heading.
    #[test]
    fn no_level_7_heading() {
        let md = "####### Heading7";
        assert!(heading_block(md).is_err());
    }

    // A quote wraps the block parsed from the text after the `>` marker.
    #[test]
    fn single_quote_block_with_paragraph() {
        let md = "> sun tzu\n";
        let (rem, block) = quote_block(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            block,
            Block::Quote {
                inner: Box::new(Block::Paragraph {
                    inner: vec![Inline::Text {
                        content: "sun tzu".to_string()
                    }]
                })
            }
        );
    }

    // Two newline-terminated lines are parsed as two consecutive blocks.
    #[test]
    fn heading_and_paragraph() {
        let md =
"## Heading
Hello MD
";
        let (rem, blocks) = blocks(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            blocks,
            vec![
                Block::Heading {
                    inner: vec![Inline::Text {
                        content: "Heading".to_string()
                    }],
                    level: 2
                },
                Block::Paragraph {
                    inner: vec![Inline::Text {
                        content: "Hello MD".to_string()
                    }]
                }
            ]
        );
    }
}

287
src/parser/inline.rs Normal file
View File

@@ -0,0 +1,287 @@
#![allow(dead_code)]
use nom::IResult;
use nom::{
Parser,
branch::alt,
bytes::complete::{is_not, tag},
error::context,
multi::many0,
sequence::delimited,
};
use crate::ast::{Inline, Href};
/// Parses zero or more inline elements from the start of `input`.
///
/// At each position the alternatives are tried in this order: plain text,
/// bold, italic, inline code, link. Parsing stops at the first position where
/// none of them matches, and the rest of the input is returned untouched.
pub fn inline(input: &str) -> IResult<&str, Vec<Inline>> {
    let element = alt((
        text_inline,
        bold_inline,
        italic_inline,
        code_inline,
        link_inline,
    ));
    many0(element).parse(input)
}
/// Parses a non-empty run of plain text.
///
/// The run ends in front of the first `*`, `_`, `` ` ``, `[`, `]` or newline;
/// an input that starts with one of those characters is an error.
fn text_inline(input: &str) -> IResult<&str, Inline> {
    let run: IResult<&str, &str> = is_not("*_`[]\n").parse(input);
    let (rest, text) = run?;
    let plain = Inline::Text {
        content: text.to_string(),
    };
    Ok((rest, plain))
}
/// Parses bold text: inline content enclosed in a pair of single `*`.
///
/// The content is parsed recursively with [`inline`] and may be empty.
fn bold_inline(input: &str) -> IResult<&str, Inline> {
    let opening = context("opening bold tag", tag("*"));
    let closing = context("closing bold tag", tag("*"));
    let (rest, inner) = delimited(opening, inline, closing).parse(input)?;
    Ok((rest, Inline::Bold { inner }))
}
/// Parses italic text: inline content enclosed in a pair of `_`.
///
/// The content is parsed recursively with [`inline`] and may be empty.
fn italic_inline(input: &str) -> IResult<&str, Inline> {
    let opening = context("opening italics tag", tag("_"));
    let closing = context("closing italics tag", tag("_"));
    let (rest, inner) = delimited(opening, inline, closing).parse(input)?;
    Ok((rest, Inline::Italic { inner }))
}
/// Parses inline code: a non-empty span enclosed in a pair of backticks.
///
/// The span is kept verbatim (no nested inline parsing) and may not contain
/// a backtick or a newline.
fn code_inline(input: &str) -> IResult<&str, Inline> {
    let span: IResult<&str, &str> = delimited(
        context("opening code tag", tag("`")),
        context("inline code", is_not("`\n")),
        context("closing code tag", tag("`")),
    )
    .parse(input);

    let (rest, code) = span?;
    let snippet = Inline::Code {
        content: code.to_string(),
    };
    Ok((rest, snippet))
}
/// Parses a link of the form `[name](href)`.
///
/// The name is parsed recursively with [`inline`]; the href is the non-empty
/// text between the parentheses, which may not contain `)` or a newline, and
/// is handed to `Href::new`.
fn link_inline(input: &str) -> IResult<&str, Inline> {
    let name = delimited(
        context("opening link tag", tag("[")),
        context("link name", inline),
        context("closing link tag", tag("]")),
    );
    let target = delimited(
        context("opening href tag", tag("(")),
        context("link href", is_not(")\n")),
        context("closing href tag", tag(")")),
    );

    let (rest, (inner, href)) = (name, target).parse(input)?;
    let link = Inline::Link {
        inner,
        href: Href::new(href),
    };
    Ok((rest, link))
}
//|-------------------------------------------------------------------------------|
//| TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS TESTS |
//|-------------------------------------------------------------------------------|
#[cfg(test)]
mod test {
    use super::*;

    // Text without any special character is consumed as one text inline.
    #[test]
    fn single_text() {
        let md = "hello normal inline";
        let (rem, parsed) = text_inline(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            Inline::Text {
                content: "hello normal inline".to_string()
            }
        );
    }

    // A pair of `*` wraps its content in a bold inline.
    #[test]
    fn single_bold() {
        let md = "*bold text*";
        let (rem, parsed) = bold_inline(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            Inline::Bold {
                inner: vec![Inline::Text {
                    content: "bold text".to_string()
                }]
            }
        );
    }

    // Input after the closing `*` is returned as the remainder.
    #[test]
    fn bold_with_leftovers() {
        let md = "*bold* leftover";
        let (rem, parsed) = bold_inline(md).unwrap();
        assert_eq!(rem, " leftover");
        assert_eq!(
            parsed,
            Inline::Bold {
                inner: vec![Inline::Text {
                    content: "bold".to_string()
                }]
            }
        );
    }

    // Text and bold inlines alternate within one line.
    #[test]
    fn inline_normal_and_bold() {
        let md = "some *bold* text";
        let (rem, parsed) = inline(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![
                Inline::Text {
                    content: "some ".to_string()
                },
                Inline::Bold {
                    inner: vec![Inline::Text {
                        content: "bold".to_string()
                    }]
                },
                Inline::Text {
                    content: " text".to_string()
                },
            ]
        );
    }

    // Several bold spans in one line are each parsed on their own.
    #[test]
    fn multiple_normal_and_bold() {
        let md = "some *bold* text and more *bold stuff*";
        let (rem, parsed) = inline(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![
                Inline::Text {
                    content: "some ".to_string()
                },
                Inline::Bold {
                    inner: vec![Inline::Text {
                        content: "bold".to_string()
                    }]
                },
                Inline::Text {
                    content: " text and more ".to_string()
                },
                Inline::Bold {
                    inner: vec![Inline::Text {
                        content: "bold stuff".to_string()
                    }]
                },
            ]
        );
    }

    // `**` closes immediately as an empty bold span; bold does not nest in bold.
    #[test]
    fn normal_and_nested_bold() {
        let md = "some **extra* bold* stuff";
        let (rem, parsed) = inline(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![
                Inline::Text {
                    content: "some ".to_string()
                },
                Inline::Bold { inner: vec![] },
                Inline::Text {
                    content: "extra".to_string()
                },
                Inline::Bold {
                    inner: vec![Inline::Text {
                        content: " bold".to_string()
                    }]
                },
                Inline::Text {
                    content: " stuff".to_string()
                },
            ]
        );
    }

    // Bold nests inside italics.
    #[test]
    fn nested_bold_and_italics() {
        let md = "some _nested *bold* + italics_, yeah";
        let (rem, parsed) = inline(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![
                Inline::Text {
                    content: "some ".to_string()
                },
                Inline::Italic {
                    inner: vec![
                        Inline::Text {
                            content: "nested ".to_string()
                        },
                        Inline::Bold {
                            inner: vec![Inline::Text {
                                content: "bold".to_string()
                            }]
                        },
                        Inline::Text {
                            content: " + italics".to_string()
                        },
                    ]
                },
                Inline::Text {
                    content: ", yeah".to_string()
                },
            ]
        );
    }

    // Markers inside inline code are kept verbatim instead of being parsed.
    #[test]
    fn inline_code_bamboozle() {
        let md = "take some `code and *bold* and _italics_` lmao";
        let (rem, parsed) = inline(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            vec![
                Inline::Text {
                    content: "take some ".to_string()
                },
                Inline::Code {
                    content: "code and *bold* and _italics_".to_string()
                },
                Inline::Text {
                    content: " lmao".to_string()
                },
            ]
        );
    }

    // The link name is parsed as inline content, so it may contain bold text.
    #[test]
    fn bold_link_text() {
        let md = "[this link is *important*](http://example.com)";
        let (rem, parsed) = link_inline(md).unwrap();
        assert_eq!(rem, "");
        assert_eq!(
            parsed,
            Inline::Link {
                inner: vec![
                    Inline::Text {
                        content: "this link is ".to_string()
                    },
                    Inline::Bold {
                        inner: vec![Inline::Text {
                            content: "important".to_string()
                        }]
                    },
                ],
                href: Href("http://example.com".to_string())
            }
        );
    }
}

3
src/parser/mod.rs Normal file
View File

@@ -0,0 +1,3 @@
/// Inline-level parsers: plain text, bold, italic, inline code and links.
pub mod inline;
/// Block-level parsers: headings, fenced code blocks, quotes and paragraphs.
pub mod block;