refactored markdown parsing, added some documentation
All checks were successful
Test the running changes / Test (push) Successful in 40s

This commit is contained in:
2025-11-15 01:41:14 +02:00
parent 05a0b32d9b
commit 8d47704b7e
18 changed files with 428 additions and 290 deletions

View File

@@ -1,19 +1,10 @@
use crate::to_html::ToHtml;
//! Abstract syntax tree of "Markdown".
/// Root node of the abstract syntax tree: a document is a sequence of blocks.
#[derive(Debug, Clone, PartialEq)]
pub struct Document {
/// The top-level blocks that make up the document.
pub blocks: Vec<Block>,
}
impl ToHtml for Document {
    /// Renders the document as a complete HTML page.
    ///
    /// The rendered blocks are placed inside the `<body>` of a minimal page
    /// skeleton with an empty `<head>`. The document is consumed.
    fn to_html(self) -> String {
        let body = self.blocks.to_html();

        let mut page = String::from("<!doctype html><html lang=en><head></head><body>");
        page.push_str(&body);
        page.push_str("</body></html>");
        page
    }
}
#[derive(Debug, Clone, PartialEq)]
pub enum Block {
Paragraph(Vec<Inline>),
@@ -29,24 +20,6 @@ pub enum Block {
Quote(Vec<Block>),
}
impl ToHtml for Block {
    /// Renders a single block-level element as an HTML fragment.
    ///
    /// Paragraphs become `<p>`, headings `<hN>` (with `N` being the heading
    /// level) and code blocks `<pre><code>`. The text of a code block is
    /// HTML-escaped so that source containing `<`, `>` or `&` is displayed
    /// literally instead of being interpreted as markup. The language of a
    /// code block is currently ignored.
    ///
    /// # Panics
    /// Panics via `todo!()` for every block variant that has no renderer yet.
    fn to_html(self) -> String {
        // Escapes the characters that are significant in HTML text content.
        fn escape(raw: &str) -> String {
            let mut escaped = String::with_capacity(raw.len());
            for ch in raw.chars() {
                match ch {
                    '&' => escaped.push_str("&amp;"),
                    '<' => escaped.push_str("&lt;"),
                    '>' => escaped.push_str("&gt;"),
                    other => escaped.push(other),
                }
            }
            escaped
        }

        match self {
            Self::Paragraph(content) => format!("<p>{}</p>", content.to_html()),
            Self::Heading { level, content } => {
                format!("<h{}>{}</h{}>", level, content.to_html(), level)
            }
            Self::Code {
                language: _,
                content,
            } => {
                // Code must be shown verbatim, so it is escaped rather than
                // inserted as raw markup.
                format!("<pre><code>{}</code></pre>", escape(&content))
            }
            // Remaining block kinds are not rendered yet.
            _ => todo!(),
        }
    }
}
#[derive(Debug, Clone, PartialEq)]
pub struct ListItem {
pub blocks: Vec<Block>,
@@ -60,167 +33,3 @@ pub enum Inline {
Code(String),
Link { text: Vec<Inline>, href: String },
}
impl ToHtml for Inline {
    /// Renders a single inline element as an HTML fragment.
    ///
    /// Bold and italic content is rendered recursively inside `<b>` / `<i>`.
    /// Inline code is HTML-escaped so it is displayed literally, and a link
    /// target is escaped so that a `"` or `&` inside it cannot break out of
    /// the `href` attribute.
    fn to_html(self) -> String {
        // Escapes the characters that are significant in HTML text content
        // and in double-quoted attribute values.
        fn escape(raw: &str) -> String {
            let mut escaped = String::with_capacity(raw.len());
            for ch in raw.chars() {
                match ch {
                    '&' => escaped.push_str("&amp;"),
                    '<' => escaped.push_str("&lt;"),
                    '>' => escaped.push_str("&gt;"),
                    '"' => escaped.push_str("&quot;"),
                    other => escaped.push(other),
                }
            }
            escaped
        }

        match self {
            // NOTE(review): plain text is emitted verbatim, so raw HTML written
            // in the Markdown source reaches the output unescaped. Confirm this
            // pass-through is intended.
            Self::Text(s) => s,
            Self::Bold(content) => format!("<b>{}</b>", content.to_html()),
            Self::Italic(content) => format!("<i>{}</i>", content.to_html()),
            Self::Code(s) => format!("<code>{}</code>", escape(&s)),
            Self::Link { text, href } => {
                format!("<a href=\"{}\">{}</a>", escape(&href), text.to_html())
            }
        }
    }
}
impl<T> ToHtml for Vec<T>
where
    T: ToHtml,
{
    /// Renders every element in order and concatenates the fragments without
    /// a separator. An empty vector yields an empty string.
    fn to_html(self) -> String {
        self.into_iter().map(T::to_html).collect()
    }
}
// --------------------
// TESTS
// --------------------
#[cfg(test)]
mod unit_test {
    use super::*;

    /// Shorthand for a plain-text inline node.
    fn text(s: &str) -> Inline {
        Inline::Text(s.to_string())
    }

    /// Renders a document that consists of the given blocks.
    fn render(blocks: Vec<Block>) -> String {
        Document { blocks }.to_html()
    }

    /// A level-1 heading whose first word is bold.
    fn bold_heading() -> Block {
        Block::Heading {
            level: 1,
            content: vec![Inline::Bold(vec![text("Bold")]), text(" heading 1")],
        }
    }

    #[test]
    fn single_header() {
        let html = render(vec![Block::Heading {
            level: 1,
            content: vec![text("Heading 1")],
        }]);

        assert_eq!(
            html,
            "<!doctype html><html lang=en><head></head><body><h1>Heading 1</h1></body></html>"
        );
    }

    #[test]
    fn inline_bold_header() {
        let html = render(vec![bold_heading()]);

        assert_eq!(
            html,
            "<!doctype html><html lang=en><head></head><body><h1><b>Bold</b> heading 1</h1></body></html>"
        );
    }

    #[test]
    fn headings_and_paragraph_nested_code() {
        let second_heading = Block::Heading {
            level: 2,
            content: vec![text("Heading 2")],
        };
        let paragraph = Block::Paragraph(vec![
            text("run "),
            Inline::Code("sudo rm -rf /".to_string()),
            text(" on your computer"),
        ]);

        let html = render(vec![bold_heading(), second_heading, paragraph]);

        assert_eq!(
            html,
            "<!doctype html><html lang=en><head></head><body><h1><b>Bold</b> heading 1</h1><h2>Heading 2</h2><p>run <code>sudo rm -rf /</code> on your computer</p></body></html>"
        );
    }
}
#[cfg(test)]
mod convert_md_to_html_test {
    use crate::parser::parse;
    use crate::to_html::ToHtml;

    /// Parses `md` and renders it to HTML, panicking with the parse error's
    /// message if parsing fails.
    fn render(md: &str) -> String {
        parse(md).unwrap_or_else(|e| panic!("{}", e)).to_html()
    }

    #[test]
    fn single_header() {
        assert_eq!(
            render("# Header 1"),
            "<!doctype html><html lang=en><head></head><body><h1>Header 1</h1></body></html>"
        );
    }

    #[test]
    fn single_header_wrong_format() {
        // A '#' that is not followed by a space must be rejected.
        assert!(parse("#Whoops").is_err());
    }

    #[test]
    fn nested_bold_headers_and_nested_code_paragraph() {
        let md = "# *Bold* header 1\n## Header 2\nrun `sudo rm -rf /` on your computer";

        assert_eq!(
            render(md),
            "<!doctype html><html lang=en><head></head><body><h1><b>Bold</b> header 1</h1><h2>Header 2</h2><p>run <code>sudo rm -rf /</code> on your computer</p></body></html>"
        );
    }
}
#[cfg(test)]
mod parse_real_md {
    use std::fs;

    use crate::parser::parse;

    /// Smoke test: the sample file `./test.md` must parse without an error.
    #[test]
    fn go() {
        let file = "./test.md";
        let md = fs::read_to_string(file).expect("reading ./test.md failed");

        // Attach the file name to a parse error so the panic message names it.
        if let Err(e) = parse(&md).map_err(|e| e.set_file(file.into())) {
            panic!("{}", e);
        }
    }
}

View File

@@ -1,4 +1,8 @@
//! A "Markdown" parser and HTML generator. Part of a static site generator `marksmith-rs`.
//! Not following any standards, only vibes.
#![deny(unused_imports)]
#![allow(clippy::needless_pass_by_value)]
use fstools::crawl_fs;
use parser::parse;
@@ -57,6 +61,7 @@ impl MdParseError {
}
*/
#[must_use]
pub fn set_line(self, line: usize) -> Self {
Self {
file: self.file,
@@ -67,6 +72,7 @@ impl MdParseError {
}
}
#[must_use]
pub fn set_file(self, file: PathBuf) -> Self {
Self {
file: Some(file),
@@ -98,7 +104,9 @@ impl std::error::Error for MdParseError {}
#[derive(Debug)]
pub enum Error {
InDirIsNotDir,
OutDirIsNotEmpty,
OutDirIsNotDir,
OutDirFileDeleteNotAllowed,
OutDirDirectoryInPlaceOfFile,
FileRead,
@@ -125,7 +133,18 @@ impl std::error::Error for Error {}
type Result<T> = std::result::Result<T, crate::Error>;
/// Takes two directories and a force flag as parameters, generates html files to the outdir in the
/// same directory structure as the md files in indir.
///
/// # Errors
/// Anything wrong with reading files from the directories or parsing the files.
pub fn generate(indir: &PathBuf, outdir: &PathBuf, force: bool) -> Result<()> {
if !indir.is_dir() {
Err(Error::InDirIsNotDir)?;
}
if !outdir.is_dir() {
Err(Error::OutDirIsNotDir)?;
}
let files = crawl_fs(indir);
for path in files {

View File

@@ -65,7 +65,7 @@ pub trait ParsePattern: Iterator + Clone {
}
*/
pub trait Parse: Iterator {
pub trait Parse: Iterator + Clone {
fn follows(&mut self, token: char) -> bool;
fn parse_token(&mut self, token: char) -> bool {
@@ -77,23 +77,32 @@ pub trait Parse: Iterator {
}
}
fn parse_str(&mut self, _tokens: &str) -> bool {
todo!()
fn parse_str(&mut self, tokens: &str) -> bool {
let mut cloned = self.clone();
for pat_token in tokens.chars() {
if cloned.follows(pat_token) {
cloned.next();
} else {
return false;
}
}
*self = cloned;
true
}
}
impl Parse for std::iter::Peekable<std::str::Chars<'_>> {
fn follows(&mut self, token: char) -> bool {
self.peek().map(|c| c == &token).unwrap_or(false)
self.peek().is_some_and(|c| c == &token)
}
}
impl Parse for std::iter::Peekable<std::iter::Enumerate<std::str::Chars<'_>>> {
fn follows(&mut self, token: char) -> bool {
self.peekable()
.peek()
.map(|&(_i, c)| c == token)
.unwrap_or(false)
self.peek().is_some_and(|&(_i, c)| c == token)
}
}
@@ -108,4 +117,60 @@ mod test {
assert!(c.follows('a'));
assert!(c.follows('a'));
}
#[test]
fn chars_parse_tokens() {
    // Matching tokens are accepted one after another.
    let mut cursor = "abcdef".chars().peekable();
    for expected in ['a', 'b'] {
        assert!(cursor.parse_token(expected));
    }
}
#[test]
fn chars_parse_str() {
    // Two consecutive patterns that together cover the whole input.
    let mut cursor = "abcdef".chars().peekable();
    for pattern in ["abc", "def"] {
        assert!(cursor.parse_str(pattern));
    }
}
#[test]
fn enumerate_parse_follows_double() {
    // Asking `follows` twice for the same token must succeed both times.
    let mut cursor = "abc".chars().enumerate().peekable();
    for _ in 0..2 {
        assert!(cursor.follows('a'));
    }
}
#[test]
fn enumerate_parse_tokens() {
    // Same as `chars_parse_tokens`, but on an enumerated character stream.
    let mut cursor = "abcdef".chars().enumerate().peekable();
    for expected in ['a', 'b'] {
        assert!(cursor.parse_token(expected));
    }
}
#[test]
fn enumerate_parse_str() {
    // Same as `chars_parse_str`, but on an enumerated character stream.
    let mut cursor = "abcdef".chars().enumerate().peekable();
    for pattern in ["abc", "def"] {
        assert!(cursor.parse_str(pattern));
    }
}
#[test]
fn enumerate_parse_token_failed_not_consume() {
    let mut cursor = "abc".chars().enumerate().peekable();

    // A mismatch is reported ...
    let mismatch = cursor.parse_token('b');
    assert!(!mismatch);

    // ... and the first character can still be parsed afterwards.
    let matched = cursor.parse_token('a');
    assert!(matched);
}
#[test]
fn enumerate_parse_str_failed_not_consume() {
    let mut cursor = "abcdef".chars().enumerate().peekable();

    // A pattern that does not match at the current position is rejected ...
    let mismatch = cursor.parse_str("def");
    assert!(!mismatch);

    // ... and the input can still be parsed from its start afterwards.
    let matched = cursor.parse_str("abc");
    assert!(matched);
}
}

View File

@@ -1,3 +1,5 @@
//! Parse "Markdown" to AST.
mod block;
mod inline;
@@ -5,13 +7,15 @@ use block::parse_blocks;
use crate::{MdParseError, ast::Document};
/// Parses Markdown source text into a [`Document`] syntax tree.
///
/// # Errors
/// Returns the `MdParseError` reported by block parsing when the input is
/// rejected.
pub fn parse(s: &str) -> Result<Document, MdParseError> {
    parse_blocks(s).map(|blocks| Document { blocks })
}
/*
#[cfg(test)]
mod test {
use crate::ast::*;
@@ -21,7 +25,7 @@ mod test {
fn only_paragraph() {
let md = "testing paragraph";
let doc = parse(md);
let doc = parse(md).unwrap();
assert_eq!(
doc,
Document {
@@ -36,7 +40,7 @@ mod test {
fn different_headers() {
let md = "# Header 1\n## Header 2";
let doc = parse(md);
let doc = parse(md).unwrap();
assert_eq!(
doc,
@@ -59,7 +63,7 @@ mod test {
fn inline_bold_and_italics() {
let md = "some *bold* and _italic_ text";
let doc = parse(md);
let doc = parse(md).unwrap();
assert_eq!(
doc,
@@ -79,7 +83,7 @@ mod test {
fn inline_code() {
let md = "run command `sudo rm -rf /`";
let doc = parse(md);
let doc = parse(md).unwrap();
assert_eq!(
doc,
@@ -96,7 +100,7 @@ mod test {
fn bold_header() {
let md = "# Header is *bold*";
let doc = parse(md);
let doc = parse(md).unwrap();
assert_eq!(
doc,
@@ -116,7 +120,7 @@ mod test {
fn anonymous_code_block() {
let md = "```\necho hello\n```";
let doc = parse(md);
let doc = parse(md).unwrap();
assert_eq!(
doc,
@@ -133,7 +137,7 @@ mod test {
fn rust_code_block() {
let md = "```rust\nfn main() {\n\tprintln!(\"Hello world!\");\n}\n```";
let doc = parse(md);
let doc = parse(md).unwrap();
assert_eq!(
doc,

View File

@@ -10,6 +10,11 @@ pub fn parse_blocks(input: &str) -> Result<Vec<Block>, MdParseError> {
while let Some((i, line)) = lines.next() {
let mut line_chars = line.chars().peekable();
// empty line
if line_chars.peek().is_none() {
continue;
}
// header
let mut heading_level = 0;
while line_chars.parse_token('#') {
@@ -53,6 +58,7 @@ pub fn parse_blocks(input: &str) -> Result<Vec<Block>, MdParseError> {
};
let mut code = String::new();
let mut successful = false;
for (j, line) in lines.by_ref() {
let mut code_line_chars = line.chars().peekable();
// code block end
@@ -63,23 +69,31 @@ pub fn parse_blocks(input: &str) -> Result<Vec<Block>, MdParseError> {
language: lang,
content: code,
});
successful = true;
break;
} else {
Err(MdParseError::from_line(
j + 1,
"```",
format!("```{}", remaining),
))?;
}
Err(MdParseError::from_line(
j + 1,
"```",
format!("```{remaining}"),
))?;
} else {
code.push_str(line);
code.push('\n');
}
}
if successful {
continue;
}
Err(MdParseError::from_line(i + 1, "a terminating '```'", ""))?;
}
// lists TODO
// paragraph
blocks.push(Block::Paragraph(
parse_inlines(line).map_err(|e| e.set_line(i + 1))?,
));
}
Ok(blocks)

View File

@@ -36,11 +36,12 @@ pub fn parse_inlines(input: &str) -> Result<Vec<Inline>, MdParseError> {
_ => {
let mut text = String::new();
text.push(c);
while let Some(nc) = chars.next() {
while let Some(&nc) = chars.peek() {
if matches!(nc, '*' | '_' | '`' | '[') {
break;
}
text.push(nc);
let c = chars.next().ok_or(MdParseError::new("a character", ""))?;
text.push(c);
}
inlines.push(Inline::Text(text));
}
@@ -55,7 +56,7 @@ fn collect_until<I: Iterator<Item = char>>(
end: char,
) -> Result<String, MdParseError> {
let mut s = String::new();
while let Some(c) = chars.next() {
for c in chars.by_ref() {
if c == end {
return Ok(s);
}
@@ -63,3 +64,68 @@ fn collect_until<I: Iterator<Item = char>>(
}
Err(MdParseError::new(end, ""))
}
#[cfg(test)]
mod test {
    use super::parse_inlines;
    use crate::ast::Inline;

    /// Shorthand for a plain-text inline node.
    fn text(s: &str) -> Inline {
        Inline::Text(s.to_string())
    }

    #[test]
    fn bold_text() {
        let parsed = parse_inlines("*abc*").unwrap();
        let expected = vec![Inline::Bold(vec![text("abc")])];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn italic_text() {
        let parsed = parse_inlines("_abc_").unwrap();
        let expected = vec![Inline::Italic(vec![text("abc")])];
        assert_eq!(parsed, expected);
    }

    #[test]
    fn bold_italic_text() {
        // Italic nested inside bold.
        let parsed = parse_inlines("*_abc_*").unwrap();
        let italic = Inline::Italic(vec![text("abc")]);
        assert_eq!(parsed, vec![Inline::Bold(vec![italic])]);
    }

    #[test]
    fn code() {
        let parsed = parse_inlines("`sudo rm -rf /`").unwrap();
        assert_eq!(parsed, vec![Inline::Code("sudo rm -rf /".to_string())]);
    }

    #[test]
    fn text_and_code() {
        // Text on both sides of a code span must be kept, including spaces.
        let parsed = parse_inlines("run `sudo rm -rf /` on your computer").unwrap();
        let expected = vec![
            text("run "),
            Inline::Code("sudo rm -rf /".to_string()),
            text(" on your computer"),
        ];
        assert_eq!(parsed, expected);
    }
}

View File

@@ -1,3 +1,198 @@
//! A trait + implementations for generating HTML.
use crate::ast::{Block, Document, Inline};
/// Conversion of a value into its HTML representation.
pub trait ToHtml {
/// Consumes `self` and returns the generated HTML as a `String`.
fn to_html(self) -> String;
}
impl ToHtml for Document {
    /// Renders the document as a complete HTML page.
    ///
    /// The rendered blocks are placed inside the `<body>` of a minimal page
    /// skeleton with an empty `<head>`. The document is consumed.
    fn to_html(self) -> String {
        let body = self.blocks.to_html();

        let mut page = String::from("<!doctype html><html lang=en><head></head><body>");
        page.push_str(&body);
        page.push_str("</body></html>");
        page
    }
}
impl ToHtml for Block {
    /// Renders a single block-level element as an HTML fragment.
    ///
    /// Paragraphs become `<p>`, headings `<hN>` (with `N` being the heading
    /// level) and code blocks `<pre><code>`. The text of a code block is
    /// HTML-escaped so that source containing `<`, `>` or `&` is displayed
    /// literally instead of being interpreted as markup. The language of a
    /// code block is currently ignored.
    ///
    /// # Panics
    /// Panics via `todo!()` for every block variant that has no renderer yet.
    fn to_html(self) -> String {
        // Escapes the characters that are significant in HTML text content.
        fn escape(raw: &str) -> String {
            let mut escaped = String::with_capacity(raw.len());
            for ch in raw.chars() {
                match ch {
                    '&' => escaped.push_str("&amp;"),
                    '<' => escaped.push_str("&lt;"),
                    '>' => escaped.push_str("&gt;"),
                    other => escaped.push(other),
                }
            }
            escaped
        }

        match self {
            Self::Paragraph(content) => format!("<p>{}</p>", content.to_html()),
            Self::Heading { level, content } => {
                format!("<h{}>{}</h{}>", level, content.to_html(), level)
            }
            Self::Code {
                language: _,
                content,
            } => {
                // Code must be shown verbatim, so it is escaped rather than
                // inserted as raw markup.
                format!("<pre><code>{}</code></pre>", escape(&content))
            }
            // Remaining block kinds are not rendered yet.
            _ => todo!(),
        }
    }
}
impl ToHtml for Inline {
    /// Renders a single inline element as an HTML fragment.
    ///
    /// Bold and italic content is rendered recursively inside `<b>` / `<i>`.
    /// Inline code is HTML-escaped so it is displayed literally, and a link
    /// target is escaped so that a `"` or `&` inside it cannot break out of
    /// the `href` attribute.
    fn to_html(self) -> String {
        // Escapes the characters that are significant in HTML text content
        // and in double-quoted attribute values.
        fn escape(raw: &str) -> String {
            let mut escaped = String::with_capacity(raw.len());
            for ch in raw.chars() {
                match ch {
                    '&' => escaped.push_str("&amp;"),
                    '<' => escaped.push_str("&lt;"),
                    '>' => escaped.push_str("&gt;"),
                    '"' => escaped.push_str("&quot;"),
                    other => escaped.push(other),
                }
            }
            escaped
        }

        match self {
            // NOTE(review): plain text is emitted verbatim, so raw HTML written
            // in the Markdown source reaches the output unescaped. Confirm this
            // pass-through is intended.
            Self::Text(s) => s,
            Self::Bold(content) => format!("<b>{}</b>", content.to_html()),
            Self::Italic(content) => format!("<i>{}</i>", content.to_html()),
            Self::Code(s) => format!("<code>{}</code>", escape(&s)),
            Self::Link { text, href } => {
                format!("<a href=\"{}\">{}</a>", escape(&href), text.to_html())
            }
        }
    }
}
impl<T> ToHtml for Vec<T>
where
    T: ToHtml,
{
    /// Renders every element in order and concatenates the fragments without
    /// a separator. An empty vector yields an empty string.
    fn to_html(self) -> String {
        self.into_iter().map(T::to_html).collect()
    }
}
// --------------------
// TESTS
// --------------------
#[cfg(test)]
mod unit_test {
    use super::*;

    /// Shorthand for a plain-text inline node.
    fn text(s: &str) -> Inline {
        Inline::Text(s.to_string())
    }

    /// Renders a document that consists of the given blocks.
    fn render(blocks: Vec<Block>) -> String {
        Document { blocks }.to_html()
    }

    /// A level-1 heading whose first word is bold.
    fn bold_heading() -> Block {
        Block::Heading {
            level: 1,
            content: vec![Inline::Bold(vec![text("Bold")]), text(" heading 1")],
        }
    }

    #[test]
    fn single_header() {
        let html = render(vec![Block::Heading {
            level: 1,
            content: vec![text("Heading 1")],
        }]);

        assert_eq!(
            html,
            "<!doctype html><html lang=en><head></head><body><h1>Heading 1</h1></body></html>"
        );
    }

    #[test]
    fn inline_bold_header() {
        let html = render(vec![bold_heading()]);

        assert_eq!(
            html,
            "<!doctype html><html lang=en><head></head><body><h1><b>Bold</b> heading 1</h1></body></html>"
        );
    }

    #[test]
    fn headings_and_paragraph_nested_code() {
        let second_heading = Block::Heading {
            level: 2,
            content: vec![text("Heading 2")],
        };
        let paragraph = Block::Paragraph(vec![
            text("run "),
            Inline::Code("sudo rm -rf /".to_string()),
            text(" on your computer"),
        ]);

        let html = render(vec![bold_heading(), second_heading, paragraph]);

        assert_eq!(
            html,
            "<!doctype html><html lang=en><head></head><body><h1><b>Bold</b> heading 1</h1><h2>Heading 2</h2><p>run <code>sudo rm -rf /</code> on your computer</p></body></html>"
        );
    }
}
#[cfg(test)]
mod convert_md_to_html_test {
    use crate::parser::parse;
    use crate::to_html::ToHtml;

    /// Parses `md` and renders it to HTML, panicking with the parse error's
    /// message if parsing fails.
    fn render(md: &str) -> String {
        parse(md).unwrap_or_else(|e| panic!("{}", e)).to_html()
    }

    #[test]
    fn single_header() {
        assert_eq!(
            render("# Header 1"),
            "<!doctype html><html lang=en><head></head><body><h1>Header 1</h1></body></html>"
        );
    }

    #[test]
    fn single_header_wrong_format() {
        // A '#' that is not followed by a space must be rejected.
        assert!(parse("#Whoops").is_err());
    }

    #[test]
    fn nested_bold_headers_and_nested_code_paragraph() {
        let md = "# *Bold* header 1\n## Header 2\nrun `sudo rm -rf /` on your computer";

        assert_eq!(
            render(md),
            "<!doctype html><html lang=en><head></head><body><h1><b>Bold</b> header 1</h1><h2>Header 2</h2><p>run <code>sudo rm -rf /</code> on your computer</p></body></html>"
        );
    }
}
#[cfg(test)]
mod parse_real_md {
    use std::fs;

    use crate::parser::parse;

    /// Smoke test: the sample file `./test.md` must parse without an error.
    #[test]
    fn go() {
        let file = "./test.md";
        let md = fs::read_to_string(file).expect("reading ./test.md failed");

        // Attach the file name to a parse error so the panic message names it.
        if let Err(e) = parse(&md).map_err(|e| e.set_file(file.into())) {
            panic!("{}", e);
        }
    }
}

View File

@@ -1,7 +1,7 @@
# Header *1kkkkkkkkkkkkkkkkkkkkkk*
this is some code: `abc
this is some code: `abc`
```code
oiajwefoijao089uaoisdjfoijasdfoijasdofij
```