refactor md parser, TODO: parse_str

This commit is contained in:
2025-11-14 02:22:51 +02:00
parent d74613aa93
commit 446a27c040
7 changed files with 382 additions and 42 deletions

View File

@@ -1,22 +1,106 @@
use crate::ast::Block;
use super::inline::parse_inlines;
use crate::{MdParseError, ast::Block};
pub fn parse_blocks(input: &str) -> Vec<Block> {
use crate::parse_trait::Parse;
/// Parses markdown `input` line by line into a list of [`Block`]s.
///
/// Currently recognised constructs:
/// - headings: one or more leading `#` followed by a space; the level is
///   capped at 6 (additional `#` are consumed without raising the level);
/// - fenced code blocks: an opening line starting with three backticks
///   (the rest of that line is the optional language) up to a line that is
///   exactly three backticks.
///
/// Lines matching neither construct are currently ignored (quotes and lists
/// are still TODO).
///
/// # Errors
///
/// Returns an [`MdParseError`] carrying the 1-based line number when
/// - a heading's `#` run is not followed by a space,
/// - the heading text fails inline parsing,
/// - a closing fence is followed by extra characters, or
/// - the input ends before a code block is closed.
pub fn parse_blocks(input: &str) -> Result<Vec<Block>, MdParseError> {
    let mut blocks = Vec::new();
    let mut lines = input.lines().enumerate().peekable();

    // `while let` rather than `for`: the code-block branch below pulls
    // further lines from the same iterator via `by_ref()`.
    while let Some((i, line)) = lines.next() {
        let mut line_chars = line.chars().peekable();

        // header
        let mut heading_level = 0;
        while line_chars.parse_token('#') {
            if heading_level < 6 {
                heading_level += 1;
            }
        }
        if heading_level > 0 {
            if !line_chars.parse_token(' ') {
                return Err(MdParseError::from_line(
                    i + 1,
                    "<space> after #",
                    "no <space>",
                ));
            }
            let line_content: String = line_chars.collect();
            blocks.push(Block::Heading {
                level: heading_level,
                // Attach the line number so inline errors are locatable.
                content: parse_inlines(&line_content).map_err(|e| e.set_line(i + 1))?,
            });
            continue;
        }

        // quote TODO
        /*
        if line_chars.parse_str("> ") {
            let content: String = line_chars.collect();
            let quote_blocks = parse_blocks(&content).map_err(|e| e.set_line(i + 1))?;
            blocks.push(Block::Quote(quote_blocks));
            continue;
        }
        */

        // code
        if line_chars.parse_str("```") {
            let lang_line: String = line_chars.collect();
            let lang = if lang_line.is_empty() {
                None
            } else {
                Some(lang_line)
            };

            let mut code = String::new();
            // Tracks whether the closing fence was seen; without it a
            // properly closed block is indistinguishable from running out
            // of input.
            let mut terminated = false;
            for (j, code_line) in lines.by_ref() {
                let mut code_line_chars = code_line.chars().peekable();
                // code block end
                if code_line_chars.parse_str("```") {
                    let remaining: String = code_line_chars.collect();
                    if !remaining.is_empty() {
                        return Err(MdParseError::from_line(
                            j + 1,
                            "```",
                            format!("```{}", remaining),
                        ));
                    }
                    terminated = true;
                    break;
                }
                code.push_str(code_line);
                code.push('\n');
            }

            if !terminated {
                return Err(MdParseError::from_line(i + 1, "a terminating '```'", ""));
            }
            blocks.push(Block::Code {
                language: lang,
                content: code,
            });
            continue;
        }

        // lists TODO
    }
    Ok(blocks)
}
/*
pub fn parse_blocks(input: &str) -> Result<Vec<Block>, MdParseError> {
let mut blocks = Vec::new();
let mut lines = input.lines().peekable();
let mut lines = input.lines().enumerate().peekable();
while let Some(line) = lines.next() {
while let Some((i, line)) = lines.next() {
if line.starts_with("#") {
let level = line.chars().take_while(|&c| c == '#').count() as u8;
let text = line[level as usize..].trim();
blocks.push(Block::Heading {
level,
content: parse_inlines(text),
content: parse_inlines(text).map_err(|e| e.set_line(i + 1))?,
});
} else if let Some(quote_body) = line.strip_prefix(">") {
let quote_blocks = parse_blocks(quote_body);
let quote_blocks = parse_blocks(quote_body).map_err(|e| e.set_line(i + 1))?;
blocks.push(Block::Quote(quote_blocks));
} else if line.starts_with("```") {
let lang_line = line.strip_prefix("```").unwrap().to_string();
@@ -26,8 +110,16 @@ pub fn parse_blocks(input: &str) -> Vec<Block> {
Some(lang_line)
};
let mut code = String::new();
while lines.peek().is_some() && !lines.peek().unwrap().starts_with("```") {
code.push_str(&format!("{}\n", lines.next().unwrap()));
while lines.peek().is_some()
&& !lines
.peek()
.ok_or(MdParseError::from_line(i + 1, "a line", ""))?
.1
.starts_with("```")
{
if let Some((_i, l)) = lines.next() {
code.push_str(&format!("{}\n", l));
}
}
lines.next();
blocks.push(Block::Code {
@@ -37,9 +129,12 @@ pub fn parse_blocks(input: &str) -> Vec<Block> {
} else if line.trim().is_empty() {
continue;
} else {
blocks.push(Block::Paragraph(parse_inlines(line)));
blocks.push(Block::Paragraph(
parse_inlines(line).map_err(|e| e.set_line(i + 1))?,
));
}
}
blocks
Ok(blocks)
}
*/

View File

@@ -1,61 +1,65 @@
use crate::ast::Inline;
use crate::{MdParseError, ast::Inline};
pub fn parse_inlines(input: &str) -> Vec<Inline> {
pub fn parse_inlines(input: &str) -> Result<Vec<Inline>, MdParseError> {
let mut inlines = Vec::new();
let mut chars = input.chars().peekable();
while let Some(c) = chars.next() {
match c {
'*' => {
let inner = collect_until(&mut chars, '*');
inlines.push(Inline::Bold(parse_inlines(&inner)));
let inner = collect_until(&mut chars, '*')?;
inlines.push(Inline::Bold(parse_inlines(&inner)?));
}
'_' => {
let inner = collect_until(&mut chars, '_');
inlines.push(Inline::Italic(parse_inlines(&inner)));
let inner = collect_until(&mut chars, '_')?;
inlines.push(Inline::Italic(parse_inlines(&inner)?));
}
'`' => {
let code = collect_until(&mut chars, '`');
let code = collect_until(&mut chars, '`')?;
inlines.push(Inline::Code(code));
}
'[' => {
let text = collect_until(&mut chars, ']');
if chars.next() == Some('(') {
let href = collect_until(&mut chars, ')');
let text = collect_until(&mut chars, ']')?;
if let Some('(') = chars.next() {
let href = collect_until(&mut chars, ')')?;
inlines.push(Inline::Link {
text: parse_inlines(&text),
text: parse_inlines(&text)?,
href,
});
} else {
Err(MdParseError::new(
"(<href>)",
chars.next().unwrap_or_default(),
))?;
}
}
_ => {
let mut text = String::new();
text.push(c);
while let Some(&nc) = chars.peek() {
while let Some(nc) = chars.next() {
if matches!(nc, '*' | '_' | '`' | '[') {
break;
}
text.push(chars.next().unwrap());
text.push(nc);
}
inlines.push(Inline::Text(text));
}
}
}
inlines
Ok(inlines)
}
fn collect_until<I: Iterator<Item = char>>(
chars: &mut std::iter::Peekable<I>,
end: char,
) -> String {
) -> Result<String, MdParseError> {
let mut s = String::new();
while let Some(&c) = chars.peek() {
while let Some(c) = chars.next() {
if c == end {
chars.next();
break;
return Ok(s);
}
s.push(chars.next().unwrap());
s.push(c);
}
s
Err(MdParseError::new(end, ""))
}