/// Error describing a place where markdown input did not match the expected syntax.
///
/// It records what the parser expected and what it found instead, plus the
/// file and line when those are known. Parsers in this crate build it with
/// [`MdParseError::new`] or [`MdParseError::from_line`]; callers further up
/// attach the missing location with [`MdParseError::set_line`] and
/// [`MdParseError::set_file`].
#[derive(Debug)]
pub struct MdParseError {
    /// File the input was read from; `None` until `set_file` is called.
    file: Option<PathBuf>,
    /// Line the error was found on; `None` when the error site does not know it.
    line: Option<usize>,
    //col: Option<usize>,
    /// Description of the input the parser was looking for.
    expected: String,
    /// Description of the input that was actually found.
    got: String,
}

impl MdParseError {
    /// Creates an error with no file or line information attached.
    pub fn new(expected: impl ToString, got: impl ToString) -> Self {
        Self {
            file: None,
            line: None,
            expected: expected.to_string(),
            got: got.to_string(),
        }
    }

    /// Creates an error that already knows the line it occurred on.
    pub fn from_line(line: usize, expected: impl ToString, got: impl ToString) -> Self {
        Self::new(expected, got).set_line(line)
    }

    /// Returns the error with its line replaced by `line`.
    ///
    /// Any previously stored line is overwritten.
    pub fn set_line(mut self, line: usize) -> Self {
        self.line = Some(line);
        self
    }

    /// Returns the error with its file replaced by `file`.
    pub fn set_file(mut self, file: PathBuf) -> Self {
        self.file = Some(file);
        self
    }
}

impl Display for MdParseError {
    /// Formats the error as
    /// `Parse error in '<file>' on line <line>: expected '<expected>', got '<got>'`.
    ///
    /// An unknown file is printed as an empty name and an unknown line as `0`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str("Parse error in '")?;
        // Borrow the path instead of cloning it just to print it.
        if let Some(file) = &self.file {
            write!(f, "{}", file.display())?;
        }
        write!(
            f,
            "' on line {}: expected '{}', got '{}'",
            self.line.unwrap_or(0),
            self.expected,
            self.got
        )
    }
}

impl std::error::Error for MdParseError {}
/// Small look-ahead parsing helpers for character iterators.
///
/// Implementors only supply [`Parse::follows`]; the consuming helpers are
/// built on top of it.
pub trait Parse: Iterator {
    /// Returns `true` when the next character is `token`.
    ///
    /// Must not advance the iterator, so calling it repeatedly gives the
    /// same answer.
    fn follows(&mut self, token: char) -> bool;

    /// Consumes the next item if it is `token`.
    ///
    /// Returns `true` when the item was consumed; otherwise the iterator is
    /// left untouched and `false` is returned.
    fn parse_token(&mut self, token: char) -> bool {
        if self.follows(token) {
            let _ = self.next();
            true
        } else {
            false
        }
    }

    /// Consumes `tokens` if the upcoming characters match it exactly.
    ///
    /// The match is all-or-nothing: on a mismatch nothing is consumed. An
    /// empty `tokens` matches trivially and consumes nothing.
    fn parse_str(&mut self, tokens: &str) -> bool
    where
        Self: Sized + Clone,
    {
        // Only one item can be peeked, so try the match on a clone and commit
        // the advanced position only when every character matched.
        let mut lookahead = self.clone();
        if tokens.chars().all(|token| lookahead.parse_token(token)) {
            *self = lookahead;
            true
        } else {
            false
        }
    }
}

impl Parse for std::iter::Peekable<std::str::Chars<'_>> {
    fn follows(&mut self, token: char) -> bool {
        matches!(self.peek(), Some(&c) if c == token)
    }
}

impl Parse for std::iter::Peekable<std::iter::Enumerate<std::str::Chars<'_>>> {
    fn follows(&mut self, token: char) -> bool {
        // Peek on `self` directly: `self.peekable()` would wrap `&mut self` in
        // a temporary `Peekable` whose `peek` pulls an item out of this
        // iterator and then throws it away.
        matches!(self.peek(), Some(&(_, c)) if c == token)
    }
}
super::*; + + #[test] + fn chars_parse_follows_double() { + let mut c = "abc".chars().peekable(); + + assert!(c.follows('a')); + assert!(c.follows('a')); + } +} diff --git a/cracked_md/src/parser.rs b/cracked_md/src/parser.rs index 75e905f..1a8e0a2 100644 --- a/cracked_md/src/parser.rs +++ b/cracked_md/src/parser.rs @@ -3,14 +3,15 @@ mod inline; use block::parse_blocks; -use crate::ast::Document; +use crate::{MdParseError, ast::Document}; -pub fn parse(s: &str) -> Document { - Document { - blocks: parse_blocks(s), - } +pub fn parse(s: &str) -> Result { + Ok(Document { + blocks: parse_blocks(s)?, + }) } +/* #[cfg(test)] mod test { use crate::ast::*; @@ -145,3 +146,4 @@ mod test { ); } } +// */ diff --git a/cracked_md/src/parser/block.rs b/cracked_md/src/parser/block.rs index 1a37aa8..410f6b1 100644 --- a/cracked_md/src/parser/block.rs +++ b/cracked_md/src/parser/block.rs @@ -1,22 +1,106 @@ -use crate::ast::Block; - use super::inline::parse_inlines; +use crate::{MdParseError, ast::Block}; -pub fn parse_blocks(input: &str) -> Vec { +use crate::parse_trait::Parse; + +pub fn parse_blocks(input: &str) -> Result, MdParseError> { + let mut blocks = Vec::new(); + let mut lines = input.lines().enumerate().peekable(); + + while let Some((i, line)) = lines.next() { + let mut line_chars = line.chars().peekable(); + + // header + let mut heading_level = 0; + while line_chars.parse_token('#') { + if heading_level < 6 { + heading_level += 1; + } + } + if heading_level > 0 { + if !line_chars.parse_token(' ') { + Err(MdParseError::from_line( + i + 1, + " after #", + "no ", + ))?; + } + let line_content: String = line_chars.collect(); + blocks.push(Block::Heading { + level: heading_level, + content: parse_inlines(&line_content)?, + }); + continue; + } + + // quote TODO + /* + if line_chars.parse_str("> ") { + let content: String = line_chars.collect(); + let quote_blocks = parse_blocks(&content).map_err(|e| e.set_line(i + 1))?; + blocks.push(Block::Quote(quote_blocks)); + continue; + 
} + */ + + // code + if line_chars.parse_str("```") { + let lang_line: String = line_chars.collect(); + let lang = if lang_line.is_empty() { + None + } else { + Some(lang_line) + }; + let mut code = String::new(); + + for (j, line) in lines.by_ref() { + let mut code_line_chars = line.chars().peekable(); + // code block end + if code_line_chars.parse_str("```") { + let remaining: String = code_line_chars.collect(); + if remaining.is_empty() { + blocks.push(Block::Code { + language: lang, + content: code, + }); + break; + } else { + Err(MdParseError::from_line( + j + 1, + "```", + format!("```{}", remaining), + ))?; + } + } else { + code.push_str(line); + code.push('\n'); + } + } + Err(MdParseError::from_line(i + 1, "a terminating '```'", ""))?; + } + + // lists TODO + } + + Ok(blocks) +} + +/* +pub fn parse_blocks(input: &str) -> Result, MdParseError> { let mut blocks = Vec::new(); - let mut lines = input.lines().peekable(); + let mut lines = input.lines().enumerate().peekable(); - while let Some(line) = lines.next() { + while let Some((i, line)) = lines.next() { if line.starts_with("#") { let level = line.chars().take_while(|&c| c == '#').count() as u8; let text = line[level as usize..].trim(); blocks.push(Block::Heading { level, - content: parse_inlines(text), + content: parse_inlines(text).map_err(|e| e.set_line(i + 1))?, }); } else if let Some(quote_body) = line.strip_prefix(">") { - let quote_blocks = parse_blocks(quote_body); + let quote_blocks = parse_blocks(quote_body).map_err(|e| e.set_line(i + 1))?; blocks.push(Block::Quote(quote_blocks)); } else if line.starts_with("```") { let lang_line = line.strip_prefix("```").unwrap().to_string(); @@ -26,8 +110,16 @@ pub fn parse_blocks(input: &str) -> Vec { Some(lang_line) }; let mut code = String::new(); - while lines.peek().is_some() && !lines.peek().unwrap().starts_with("```") { - code.push_str(&format!("{}\n", lines.next().unwrap())); + while lines.peek().is_some() + && !lines + .peek() + 
.ok_or(MdParseError::from_line(i + 1, "a line", ""))? + .1 + .starts_with("```") + { + if let Some((_i, l)) = lines.next() { + code.push_str(&format!("{}\n", l)); + } } lines.next(); blocks.push(Block::Code { @@ -37,9 +129,12 @@ pub fn parse_blocks(input: &str) -> Vec { } else if line.trim().is_empty() { continue; } else { - blocks.push(Block::Paragraph(parse_inlines(line))); + blocks.push(Block::Paragraph( + parse_inlines(line).map_err(|e| e.set_line(i + 1))?, + )); } } - blocks + Ok(blocks) } +*/ diff --git a/cracked_md/src/parser/inline.rs b/cracked_md/src/parser/inline.rs index f2cdd3b..42a9fd1 100644 --- a/cracked_md/src/parser/inline.rs +++ b/cracked_md/src/parser/inline.rs @@ -1,61 +1,65 @@ -use crate::ast::Inline; +use crate::{MdParseError, ast::Inline}; -pub fn parse_inlines(input: &str) -> Vec { +pub fn parse_inlines(input: &str) -> Result, MdParseError> { let mut inlines = Vec::new(); let mut chars = input.chars().peekable(); while let Some(c) = chars.next() { match c { '*' => { - let inner = collect_until(&mut chars, '*'); - inlines.push(Inline::Bold(parse_inlines(&inner))); + let inner = collect_until(&mut chars, '*')?; + inlines.push(Inline::Bold(parse_inlines(&inner)?)); } '_' => { - let inner = collect_until(&mut chars, '_'); - inlines.push(Inline::Italic(parse_inlines(&inner))); + let inner = collect_until(&mut chars, '_')?; + inlines.push(Inline::Italic(parse_inlines(&inner)?)); } '`' => { - let code = collect_until(&mut chars, '`'); + let code = collect_until(&mut chars, '`')?; inlines.push(Inline::Code(code)); } '[' => { - let text = collect_until(&mut chars, ']'); - if chars.next() == Some('(') { - let href = collect_until(&mut chars, ')'); + let text = collect_until(&mut chars, ']')?; + if let Some('(') = chars.next() { + let href = collect_until(&mut chars, ')')?; inlines.push(Inline::Link { - text: parse_inlines(&text), + text: parse_inlines(&text)?, href, }); + } else { + Err(MdParseError::new( + "()", + chars.next().unwrap_or_default(), + 
))?; } } _ => { let mut text = String::new(); text.push(c); - while let Some(&nc) = chars.peek() { + while let Some(nc) = chars.next() { if matches!(nc, '*' | '_' | '`' | '[') { break; } - text.push(chars.next().unwrap()); + text.push(nc); } inlines.push(Inline::Text(text)); } } } - inlines + Ok(inlines) } fn collect_until>( chars: &mut std::iter::Peekable, end: char, -) -> String { +) -> Result { let mut s = String::new(); - while let Some(&c) = chars.peek() { + while let Some(c) = chars.next() { if c == end { - chars.next(); - break; + return Ok(s); } - s.push(chars.next().unwrap()); + s.push(c); } - s + Err(MdParseError::new(end, "")) } diff --git a/cracked_md/test.md b/cracked_md/test.md new file mode 100644 index 0000000..0361040 --- /dev/null +++ b/cracked_md/test.md @@ -0,0 +1,7 @@ +# Header *1kkkkkkkkkkkkkkkkkkkkkk* + +this is some code: `abc + +```code + +oiajwefoijao089uaoisdjfoijasdfoijasdofij